3232 KinesisBrokerConfig ,
3333 OpenLineageConnection ,
3434)
35+ from metadata .generated .schema .entity .services .databaseService import DatabaseService
3536from metadata .generated .schema .entity .services .ingestionPipelines .status import (
3637 StackTraceError ,
3738)
6970 get_or_create_pipeline_service ,
7071 resolve_pipeline_service_type ,
7172)
73+ from metadata .ingestion .source .pipeline .openlineage .table_resolver import (
74+ extract_db_scheme_from_namespace ,
75+ find_service_by_namespace_mapping ,
76+ find_services_by_scheme ,
77+ )
7278from metadata .ingestion .source .pipeline .openlineage .utils import (
7379 FQNNotFoundException ,
7480 message_to_open_lineage_event ,
@@ -113,6 +119,9 @@ def create(
def prepare(self):
    """Initialise the per-run lookup state used while processing events.

    Sets up empty caches for pipeline services, fetched entities and
    namespace -> DB service resolution, then builds the DB service type
    map via ``_build_db_service_type_map``.
    """
    # Pipeline-service bookkeeping.
    self._current_pipeline_service = None
    self._service_cache = dict()

    # Entities already fetched by name, keyed by the cache key built in
    # ``_get_by_name_cached``.
    self._entity_cache: Dict[str, Any] = dict()

    # Namespace -> resolved DB service names (``None`` when unresolved).
    self._namespace_to_service_cache: Dict[str, Optional[List[str]]] = dict()

    # Service name -> service type, built once up front.
    self._db_service_type_map: Dict[str, str] = self._build_db_service_type_map()
116125
117126 def close (self ) -> None :
118127 self .metadata .compute_percentile (Pipeline , self .today )
@@ -214,7 +223,94 @@ def _get_topic_details(data: Dict) -> TopicDetails:
214223
215224 return TopicDetails (name = name , broker_hostname = broker_hostname )
216225
217- def _get_table_fqn (self , table_details : TableDetails ) -> Optional [str ]:
226+ def _get_by_name_cached (self , entity_class , fqn_str : str , ** kwargs ):
227+ """Wrapper around metadata.get_by_name with in-memory caching."""
228+ if not hasattr (self , "_entity_cache" ):
229+ return self .metadata .get_by_name (entity_class , fqn_str , ** kwargs )
230+ key = f"{ entity_class .__name__ } :{ fqn_str } "
231+ if key not in self ._entity_cache :
232+ self ._entity_cache [key ] = self .metadata .get_by_name (
233+ entity_class , fqn_str , ** kwargs
234+ )
235+ return self ._entity_cache [key ]
236+
237+ def _build_db_service_type_map (self ):
238+ """Build a map of {service_name: DatabaseServiceType} filtered to configured dbServiceNames."""
239+ type_map = {}
240+ for service_name in self .get_db_service_names ():
241+ try :
242+ svc = self .metadata .get_by_name (DatabaseService , service_name )
243+ if svc and svc .serviceType :
244+ type_map [service_name ] = svc .serviceType
245+ except Exception :
246+ logger .debug (f"Could not fetch DB service: { service_name } " )
247+ return type_map
248+
249+ def _resolve_db_services_for_namespace (self , namespace : str ) -> Optional [List [str ]]:
250+ """
251+ Resolve which DB services to search for a given OL dataset namespace.
252+
253+ Resolution order:
254+ 1. Check namespaceToServiceMapping config (exact then prefix match).
255+ 2. Extract scheme from namespace, filter services by matching DB type.
256+ If exactly one match -> use it. If multiple -> log warning and return all.
257+ 3. Return None -> caller falls back to all dbServiceNames.
258+ """
259+ if not hasattr (self , "_namespace_to_service_cache" ):
260+ return None
261+
262+ if namespace in self ._namespace_to_service_cache :
263+ return self ._namespace_to_service_cache [namespace ]
264+
265+ result = None
266+ configured = set (self .get_db_service_names () or [])
267+
268+ mapping = (
269+ getattr (self .service_connection , "namespaceToServiceMapping" , None ) or {}
270+ )
271+ mapped_service = find_service_by_namespace_mapping (namespace , mapping )
272+ if mapped_service and mapped_service in configured :
273+ result = [mapped_service ]
274+ else :
275+ # Auto-discover by extracting the DB scheme from the namespace URL
276+ db_scheme = extract_db_scheme_from_namespace (namespace )
277+ if db_scheme :
278+ matched = find_services_by_scheme (db_scheme , self ._db_service_type_map )
279+ if len (matched ) == 1 :
280+ result = matched
281+ elif len (matched ) > 1 :
282+ logger .warning (
283+ f"Namespace '{ namespace } ' (scheme={ db_scheme } ) matches "
284+ f"multiple DB services: { matched } . Configure "
285+ f"'namespaceToServiceMapping' to disambiguate."
286+ )
287+ result = matched
288+
289+ self ._namespace_to_service_cache [namespace ] = result
290+ return result
291+
def _find_table_fqn(
    self, table_details: TableDetails, services: Optional[List[str]] = None
) -> str:
    """Return the first Table FQN that ``fqn.build`` yields for the given details.

    :param table_details: database, schema and table name to look for
    :param services: DB service names to try, in order; when empty or
        omitted, the names from ``get_db_service_names()`` are used
    :return: the first truthy result of ``fqn.build``
    :raises FQNNotFoundException: when no service produces a result
    """
    search_services = services or self.get_db_service_names()

    # Lazy generator: services are tried one at a time, in order, and the
    # search stops at the first truthy FQN.
    candidates = (
        fqn.build(
            metadata=self.metadata,
            entity_type=Table,
            service_name=service_name,
            database_name=table_details.database,
            schema_name=table_details.schema,
            table_name=table_details.name,
        )
        for service_name in search_services
    )
    table_fqn = next(filter(None, candidates), None)
    if table_fqn is None:
        raise FQNNotFoundException(
            f"Table FQN not found for {table_details} in services {search_services}"
        )
    return table_fqn
310+
311+ def _get_table_fqn (
312+ self , table_details : TableDetails , namespace : Optional [str ] = None
313+ ) -> Optional [str ]:
218314 if not self .get_db_service_names ():
219315 if not self ._db_service_names_warned :
220316 logger .warning (
@@ -224,16 +320,39 @@ def _get_table_fqn(self, table_details: TableDetails) -> Optional[str]:
224320 )
225321 self ._db_service_names_warned = True
226322 return None
323+
324+ resolved_services = self ._resolve_db_services_for_namespace (namespace )
325+
227326 try :
228- return self ._get_table_fqn_from_om (table_details )
327+ return self ._find_table_fqn (table_details , services = resolved_services )
229328 except FQNNotFoundException :
230329 try :
231- schema_fqn = self ._get_schema_fqn_from_om (table_details .schema )
232-
330+ schema_fqn = self ._get_schema_fqn_from_om (
331+ table_details .schema , services = resolved_services
332+ )
233333 return f"{ schema_fqn } .{ table_details .name } "
234334 except FQNNotFoundException :
235335 return None
236336
337+ def _get_table_fqn_from_om (
338+ self , table_details : TableDetails , services : Optional [List [str ]] = None
339+ ) -> str :
340+ """
341+ Looks for matching Table entity in OM across all configured DB services.
342+ """
343+ for db_service in services or self .get_db_service_names ():
344+ result = fqn .build (
345+ metadata = self .metadata ,
346+ entity_type = Table ,
347+ service_name = db_service ,
348+ database_name = table_details .database ,
349+ schema_name = table_details .schema ,
350+ table_name = table_details .name ,
351+ )
352+ if result :
353+ return result
354+ raise FQNNotFoundException (f"Table FQN not found for { table_details } " )
355+
237356 def _build_broker_to_service_map (self ) -> Dict [str , str ]:
238357 """
239358 Build a cache mapping broker hostnames to messaging service FQNs.
@@ -309,15 +428,18 @@ def _get_topic_entity(self, topic_details: TopicDetails) -> Optional[Topic]:
309428 logger .warning (f"Error finding topic for { topic_details .name } : { exc } " )
310429 return None
311430
312- def _get_schema_fqn_from_om (self , schema : str ) -> Optional [str ]:
431+ def _get_schema_fqn_from_om (
432+ self , schema : str , services : Optional [List [str ]] = None
433+ ) -> Optional [str ]:
313434 """
314435 Based on partial schema name look for any matching DatabaseSchema object in open metadata.
315436
316437 :param schema: schema name
438+ :param services: optional list of service names to search
317439 :return: fully qualified name of a DatabaseSchema in Open Metadata
318440 """
319441 result = None
320- services = self .get_db_service_names ()
442+ services = services or self .get_db_service_names ()
321443
322444 for db_service in services :
323445 result = fqn .build (
@@ -469,7 +591,10 @@ def _build_ol_name_to_fqn_map(self, tables: List):
469591 entity_details = self ._get_entity_details (table )
470592 if entity_details .entity_type != "table" :
471593 continue
472- table_fqn = self ._get_table_fqn (entity_details .table_details )
594+ table_fqn = self ._get_table_fqn (
595+ entity_details .table_details ,
596+ namespace = table .get ("namespace" ),
597+ )
473598
474599 if table_fqn :
475600 result [OpenlineageSource ._get_ol_table_name (table )] = table_fqn
@@ -505,7 +630,10 @@ def _get_column_lineage(
505630 if entity_details .entity_type != "table" :
506631 continue
507632
508- output_table_fqn = self ._get_table_fqn (entity_details .table_details )
633+ output_table_fqn = self ._get_table_fqn (
634+ entity_details .table_details ,
635+ namespace = table .get ("namespace" ),
636+ )
509637 for field_name , field_spec in (
510638 table .get ("facets" , {})
511639 .get ("columnLineage" , {})
@@ -604,15 +732,16 @@ def yield_pipeline_lineage_details(
604732 if create_table_request :
605733 yield create_table_request
606734
607- table_fqn = self ._get_table_fqn (entity_details .table_details )
735+ table_fqn = self ._get_table_fqn (
736+ entity_details .table_details ,
737+ namespace = entity_data .get ("namespace" ),
738+ )
608739
609740 if table_fqn :
610741 entity_list .append (
611742 LineageNode (
612743 fqn = TableFQN (value = table_fqn ),
613- uuid = self .metadata .get_by_name (
614- Table , table_fqn
615- ).id .root ,
744+ uuid = self ._get_by_name_cached (Table , table_fqn ).id .root ,
616745 node_type = "table" ,
617746 )
618747 )
0 commit comments