def _get_dbt_test_description(manifest_node) -> Optional[str]:  # noqa: UP045
    """Resolve a dbt test's description from its manifest node.

    dbt tests can carry a description in two places inside manifest.json:

    1. ``node.description`` -- set via the ``description:`` key in schema.yml.
    2. ``node.config.meta["description"]`` -- set via
       ``config(meta={'description': ...})`` inside the test SQL file.

    Location (1) wins whenever it holds usable text; location (2) is only
    consulted as a fallback.

    A candidate is considered usable only when it is a ``str`` containing at
    least one non-whitespace character. Blank strings and non-string values
    are skipped, so a whitespace-only ``description`` no longer hides the
    ``config.meta`` fallback and a non-string ``meta`` value is never
    returned in place of text.

    Args:
        manifest_node: Manifest node of the dbt test. The ``description``,
            ``config`` and ``config.meta`` attributes may each be missing or
            ``None``; ``config.meta`` is only read when it is a ``dict``.

    Returns:
        The first usable description, exactly as stored (it is not stripped),
        or ``None`` when neither location provides one.
    """
    # getattr with a default keeps every lookup tolerant of partially
    # populated nodes, including a node without a ``description`` attribute.
    config = getattr(manifest_node, "config", None)
    meta = getattr(config, "meta", None)

    # Ordered by priority: schema.yml description first, config.meta second.
    candidates = (
        getattr(manifest_node, "description", None),
        meta.get("description") if isinstance(meta, dict) else None,
    )
    for candidate in candidates:
        if isinstance(candidate, str) and candidate.strip():
            return candidate
    return None
parameterDefinition=create_test_case_parameter_definitions(manifest_node), @@ -1420,6 +1445,13 @@ def create_dbt_tests_definition(self, dbt_test: dict) -> Iterable[Either[CreateT owners=None, ) ) + elif description and self.source_config.dbtUpdateDescriptions: + self.metadata.patch_description( + entity=TestDefinition, + source=check_test_definition_exists, + description=description, + force=True, + ) except Exception as err: # pylint: disable=broad-except yield Either( left=StackTraceError( @@ -1454,12 +1486,13 @@ def create_dbt_test_case(self, dbt_test: dict) -> Iterable[Either[CreateTestCase ) test_case = self.metadata.get_by_name(TestCase, test_case_fqn, fields=["testDefinition,testSuite"]) + description = _get_dbt_test_description(manifest_node) if test_case is None: # Create the test case only if it does not exist yield Either( right=CreateTestCaseRequest( name=manifest_node.name, - description=manifest_node.description, + description=description, testDefinition=FullyQualifiedEntityName(manifest_node.name), entityLink=entity_link_str, parameterValues=create_test_case_parameter_values(dbt_test), @@ -1467,6 +1500,13 @@ def create_dbt_test_case(self, dbt_test: dict) -> Iterable[Either[CreateTestCase owners=None, ) ) + elif description and self.source_config.dbtUpdateDescriptions: + self.metadata.patch_description( + entity=TestCase, + source=test_case, + description=description, + force=True, + ) logger.debug(f"Test case Already Exists: {test_case_fqn}") except Exception as err: # pylint: disable=broad-except yield Either( diff --git a/ingestion/tests/unit/test_dbt.py b/ingestion/tests/unit/test_dbt.py index 9a7ad7bc3eea..09210f4a0a18 100644 --- a/ingestion/tests/unit/test_dbt.py +++ b/ingestion/tests/unit/test_dbt.py @@ -3640,3 +3640,436 @@ def test_multiple_non_required_keys_of_mixed_types(self): assert manifest_dict["parent_map"] == {} assert manifest_dict["child_map"] == {} assert manifest_dict["group_map"] == [] + + +class 
class TestGetDbtTestDescription(TestCase):
    """
    Unit tests for the ``_get_dbt_test_description`` helper.

    The helper resolves a test description from ``manifest_node.description``
    (schema.yml) and falls back to ``manifest_node.config.meta["description"]``
    (``config()`` block) when the former is empty.
    """

    @staticmethod
    def _resolve(manifest_node):
        """Import the helper lazily and return its result for ``manifest_node``."""
        from metadata.ingestion.source.database.dbt.metadata import _get_dbt_test_description

        return _get_dbt_test_description(manifest_node)

    def test_returns_node_description_when_present(self):
        manifest_node = MagicMock()
        manifest_node.description = "from schema.yml"

        self.assertEqual(self._resolve(manifest_node), "from schema.yml")

    def test_falls_back_to_config_meta_description(self):
        manifest_node = MagicMock()
        manifest_node.description = ""
        manifest_node.config.meta = {"description": "from config block"}

        self.assertEqual(self._resolve(manifest_node), "from config block")

    def test_returns_none_when_both_empty(self):
        manifest_node = MagicMock()
        manifest_node.description = ""
        manifest_node.config.meta = {}

        self.assertIsNone(self._resolve(manifest_node))

    def test_returns_none_when_no_config(self):
        # An empty spec means the mock exposes no attributes of its own, so
        # ``config`` is absent; only ``description`` is set explicitly.
        manifest_node = MagicMock(spec=[])
        manifest_node.description = ""

        self.assertIsNone(self._resolve(manifest_node))

    def test_returns_none_when_config_meta_is_none(self):
        manifest_node = MagicMock()
        manifest_node.description = ""
        manifest_node.config.meta = None

        self.assertIsNone(self._resolve(manifest_node))

    def test_node_description_takes_priority_over_config_meta(self):
        manifest_node = MagicMock()
        manifest_node.description = "primary"
        manifest_node.config.meta = {"description": "fallback"}

        self.assertEqual(self._resolve(manifest_node), "primary")

    def test_config_meta_not_a_dict_returns_none(self):
        manifest_node = MagicMock()
        manifest_node.description = ""
        manifest_node.config.meta = "not a dict"

        self.assertIsNone(self._resolve(manifest_node))
class TestDbtDescriptionUpdateOnRerun(TestCase):
    """
    Tests that create_dbt_tests_definition() and create_dbt_test_case()
    patch descriptions on existing entities when dbtUpdateDescriptions is
    enabled and the manifest node carries a description -- including the
    config.meta.description fallback path.

    Both methods under test catch every exception and yield it as an
    ``Either(left=...)`` result. The tests for an already existing entity
    therefore also assert that nothing was yielded; otherwise a crash inside
    the method would make a "patch_description not called" check pass
    vacuously.
    """

    # Module whose collaborators are replaced while the methods run.
    _DBT_MODULE = "metadata.ingestion.source.database.dbt.metadata"

    @staticmethod
    def _make_dbt_source(dbt_update_descriptions=True):
        """Build a DbtSource-shaped mock with the given dbtUpdateDescriptions flag."""
        from metadata.ingestion.source.database.dbt.metadata import DbtSource

        source = MagicMock(spec=DbtSource)
        source.metadata = MagicMock()
        source.source_config = MagicMock()
        source.source_config.dbtUpdateDescriptions = dbt_update_descriptions
        source.context = MagicMock()
        return source

    @staticmethod
    def _make_manifest_node(name="test_not_null_orders_id", description="Check for nulls", config_meta_desc=None):
        """Build a mock manifest node.

        Args:
            name: value for node.name
            description: value for node.description (schema.yml path)
            config_meta_desc: if set, populates node.config.meta["description"]
                (config() block path)
        """
        node = MagicMock()
        node.name = name
        node.description = description
        if config_meta_desc is not None:
            node.config.meta = {"description": config_meta_desc}
        else:
            # Default: config.meta is an empty dict (no fallback)
            node.config.meta = {}
        return node

    def _run_definition(self, manifest_node, *, existing, update_descriptions=True):
        """Run create_dbt_tests_definition() on a mocked source.

        Args:
            manifest_node: node placed under the MANIFEST_NODE key of the dbt test.
            existing: value returned by ``metadata.get_by_name`` (``None``
                simulates a TestDefinition that does not exist yet).
            update_descriptions: value of ``source_config.dbtUpdateDescriptions``.

        Returns:
            ``(source, results)`` -- the mocked source and the fully consumed
            generator output.
        """
        from metadata.ingestion.source.database.dbt.constants import DbtCommonEnum
        from metadata.ingestion.source.database.dbt.metadata import DbtSource

        source = self._make_dbt_source(dbt_update_descriptions=update_descriptions)
        source.metadata.get_by_name.return_value = existing
        # Bind the real method to the mock so the production code path runs.
        source.create_dbt_tests_definition = DbtSource.create_dbt_tests_definition.__get__(source, DbtSource)

        dbt_test = {DbtCommonEnum.MANIFEST_NODE.value: manifest_node}
        with (
            patch(f"{self._DBT_MODULE}.get_manifest_column_name", return_value=None),
            patch(f"{self._DBT_MODULE}.create_test_case_parameter_definitions", return_value=[]),
        ):
            results = list(source.create_dbt_tests_definition(dbt_test))
        return source, results

    def _run_test_case(self, manifest_node, *, existing, update_descriptions=True):
        """Run create_dbt_test_case() on a mocked source.

        Args:
            manifest_node: node placed under the MANIFEST_NODE key of the dbt test.
            existing: value returned by ``metadata.get_by_name`` (``None``
                simulates a TestCase that does not exist yet).
            update_descriptions: value of ``source_config.dbtUpdateDescriptions``.

        Returns:
            ``(source, results)`` -- the mocked source and the fully consumed
            generator output.
        """
        from metadata.ingestion.source.database.dbt.constants import DbtCommonEnum
        from metadata.ingestion.source.database.dbt.metadata import DbtSource

        source = self._make_dbt_source(dbt_update_descriptions=update_descriptions)
        source.metadata.get_by_name.return_value = existing
        # Bind the real method to the mock so the production code path runs.
        source.create_dbt_test_case = DbtSource.create_dbt_test_case.__get__(source, DbtSource)

        dbt_test = {DbtCommonEnum.MANIFEST_NODE.value: manifest_node}
        with (
            patch(f"{self._DBT_MODULE}.generate_entity_link") as mock_gen,
            patch(f"{self._DBT_MODULE}.get_table_fqn", return_value="svc.db.schema.orders"),
            patch(f"{self._DBT_MODULE}.fqn") as mock_fqn,
            patch(f"{self._DBT_MODULE}.get_manifest_column_name", return_value=None),
            patch(f"{self._DBT_MODULE}.create_test_case_parameter_values", return_value=[]),
        ):
            mock_gen.return_value = ["<#E::table::svc.db.schema.orders>"]
            mock_fqn.split.return_value = ["svc", "db", "schema", "orders"]
            mock_fqn.build.return_value = "svc.db.schema.orders.test_not_null_orders_id"
            results = list(source.create_dbt_test_case(dbt_test))
        return source, results

    def _assert_nothing_yielded(self, results):
        """Fail if the method yielded anything, in particular a swallowed error."""
        self.assertEqual(results, [], "no create request or error result is expected for an existing entity")

    # -- create_dbt_tests_definition ------------------------------------

    def test_definition_description_patched_when_exists_and_update_enabled(self):
        """
        When a TestDefinition already exists, has a manifest description,
        and dbtUpdateDescriptions is True, patch_description must be called.
        """
        from metadata.generated.schema.tests.testDefinition import TestDefinition

        existing_definition = MagicMock()  # simulates existing entity
        source, results = self._run_definition(self._make_manifest_node(), existing=existing_definition)

        self._assert_nothing_yielded(results)
        source.metadata.patch_description.assert_called_once()
        call_kwargs = source.metadata.patch_description.call_args.kwargs
        self.assertIs(call_kwargs["entity"], TestDefinition)
        self.assertIs(call_kwargs["source"], existing_definition)
        self.assertEqual(call_kwargs["description"], "Check for nulls")
        self.assertIs(call_kwargs["force"], True)

    def test_definition_description_not_patched_when_update_disabled(self):
        """
        When dbtUpdateDescriptions is False, patch_description must NOT be
        called even if the entity exists and has a description.
        """
        source, results = self._run_definition(
            self._make_manifest_node(), existing=MagicMock(), update_descriptions=False
        )

        self._assert_nothing_yielded(results)
        source.metadata.patch_description.assert_not_called()

    def test_definition_description_not_patched_when_no_description(self):
        """
        When the manifest node has no description, patch_description must NOT
        be called even if dbtUpdateDescriptions is True.
        """
        source, results = self._run_definition(self._make_manifest_node(description=""), existing=MagicMock())

        self._assert_nothing_yielded(results)
        source.metadata.patch_description.assert_not_called()

    def test_definition_created_when_not_exists(self):
        """
        When the TestDefinition does not exist yet, a CreateTestDefinitionRequest
        must be yielded and patch_description must NOT be called.
        """
        source, results = self._run_definition(self._make_manifest_node(), existing=None)

        self.assertEqual(len(results), 1)
        self.assertIsNotNone(results[0].right)
        source.metadata.patch_description.assert_not_called()

    # -- create_dbt_test_case -------------------------------------------

    def test_case_description_patched_when_exists_and_update_enabled(self):
        """
        When a TestCase already exists, has a manifest description,
        and dbtUpdateDescriptions is True, patch_description must be called.
        """
        from metadata.generated.schema.tests.testCase import TestCase as TestCaseEntity

        existing_test_case = MagicMock()
        source, results = self._run_test_case(self._make_manifest_node(), existing=existing_test_case)

        self._assert_nothing_yielded(results)
        source.metadata.patch_description.assert_called_once()
        call_kwargs = source.metadata.patch_description.call_args.kwargs
        self.assertIs(call_kwargs["entity"], TestCaseEntity)
        self.assertIs(call_kwargs["source"], existing_test_case)
        self.assertEqual(call_kwargs["description"], "Check for nulls")
        self.assertIs(call_kwargs["force"], True)

    def test_case_description_not_patched_when_update_disabled(self):
        """
        When dbtUpdateDescriptions is False, patch_description must NOT be
        called for test case even if it exists with a description.
        """
        source, results = self._run_test_case(
            self._make_manifest_node(), existing=MagicMock(), update_descriptions=False
        )

        self._assert_nothing_yielded(results)
        source.metadata.patch_description.assert_not_called()

    def test_case_description_not_patched_when_no_description(self):
        """
        When the manifest node has no description, patch_description must NOT
        be called for test case even if dbtUpdateDescriptions is True.
        """
        source, results = self._run_test_case(self._make_manifest_node(description=""), existing=MagicMock())

        self._assert_nothing_yielded(results)
        source.metadata.patch_description.assert_not_called()

    def test_case_created_when_not_exists(self):
        """
        When the TestCase does not exist yet, a CreateTestCaseRequest must be
        yielded and patch_description must NOT be called.
        """
        source, results = self._run_test_case(self._make_manifest_node(), existing=None)

        self.assertEqual(len(results), 1)
        self.assertIsNotNone(results[0].right)
        source.metadata.patch_description.assert_not_called()

    # -- config.meta.description fallback -------------------------------

    def test_definition_uses_config_meta_description_on_create(self):
        """
        When node.description is empty but config.meta has a description,
        the fallback value is used for the CreateTestDefinitionRequest.
        """
        manifest_node = self._make_manifest_node(description="", config_meta_desc="from config block")
        _, results = self._run_definition(manifest_node, existing=None)

        self.assertEqual(len(results), 1)
        self.assertEqual(results[0].right.description, "from config block")

    def test_definition_patches_config_meta_description_on_rerun(self):
        """
        When entity exists and node.description is empty but config.meta has
        a description, patch_description is called with the fallback value.
        """
        manifest_node = self._make_manifest_node(description="", config_meta_desc="from config block")
        source, results = self._run_definition(manifest_node, existing=MagicMock())

        self._assert_nothing_yielded(results)
        source.metadata.patch_description.assert_called_once()
        self.assertEqual(source.metadata.patch_description.call_args.kwargs["description"], "from config block")

    def test_case_uses_config_meta_description_on_create(self):
        """
        When node.description is empty but config.meta has a description,
        the fallback value is used for the CreateTestCaseRequest.
        """
        manifest_node = self._make_manifest_node(description="", config_meta_desc="from config block")
        _, results = self._run_test_case(manifest_node, existing=None)

        self.assertEqual(len(results), 1)
        self.assertEqual(results[0].right.description, "from config block")

    def test_case_patches_config_meta_description_on_rerun(self):
        """
        When entity exists and node.description is empty but config.meta has
        a description, patch_description is called with the fallback value.
        """
        manifest_node = self._make_manifest_node(description="", config_meta_desc="from config block")
        source, results = self._run_test_case(manifest_node, existing=MagicMock())

        self._assert_nothing_yielded(results)
        source.metadata.patch_description.assert_called_once()
        self.assertEqual(source.metadata.patch_description.call_args.kwargs["description"], "from config block")