Skip to content

Commit e832508

Browse files
committed
Add source filter and use indexed hash prefix in cert tag batch query
The certification tag batch query (TagUsageDAO.getCertTagsInternalBatch) was taking ~12 seconds per call on instances with deep classification hierarchies. It fires ~5,800 times per Data Insights (DI) run, contributing ~19 hours of cumulative DB time per run.

Two missing index-friendly predicates caused the slowness:

1. No `source = ?` filter — the query could not use idx_tag_usage_target_exact (source, targetFQNHash, state) INCLUDE (tagFQN, labelType), a covering index whose INCLUDE columns contain tagFQN.
2. `tagFQN LIKE 'Certification.%'` ran against the raw column — there is no LIKE-friendly index on raw tagFQN, only on tagfqn_lower (text_pattern_ops) and on tagFQNHash, so the LIKE always ran as a post-filter on every row the IN clause returned.

Fix:

- Add a `source = :source` filter (certifications always have source = 0, i.e. Classification).
- Switch `tagFQN LIKE :tagFQNPrefix` → `tagFQNHash LIKE :tagFQNHashPrefix`, with the hash prefix pre-computed via FullyQualifiedName.buildHash so the query hits the indexed hash column.

The SQL is the same on MySQL and Postgres, so no @ConnectionAwareSqlQuery split is needed.

This is also a correctness improvement: the `source = 0` filter excludes glossary terms (source = 1) whose FQNs happen to start with "Certification.". Previously such glossary terms could be incorrectly returned as certifications; now they are excluded as expected.

Test:

- Added test_certBatch_bulkFetchReturnsCorrectCertsPerEntity in TagResourceIT. It exercises the bulk fetch path with three schemas (cert-tagged / untagged / non-cert-tagged) and asserts that each gets the right certification (or null) in the list response. This locks in source-filter correctness and prevents future regressions where a non-cert tag could leak into the certification field.
1 parent 048a15e commit e832508

3 files changed

Lines changed: 122 additions & 5 deletions

File tree

openmetadata-integration-tests/src/test/java/org/openmetadata/it/tests/TagResourceIT.java

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1589,6 +1589,114 @@ void test_certificationTagNotLeakingIntoTagsField(TestNamespace ns) {
15891589
"LIST (batch): regular tag must still be present in tags field");
15901590
}
15911591

1592+
/**
1593+
* Verifies the batched cert fetch path — the underlying query uses
1594+
* {@code WHERE source = 0 AND targetFQNHash IN (...) AND tagFQNHash LIKE 'md5(Certification).%'}.
1595+
*
1596+
* <p>Covers three correctness properties of that query:
1597+
* <ul>
1598+
* <li>Entities with a cert get the correct certification populated</li>
1599+
* <li>Entities without any cert get a null certification (no false positives from the IN list)</li>
1600+
* <li>Entities with a non-cert tag from a different classification do NOT get that tag treated
1601+
* as a certification (regression test for the {@code source} filter + hash prefix).</li>
1602+
* </ul>
1603+
*/
1604+
@Test
1605+
void test_certBatch_bulkFetchReturnsCorrectCertsPerEntity(TestNamespace ns) {
1606+
OpenMetadataClient client = SdkClients.adminClient();
1607+
1608+
org.openmetadata.schema.entity.classification.Classification certClassification =
1609+
client.classifications().getByName("Certification", null);
1610+
assertNotNull(certClassification, "Certification classification must exist");
1611+
1612+
String certTagName = ns.shortPrefix("cert_bulk_tag");
1613+
CreateTag createCertTag = new CreateTag();
1614+
createCertTag.setName(certTagName);
1615+
createCertTag.setClassification(certClassification.getFullyQualifiedName());
1616+
createCertTag.setDescription("Cert tag for bulk fetch test");
1617+
Tag certTag = SdkClients.adminClient().tags().create(createCertTag);
1618+
1619+
org.openmetadata.schema.entity.classification.Classification regularClassification =
1620+
createClassification(ns);
1621+
CreateTag createRegularTag = new CreateTag();
1622+
createRegularTag.setName(ns.shortPrefix("regular_bulk_tag"));
1623+
createRegularTag.setClassification(regularClassification.getFullyQualifiedName());
1624+
createRegularTag.setDescription("Non-cert tag for bulk fetch test");
1625+
Tag regularTag = SdkClients.adminClient().tags().create(createRegularTag);
1626+
1627+
org.openmetadata.schema.entity.services.DatabaseService dbService =
1628+
createDatabaseService(ns, "cert_bulk_svc");
1629+
org.openmetadata.schema.entity.data.Database db =
1630+
createDatabase(ns, dbService.getFullyQualifiedName());
1631+
1632+
DatabaseSchema schemaWithCert = createDatabaseSchema(ns, db.getFullyQualifiedName());
1633+
DatabaseSchema schemaWithoutCert = createDatabaseSchema(ns, db.getFullyQualifiedName());
1634+
DatabaseSchema schemaWithRegularTag = createDatabaseSchema(ns, db.getFullyQualifiedName());
1635+
1636+
org.openmetadata.schema.type.TagLabel certTagLabel =
1637+
new org.openmetadata.schema.type.TagLabel()
1638+
.withTagFQN(certTag.getFullyQualifiedName())
1639+
.withSource(org.openmetadata.schema.type.TagLabel.TagSource.CLASSIFICATION)
1640+
.withLabelType(org.openmetadata.schema.type.TagLabel.LabelType.MANUAL);
1641+
schemaWithCert.setCertification(new AssetCertification().withTagLabel(certTagLabel));
1642+
client.databaseSchemas().update(schemaWithCert.getId().toString(), schemaWithCert);
1643+
1644+
org.openmetadata.schema.type.TagLabel regularTagLabel =
1645+
new org.openmetadata.schema.type.TagLabel()
1646+
.withTagFQN(regularTag.getFullyQualifiedName())
1647+
.withSource(org.openmetadata.schema.type.TagLabel.TagSource.CLASSIFICATION)
1648+
.withLabelType(org.openmetadata.schema.type.TagLabel.LabelType.MANUAL);
1649+
schemaWithRegularTag.setTags(List.of(regularTagLabel));
1650+
client
1651+
.databaseSchemas()
1652+
.update(schemaWithRegularTag.getId().toString(), schemaWithRegularTag);
1653+
1654+
org.openmetadata.sdk.models.ListParams listParams =
1655+
new org.openmetadata.sdk.models.ListParams()
1656+
.setDatabase(db.getFullyQualifiedName())
1657+
.setFields("certification");
1658+
org.openmetadata.sdk.models.ListResponse<DatabaseSchema> listed =
1659+
client.databaseSchemas().list(listParams);
1660+
assertNotNull(listed.getData(), "List response must contain data");
1661+
1662+
DatabaseSchema listedWithCert =
1663+
listed.getData().stream()
1664+
.filter(s -> s.getId().equals(schemaWithCert.getId()))
1665+
.findFirst()
1666+
.orElse(null);
1667+
DatabaseSchema listedWithoutCert =
1668+
listed.getData().stream()
1669+
.filter(s -> s.getId().equals(schemaWithoutCert.getId()))
1670+
.findFirst()
1671+
.orElse(null);
1672+
DatabaseSchema listedWithRegularTag =
1673+
listed.getData().stream()
1674+
.filter(s -> s.getId().equals(schemaWithRegularTag.getId()))
1675+
.findFirst()
1676+
.orElse(null);
1677+
1678+
assertNotNull(listedWithCert, "Cert-tagged schema must appear in list");
1679+
assertNotNull(listedWithoutCert, "Untagged schema must appear in list");
1680+
assertNotNull(listedWithRegularTag, "Regular-tagged schema must appear in list");
1681+
1682+
assertNotNull(
1683+
listedWithCert.getCertification(),
1684+
"Bulk fetch must populate certification for cert-tagged schema");
1685+
assertEquals(
1686+
certTag.getFullyQualifiedName(),
1687+
listedWithCert.getCertification().getTagLabel().getTagFQN(),
1688+
"Bulk fetch must return the exact cert tag applied");
1689+
1690+
assertNull(
1691+
listedWithoutCert.getCertification(),
1692+
"Schema without any cert tag must have null certification (no false positives)");
1693+
1694+
assertNull(
1695+
listedWithRegularTag.getCertification(),
1696+
"Schema with a non-cert tag from a different classification must have null certification "
1697+
+ "(source filter + hash prefix must exclude tags outside the Certification classification)");
1698+
}
1699+
15921700
@Test
15931701
void test_certificationTagRenamePropagatesToEntityAndSearch(TestNamespace ns) throws Exception {
15941702
OpenMetadataClient client = SdkClients.adminClient();

openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/CollectionDAO.java

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6476,13 +6476,15 @@ List<TagLabelWithFQNHash> getTagsInternalBatch(
64766476
@SqlQuery(
64776477
"SELECT targetFQNHash, source, tagFQN, labelType, state, reason, appliedAt, appliedBy, metadata "
64786478
+ "FROM tag_usage "
6479-
+ "WHERE targetFQNHash IN (<targetFQNHashes>) "
6480-
+ "AND tagFQN LIKE :tagFQNPrefix "
6479+
+ "WHERE source = :source "
6480+
+ "AND targetFQNHash IN (<targetFQNHashes>) "
6481+
+ "AND tagFQNHash LIKE :tagFQNHashPrefix "
64816482
+ "ORDER BY targetFQNHash, tagFQN")
64826483
@UseRowMapper(TagLabelWithFQNHashMapper.class)
64836484
List<TagLabelWithFQNHash> getCertTagsInternalBatch(
6485+
@Bind("source") int source,
64846486
@BindListFQN("targetFQNHashes") List<String> targetFQNHashes,
6485-
@Bind("tagFQNPrefix") String tagFQNPrefix);
6487+
@Bind("tagFQNHashPrefix") String tagFQNHashPrefix);
64866488

64876489
/**
64886490
* Batch fetch derived tags for multiple glossary term FQNs. Returns a map from glossary term

openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5266,7 +5266,9 @@ protected AssetCertification getCertification(T entity) {
52665266
daoCollection
52675267
.tagUsageDAO()
52685268
.getCertTagsInternalBatch(
5269-
List.of(entity.getFullyQualifiedName()), certClassification + ".%");
5269+
TagLabel.TagSource.CLASSIFICATION.ordinal(),
5270+
List.of(entity.getFullyQualifiedName()),
5271+
FullyQualifiedName.buildHash(certClassification) + ".%");
52705272
if (nullOrEmpty(certTags)) return null;
52715273
return buildCertificationFromCertTag(certTags.get(0).toTagLabel());
52725274
}
@@ -9944,7 +9946,12 @@ private Map<UUID, AssetCertification> batchFetchCertification(List<T> entities)
99449946
List<CollectionDAO.TagUsageDAO.TagLabelWithFQNHash> certTags;
99459947
try {
99469948
certTags =
9947-
daoCollection.tagUsageDAO().getCertTagsInternalBatch(fqnList, certClassification + ".%");
9949+
daoCollection
9950+
.tagUsageDAO()
9951+
.getCertTagsInternalBatch(
9952+
TagLabel.TagSource.CLASSIFICATION.ordinal(),
9953+
fqnList,
9954+
FullyQualifiedName.buildHash(certClassification) + ".%");
99489955
} catch (Exception e) {
99499956
LOG.warn(
99509957
"batchFetchCertification: batch query failed, falling back to individual fetch: {}",

0 commit comments

Comments
 (0)