Skip to content

Commit d6b5389

Browse files
sonika-shahOpenMetadata Release Bot
authored andcommitted
Add source filter and indexed hash prefix to cert tag batch query (#27847)
* Add source filter and use indexed hash prefix in cert tag batch query The certification tag batch query (TagUsageDAO.getCertTagsInternalBatch) was hitting ~12 seconds per call on instances with deep classification hierarchies — fired ~5,800 times per Data Insights run, contributing ~19 hrs of cumulative DB time per DI run. Two missing index-friendly predicates caused the slowness: 1. No `source = ?` filter — couldn't use idx_tag_usage_target_exact (source, targetFQNHash, state) INCLUDE (tagFQN, labelType) whose covering INCLUDE has tagFQN. 2. `tagFQN LIKE 'Certification.%'` on the raw column — there's no LIKE-friendly index on raw tagFQN, only on tagfqn_lower text_pattern_ops and tagFQNHash. The LIKE always ran as a post-filter on every row the IN clause returned. Fix: - Add `source = :source` filter (Certifications are always Classification source = 0). - Switch `tagFQN LIKE :tagFQNPrefix` → `tagFQNHash LIKE :tagFQNHashPrefix`, with the hash prefix pre-computed via FullyQualifiedName.buildHash so the query hits the indexed hash column. Same SQL on MySQL and Postgres — no @ConnectionAwareSqlQuery split needed. Also a correctness improvement: the `source = 0` filter excludes glossary terms (source = 1) that happen to have FQNs starting with "Certification.". Previously such glossary terms could be incorrectly returned as certifications; now they're excluded as expected. Test: - Added test_certBatch_bulkFetchReturnsCorrectCertsPerEntity in TagResourceIT — exercises the bulk fetch path with three schemas (cert-tagged / untagged / non-cert-tagged) and asserts each gets the right certification (or null) in the listed response. Locks in source-filter correctness and prevents future regressions where a non-cert tag could leak into the certification field. * Fix duplicate schema names in cert batch test, trim verbose comments * Update EntityRepositoryCertificationTest mocks for new getCertTagsInternalBatch signature * fix check style (cherry picked from commit 4a2f42f)
1 parent 12117ce commit d6b5389

4 files changed

Lines changed: 126 additions & 14 deletions

File tree

openmetadata-integration-tests/src/test/java/org/openmetadata/it/tests/TagResourceIT.java

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1578,6 +1578,104 @@ void test_certificationTagNotLeakingIntoTagsField(TestNamespace ns) {
15781578
"LIST (batch): regular tag must still be present in tags field");
15791579
}
15801580

1581+
@Test
1582+
void test_certBatch_bulkFetchReturnsCorrectCertsPerEntity(TestNamespace ns) {
1583+
OpenMetadataClient client = SdkClients.adminClient();
1584+
1585+
org.openmetadata.schema.entity.classification.Classification certClassification =
1586+
client.classifications().getByName("Certification", null);
1587+
assertNotNull(certClassification, "Certification classification must exist");
1588+
1589+
CreateTag createCertTag = new CreateTag();
1590+
createCertTag.setName(ns.shortPrefix("cert_bulk_tag"));
1591+
createCertTag.setClassification(certClassification.getFullyQualifiedName());
1592+
createCertTag.setDescription("Cert tag for bulk fetch test");
1593+
Tag certTag = SdkClients.adminClient().tags().create(createCertTag);
1594+
1595+
org.openmetadata.schema.entity.classification.Classification regularClassification =
1596+
createClassification(ns);
1597+
CreateTag createRegularTag = new CreateTag();
1598+
createRegularTag.setName(ns.shortPrefix("regular_bulk_tag"));
1599+
createRegularTag.setClassification(regularClassification.getFullyQualifiedName());
1600+
createRegularTag.setDescription("Non-cert tag for bulk fetch test");
1601+
Tag regularTag = SdkClients.adminClient().tags().create(createRegularTag);
1602+
1603+
org.openmetadata.schema.entity.services.DatabaseService dbService =
1604+
createDatabaseService(ns, "cert_bulk_svc");
1605+
org.openmetadata.schema.entity.data.Database db =
1606+
createDatabase(ns, dbService.getFullyQualifiedName());
1607+
1608+
DatabaseSchema schemaWithCert =
1609+
createDatabaseSchemaNamed(ns, db.getFullyQualifiedName(), "cert_bulk_with");
1610+
DatabaseSchema schemaWithoutCert =
1611+
createDatabaseSchemaNamed(ns, db.getFullyQualifiedName(), "cert_bulk_without");
1612+
DatabaseSchema schemaWithRegularTag =
1613+
createDatabaseSchemaNamed(ns, db.getFullyQualifiedName(), "cert_bulk_regular");
1614+
1615+
org.openmetadata.schema.type.TagLabel certTagLabel =
1616+
new org.openmetadata.schema.type.TagLabel()
1617+
.withTagFQN(certTag.getFullyQualifiedName())
1618+
.withSource(org.openmetadata.schema.type.TagLabel.TagSource.CLASSIFICATION)
1619+
.withLabelType(org.openmetadata.schema.type.TagLabel.LabelType.MANUAL);
1620+
schemaWithCert.setCertification(new AssetCertification().withTagLabel(certTagLabel));
1621+
client.databaseSchemas().update(schemaWithCert.getId().toString(), schemaWithCert);
1622+
1623+
org.openmetadata.schema.type.TagLabel regularTagLabel =
1624+
new org.openmetadata.schema.type.TagLabel()
1625+
.withTagFQN(regularTag.getFullyQualifiedName())
1626+
.withSource(org.openmetadata.schema.type.TagLabel.TagSource.CLASSIFICATION)
1627+
.withLabelType(org.openmetadata.schema.type.TagLabel.LabelType.MANUAL);
1628+
schemaWithRegularTag.setTags(List.of(regularTagLabel));
1629+
client.databaseSchemas().update(schemaWithRegularTag.getId().toString(), schemaWithRegularTag);
1630+
1631+
org.openmetadata.sdk.models.ListParams listParams =
1632+
new org.openmetadata.sdk.models.ListParams()
1633+
.setDatabase(db.getFullyQualifiedName())
1634+
.setFields("certification");
1635+
org.openmetadata.sdk.models.ListResponse<DatabaseSchema> listed =
1636+
client.databaseSchemas().list(listParams);
1637+
assertNotNull(listed.getData());
1638+
1639+
DatabaseSchema listedWithCert =
1640+
listed.getData().stream()
1641+
.filter(s -> s.getId().equals(schemaWithCert.getId()))
1642+
.findFirst()
1643+
.orElse(null);
1644+
DatabaseSchema listedWithoutCert =
1645+
listed.getData().stream()
1646+
.filter(s -> s.getId().equals(schemaWithoutCert.getId()))
1647+
.findFirst()
1648+
.orElse(null);
1649+
DatabaseSchema listedWithRegularTag =
1650+
listed.getData().stream()
1651+
.filter(s -> s.getId().equals(schemaWithRegularTag.getId()))
1652+
.findFirst()
1653+
.orElse(null);
1654+
1655+
assertNotNull(listedWithCert);
1656+
assertNotNull(listedWithoutCert);
1657+
assertNotNull(listedWithRegularTag);
1658+
1659+
assertNotNull(listedWithCert.getCertification(), "cert-tagged schema: certification missing");
1660+
assertEquals(
1661+
certTag.getFullyQualifiedName(),
1662+
listedWithCert.getCertification().getTagLabel().getTagFQN());
1663+
1664+
assertNull(listedWithoutCert.getCertification(), "untagged schema: false-positive cert");
1665+
assertNull(
1666+
listedWithRegularTag.getCertification(),
1667+
"non-cert tag from another classification leaked as certification");
1668+
}
1669+
1670+
private org.openmetadata.schema.entity.data.DatabaseSchema createDatabaseSchemaNamed(
1671+
TestNamespace ns, String databaseFqn, String name) {
1672+
org.openmetadata.schema.api.data.CreateDatabaseSchema createSchema =
1673+
new org.openmetadata.schema.api.data.CreateDatabaseSchema();
1674+
createSchema.setName(ns.shortPrefix(name));
1675+
createSchema.setDatabase(databaseFqn);
1676+
return SdkClients.adminClient().databaseSchemas().create(createSchema);
1677+
}
1678+
15811679
@Test
15821680
void test_certificationTagRenamePropagatesToEntityAndSearch(TestNamespace ns) throws Exception {
15831681
OpenMetadataClient client = SdkClients.adminClient();

openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/CollectionDAO.java

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5050,13 +5050,15 @@ List<TagLabelWithFQNHash> getTagsInternalBatch(
50505050
@SqlQuery(
50515051
"SELECT targetFQNHash, source, tagFQN, labelType, state, reason, appliedAt, appliedBy, metadata "
50525052
+ "FROM tag_usage "
5053-
+ "WHERE targetFQNHash IN (<targetFQNHashes>) "
5054-
+ "AND tagFQN LIKE :tagFQNPrefix "
5053+
+ "WHERE source = :source "
5054+
+ "AND targetFQNHash IN (<targetFQNHashes>) "
5055+
+ "AND tagFQNHash LIKE :tagFQNHashPrefix "
50555056
+ "ORDER BY targetFQNHash, tagFQN")
50565057
@UseRowMapper(TagLabelWithFQNHashMapper.class)
50575058
List<TagLabelWithFQNHash> getCertTagsInternalBatch(
5059+
@Bind("source") int source,
50585060
@BindListFQN("targetFQNHashes") List<String> targetFQNHashes,
5059-
@Bind("tagFQNPrefix") String tagFQNPrefix);
5061+
@Bind("tagFQNHashPrefix") String tagFQNHashPrefix);
50605062

50615063
/**
50625064
* Batch fetch derived tags for multiple glossary term FQNs. Returns a map from glossary term

openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4959,7 +4959,9 @@ protected AssetCertification getCertification(T entity) {
49594959
daoCollection
49604960
.tagUsageDAO()
49614961
.getCertTagsInternalBatch(
4962-
List.of(entity.getFullyQualifiedName()), certClassification + ".%");
4962+
TagLabel.TagSource.CLASSIFICATION.ordinal(),
4963+
List.of(entity.getFullyQualifiedName()),
4964+
FullyQualifiedName.buildHash(certClassification) + ".%");
49634965
if (nullOrEmpty(certTags)) return null;
49644966
return buildCertificationFromCertTag(certTags.get(0).toTagLabel());
49654967
}
@@ -9645,7 +9647,12 @@ private Map<UUID, AssetCertification> batchFetchCertification(List<T> entities)
96459647
List<CollectionDAO.TagUsageDAO.TagLabelWithFQNHash> certTags;
96469648
try {
96479649
certTags =
9648-
daoCollection.tagUsageDAO().getCertTagsInternalBatch(fqnList, certClassification + ".%");
9650+
daoCollection
9651+
.tagUsageDAO()
9652+
.getCertTagsInternalBatch(
9653+
TagLabel.TagSource.CLASSIFICATION.ordinal(),
9654+
fqnList,
9655+
FullyQualifiedName.buildHash(certClassification) + ".%");
96499656
} catch (Exception e) {
96509657
LOG.warn(
96519658
"batchFetchCertification: batch query failed, falling back to individual fetch: {}",

openmetadata-service/src/test/java/org/openmetadata/service/jdbi3/EntityRepositoryCertificationTest.java

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ void getCertificationReturnsCertWhenTagFound() {
113113
tagEntry.setLabelType(TagLabel.LabelType.AUTOMATED.ordinal());
114114
tagEntry.setState(TagLabel.State.CONFIRMED.ordinal());
115115

116-
when(tagUsageDAO.getCertTagsInternalBatch(anyList(), anyString()))
116+
when(tagUsageDAO.getCertTagsInternalBatch(anyInt(), anyList(), anyString()))
117117
.thenReturn(List.of(tagEntry));
118118

119119
AssetCertification cert = repo.getCertification(entity);
@@ -130,7 +130,8 @@ void getCertificationReturnsNullWhenNoTagFound() {
130130
.withName("my-pipeline")
131131
.withFullyQualifiedName("service.my-pipeline");
132132

133-
when(tagUsageDAO.getCertTagsInternalBatch(anyList(), anyString())).thenReturn(List.of());
133+
when(tagUsageDAO.getCertTagsInternalBatch(anyInt(), anyList(), anyString()))
134+
.thenReturn(List.of());
134135

135136
AssetCertification cert = repo.getCertification(entity);
136137

@@ -206,7 +207,7 @@ void applyCertificationSkipsWhenSameCertAlreadyExists() {
206207
existingEntry.setLabelType(TagLabel.LabelType.AUTOMATED.ordinal());
207208
existingEntry.setState(TagLabel.State.CONFIRMED.ordinal());
208209

209-
when(tagUsageDAO.getCertTagsInternalBatch(anyList(), anyString()))
210+
when(tagUsageDAO.getCertTagsInternalBatch(anyInt(), anyList(), anyString()))
210211
.thenReturn(List.of(existingEntry));
211212

212213
assertDoesNotThrow(() -> repo.applyCertification(entity));
@@ -227,7 +228,8 @@ void applyCertificationAppliesTagWhenCertIsDifferent() {
227228
.withFullyQualifiedName("service.my-pipeline")
228229
.withCertification(incoming);
229230

230-
when(tagUsageDAO.getCertTagsInternalBatch(anyList(), anyString())).thenReturn(List.of());
231+
when(tagUsageDAO.getCertTagsInternalBatch(anyInt(), anyList(), anyString()))
232+
.thenReturn(List.of());
231233

232234
assertDoesNotThrow(() -> repo.applyCertification(entity));
233235

@@ -282,7 +284,8 @@ void storeRelationshipsInternalWithNoCertificationEntityDoesNotThrow() {
282284
.withFullyQualifiedName("service.my-pipeline")
283285
.withCertification(null);
284286

285-
when(tagUsageDAO.getCertTagsInternalBatch(anyList(), anyString())).thenReturn(List.of());
287+
when(tagUsageDAO.getCertTagsInternalBatch(anyInt(), anyList(), anyString()))
288+
.thenReturn(List.of());
286289

287290
assertDoesNotThrow(() -> repo.storeRelationshipsInternal(List.of(entity)));
288291
}
@@ -393,7 +396,8 @@ void storeRelationshipsInternalSingleEntityWithNoCert() {
393396
.withFullyQualifiedName("service.my-pipeline")
394397
.withCertification(null);
395398

396-
when(tagUsageDAO.getCertTagsInternalBatch(anyList(), anyString())).thenReturn(List.of());
399+
when(tagUsageDAO.getCertTagsInternalBatch(anyInt(), anyList(), anyString()))
400+
.thenReturn(List.of());
397401

398402
assertDoesNotThrow(() -> repo.storeRelationshipsInternal(entity));
399403
}
@@ -415,7 +419,7 @@ void fetchAndSetFieldsPopulatesCertification() {
415419
tagEntry.setState(TagLabel.State.CONFIRMED.ordinal());
416420
tagEntry.setTargetFQNHash(FullyQualifiedName.buildHash("service.my-pipeline"));
417421

418-
when(tagUsageDAO.getCertTagsInternalBatch(anyList(), anyString()))
422+
when(tagUsageDAO.getCertTagsInternalBatch(anyInt(), anyList(), anyString()))
419423
.thenReturn(List.of(tagEntry));
420424

421425
Fields certFields = new Fields(Set.of("certification"));
@@ -433,7 +437,7 @@ void fetchAndSetFieldsFallsBackToIndividualFetchOnBatchException() {
433437
.withName("my-pipeline")
434438
.withFullyQualifiedName("service.my-pipeline");
435439

436-
when(tagUsageDAO.getCertTagsInternalBatch(anyList(), anyString()))
440+
when(tagUsageDAO.getCertTagsInternalBatch(anyInt(), anyList(), anyString()))
437441
.thenThrow(new RuntimeException("DB error"))
438442
.thenReturn(List.of());
439443

@@ -568,7 +572,8 @@ void applyCertificationUsesEntityUpdatedByAsAppliedBy() {
568572
.withUpdatedBy("alice")
569573
.withCertification(new AssetCertification().withTagLabel(tagLabel));
570574

571-
when(tagUsageDAO.getCertTagsInternalBatch(anyList(), anyString())).thenReturn(List.of());
575+
when(tagUsageDAO.getCertTagsInternalBatch(anyInt(), anyList(), anyString()))
576+
.thenReturn(List.of());
572577

573578
assertDoesNotThrow(() -> repo.applyCertification(entity));
574579

0 commit comments

Comments
 (0)