Skip to content

Commit ce7c6ed

Browse files
authored
fix: tokenize _ in ngram (#24935)
1 parent ea6f23e commit ce7c6ed

2 files changed

Lines changed: 118 additions & 7 deletions

File tree

openmetadata-service/src/test/java/org/openmetadata/service/resources/dqtests/TestCaseResourceTest.java

Lines changed: 114 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1097,11 +1097,12 @@ void test_getSimpleListFromSearch(TestInfo testInfo) throws IOException, ParseEx
10971097
ts -> ts.getFullyQualifiedName().equals(testCaseForEL.getFullyQualifiedName())));
10981098

10991099
queryParams.clear();
1100-
queryParams.put("q", "test_getSimpleListFromSearchb");
1100+
queryParams.put("q", "test_getSimpleListFromSearch");
11011101
allEntities = listEntitiesFromSearch(queryParams, testCasesNum, 0, ADMIN_AUTH_HEADERS);
1102-
// Note: Since the "name" field and its ngram variant are prioritized in the search query
1103-
// and the test case names are very similar, the fuzzy matching returns all test cases.
1104-
assertEquals(testCasesNum, allEntities.getData().size());
1102+
// Note: With edge_ngram tokenization preserving underscores, prefix-based matching
1103+
// only finds test cases linked to tables whose names START with "test_getSimpleListFromSearch"
1104+
// (tables at index 0 and 1). Tables 2-4 have names starting with "table_...", so they do not match.
1105+
assertEquals(2, allEntities.getData().size());
11051106

11061107
queryParams.clear();
11071108
queryParams.put("entityLink", testCaseForEL.getEntityLink());
@@ -1222,8 +1223,9 @@ void test_getSimpleListFromSearch(TestInfo testInfo) throws IOException, ParseEx
12221223
assertNotNull(testCase.getId());
12231224

12241225
// Test that only test cases with the specified data quality dimension are returned
1226+
// Note: TEST_DEFINITION4 (tableRowCountToBeBetween) has dataQualityDimension: "Integrity"
12251227
queryParams.clear();
1226-
queryParams.put("dataQualityDimension", "Completeness");
1228+
queryParams.put("dataQualityDimension", "Integrity");
12271229
allEntities = listEntitiesFromSearch(queryParams, testCasesNum, 0, ADMIN_AUTH_HEADERS);
12281230
assertNotEquals(0, allEntities.getData().size());
12291231

@@ -6275,4 +6277,111 @@ void test_listTestCasesFromSearch_ownerOnlySeesOwnedTestCases(TestInfo testInfo)
62756277
TestUtils.restoreOrganizationPolicies(savedPolicies);
62766278
}
62776279
}
6280+
6281+
@Test
6282+
@Order(Integer.MAX_VALUE)
6283+
void test_listTestCasesFromSearch_wildcardQuery(TestInfo testInfo) throws IOException {
6284+
if (!supportsSearchIndex) {
6285+
return;
6286+
}
6287+
6288+
TableResourceTest tableResourceTest = new TableResourceTest();
6289+
6290+
// Create a table for test cases
6291+
CreateTable tableReq =
6292+
tableResourceTest
6293+
.createRequest(testInfo)
6294+
.withName("wildcardSearchTestTable")
6295+
.withDatabaseSchema(DATABASE_SCHEMA.getFullyQualifiedName())
6296+
.withColumns(
6297+
List.of(new Column().withName(C1).withDisplayName("c1").withDataType(BIGINT)));
6298+
Table table = tableResourceTest.createAndCheckEntity(tableReq, ADMIN_AUTH_HEADERS);
6299+
String tableLink = String.format("<#E::table::%s>", table.getFullyQualifiedName());
6300+
6301+
// Create test cases with distinct names to test wildcard matching
6302+
// Test case 1: Should match "*api_e*" - contains "api_e" substring
6303+
CreateTestCase createApiEndpoint =
6304+
createRequest(testInfo)
6305+
.withName("api_endpoint_column_test")
6306+
.withEntityLink(tableLink)
6307+
.withTestDefinition(TEST_DEFINITION4.getFullyQualifiedName());
6308+
TestCase apiEndpointTestCase = createEntity(createApiEndpoint, ADMIN_AUTH_HEADERS);
6309+
6310+
// Test case 2: Should NOT match "*api_e*" - does NOT contain "api_e" substring
6311+
// (contains "api_" and "entity" separately but not "api_e")
6312+
CreateTestCase createApiService =
6313+
createRequest(testInfo)
6314+
.withName("api_service_entity_test")
6315+
.withEntityLink(tableLink)
6316+
.withTestDefinition(TEST_DEFINITION4.getFullyQualifiedName());
6317+
TestCase apiServiceTestCase = createEntity(createApiService, ADMIN_AUTH_HEADERS);
6318+
6319+
// Test case 3: Should NOT match "*api_e*" - completely different name
6320+
CreateTestCase createUnrelated =
6321+
createRequest(testInfo)
6322+
.withName("unrelated_column_test")
6323+
.withEntityLink(tableLink)
6324+
.withTestDefinition(TEST_DEFINITION4.getFullyQualifiedName());
6325+
TestCase unrelatedTestCase = createEntity(createUnrelated, ADMIN_AUTH_HEADERS);
6326+
6327+
// Wait until all 3 test cases created above are returned by search for this entityLink
6328+
Awaitility.await()
6329+
.atMost(10, TimeUnit.SECONDS)
6330+
.pollInterval(1, TimeUnit.SECONDS)
6331+
.untilAsserted(
6332+
() -> {
6333+
Map<String, String> queryParams = new HashMap<>();
6334+
queryParams.put("entityLink", tableLink);
6335+
ResultList<TestCase> results =
6336+
listEntitiesFromSearch(queryParams, 10, 0, ADMIN_AUTH_HEADERS);
6337+
assertEquals(3, results.getData().size(), "All 3 test cases should be indexed");
6338+
});
6339+
6340+
// Test 1: Wildcard search with "*api_e*" pattern
6341+
// Expected: Only "api_endpoint_column_test" should match (contains "api_e")
6342+
// "api_service_entity_test" should NOT match (contains "api_s", not "api_e")
6343+
Map<String, String> queryParams = new HashMap<>();
6344+
queryParams.put("q", "*api_e*");
6345+
queryParams.put("includeAllTests", "true");
6346+
queryParams.put("entityLink", tableLink);
6347+
6348+
ResultList<TestCase> wildcardResults =
6349+
listEntitiesFromSearch(queryParams, 10, 0, ADMIN_AUTH_HEADERS);
6350+
6351+
List<String> resultNames =
6352+
wildcardResults.getData().stream().map(TestCase::getName).collect(Collectors.toList());
6353+
6354+
// Verify correct wildcard matching behavior
6355+
assertTrue(
6356+
resultNames.contains("api_endpoint_column_test"),
6357+
"api_endpoint_column_test should match '*api_e*' pattern");
6358+
6359+
assertFalse(
6360+
resultNames.contains("api_service_entity_test"),
6361+
"api_service_entity_test should NOT match '*api_e*' pattern - "
6362+
+ "it contains 'api_s' not 'api_e'");
6363+
6364+
assertFalse(
6365+
resultNames.contains("unrelated_column_test"),
6366+
"unrelated_column_test should NOT match '*api_e*' pattern");
6367+
6368+
// Test 2: Non-wildcard search should work normally
6369+
queryParams.clear();
6370+
queryParams.put("q", "api_endpoint");
6371+
queryParams.put("entityLink", tableLink);
6372+
6373+
ResultList<TestCase> exactResults =
6374+
listEntitiesFromSearch(queryParams, 10, 0, ADMIN_AUTH_HEADERS);
6375+
List<String> exactResultNames =
6376+
exactResults.getData().stream().map(TestCase::getName).collect(Collectors.toList());
6377+
6378+
assertTrue(
6379+
exactResultNames.contains("api_endpoint_column_test"),
6380+
"api_endpoint_column_test should match 'api_endpoint' query");
6381+
6382+
deleteEntity(apiEndpointTestCase.getId(), true, true, ADMIN_AUTH_HEADERS);
6383+
deleteEntity(apiServiceTestCase.getId(), true, true, ADMIN_AUTH_HEADERS);
6384+
deleteEntity(unrelatedTestCase.getId(), true, true, ADMIN_AUTH_HEADERS);
6385+
tableResourceTest.deleteEntity(table.getId(), true, true, ADMIN_AUTH_HEADERS);
6386+
}
62786387
}

openmetadata-spec/src/main/resources/elasticsearch/en/test_case_index_mapping.json

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,10 @@
88
"max_gram": 20,
99
"token_chars": [
1010
"letter",
11-
"digit"
12-
]
11+
"digit",
12+
"custom"
13+
],
14+
"custom_token_chars": "_"
1315
}
1416
},
1517
"normalizer": {

0 commit comments

Comments
 (0)