Skip to content

Commit 10b099e

Browse files
lautel authored and Jatin Masaram committed
Add new "textToLLMContext" field to improve embeddings (open-metadata#27485)
* Rename textToEmbed > textToLLMContext and textToEmbedSemantic > textToEmbed
* Avoid reflection usage
1 parent dc88cbd commit 10b099e

99 files changed

Lines changed: 757 additions & 28 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

openmetadata-integration-tests/src/test/java/org/openmetadata/it/tests/PatchTableEmbeddingIT.java

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -113,10 +113,11 @@ private void runEmbeddingTest(TestNamespace ns, SearchRepository searchRepo) thr
113113
updatedFingerprint,
114114
"Fingerprint should change after description update");
115115

116-
String textToEmbed = getFieldFromDoc(searchClient, entityIndexName, tableId, "textToEmbed");
116+
String textToLLMContext =
117+
getFieldFromDoc(searchClient, entityIndexName, tableId, "textToLLMContext");
117118
assertTrue(
118-
textToEmbed.contains("Revenue metrics"),
119-
"textToEmbed should reflect the patched description");
119+
textToLLMContext.contains("Revenue metrics"),
120+
"textToLLMContext should reflect the patched description");
120121

121122
String embeddingJson = getFieldFromDoc(searchClient, entityIndexName, tableId, "embedding");
122123
assertNotNull(embeddingJson, "Embedding vector should exist after PATCH");

openmetadata-integration-tests/src/test/java/org/openmetadata/it/tests/VectorEmbeddingIntegrationIT.java

Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -128,7 +128,7 @@ void testEntityEmbeddingCreationViaPartialUpdate() throws Exception {
128128

129129
Map<String, Object> doc = getDocumentById(testTable.getId().toString());
130130
assertNotNull(doc, "Entity document should exist");
131-
assertNotNull(doc.get("textToEmbed"), "Document should have text_to_embed");
131+
assertNotNull(doc.get("textToLLMContext"), "Document should have textToLLMContext");
132132
assertNotNull(doc.get("embedding"), "Document should have embedding");
133133
assertNotNull(doc.get("fingerprint"), "Document should have fingerprint");
134134
assertEquals(
@@ -323,7 +323,7 @@ void testGenerateEmbeddingFields() {
323323

324324
assertNotNull(fields);
325325
assertNotNull(fields.get("embedding"));
326-
assertNotNull(fields.get("textToEmbed"));
326+
assertNotNull(fields.get("textToLLMContext"));
327327
assertNotNull(fields.get("fingerprint"));
328328
assertEquals(testTable.getId().toString(), fields.get("parentId"));
329329
assertEquals(0, fields.get("chunkIndex"));
@@ -347,7 +347,7 @@ void testPatchTableDescriptionUpdatesEmbeddingForSemanticSearch() throws Excepti
347347

348348
Map<String, Object> initialDoc = getDocumentById(testTable.getId().toString());
349349
String initialFingerprint = (String) initialDoc.get("fingerprint");
350-
String initialTextToEmbed = (String) initialDoc.get("textToEmbed");
350+
String initialTextToEmbed = (String) initialDoc.get("textToLLMContext");
351351

352352
String patchedDescription = "Revenue metrics for quarterly financial reporting analysis";
353353
testTable.setDescription(patchedDescription);
@@ -358,15 +358,16 @@ void testPatchTableDescriptionUpdatesEmbeddingForSemanticSearch() throws Excepti
358358

359359
Map<String, Object> updatedDoc = getDocumentById(testTable.getId().toString());
360360
String updatedFingerprint = (String) updatedDoc.get("fingerprint");
361-
String updatedTextToEmbed = (String) updatedDoc.get("textToEmbed");
361+
String updatedTextToEmbed = (String) updatedDoc.get("textToLLMContext");
362362

363363
assertFalse(
364364
initialFingerprint.equals(updatedFingerprint), "Fingerprint should change after PATCH");
365365
assertFalse(
366-
initialTextToEmbed.equals(updatedTextToEmbed), "textToEmbed should change after PATCH");
366+
initialTextToEmbed.equals(updatedTextToEmbed),
367+
"textToLLMContext should change after PATCH");
367368
assertTrue(
368369
updatedTextToEmbed.contains("Revenue metrics"),
369-
"Updated textToEmbed should reflect patched description");
370+
"Updated textToLLMContext should reflect patched description");
370371

371372
List<Map<String, Object>> results =
372373
executeKnnSearch("quarterly financial revenue reporting", 10);

openmetadata-mcp/src/test/java/org/openmetadata/mcp/tools/SemanticSearchToolTest.java

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -203,7 +203,8 @@ void testHitFieldsCleaned() throws Exception {
203203
hit.put("columns", List.of(Map.of("name", "id", "dataType", "INT")));
204204
hit.put("embedding", new float[] {0.1f, 0.2f});
205205
hit.put("fingerprint", "abc123");
206-
hit.put("textToEmbed", "name: users; entityType: table | description: A short description");
206+
hit.put(
207+
"textToLLMContext", "name: users; entityType: table | description: A short description");
207208

208209
VectorSearchResponse response = new VectorSearchResponse(10L, List.of(hit));
209210

@@ -233,7 +234,7 @@ void testHitFieldsCleaned() throws Exception {
233234
assertTrue(!cleaned.containsKey("_score"));
234235
assertTrue(!cleaned.containsKey("embedding"));
235236
assertTrue(!cleaned.containsKey("fingerprint"));
236-
assertTrue(!cleaned.containsKey("textToEmbed"));
237+
assertTrue(!cleaned.containsKey("textToLLMContext"));
237238
}
238239
}
239240

0 commit comments

Comments
 (0)