-
Notifications
You must be signed in to change notification settings - Fork 2.1k
Fixes #27796: enforce entityType filter in MCP search_metadata #27821
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,6 +5,7 @@ | |
| import static org.openmetadata.service.security.DefaultAuthorizer.getSubjectContext; | ||
|
|
||
| import com.fasterxml.jackson.databind.JsonNode; | ||
| import com.fasterxml.jackson.databind.node.ArrayNode; | ||
| import com.fasterxml.jackson.databind.node.ObjectNode; | ||
| import com.google.common.annotations.VisibleForTesting; | ||
| import jakarta.ws.rs.core.Response; | ||
|
|
@@ -192,44 +193,35 @@ public Map<String, Object> execute( | |
| LOG.debug("Applied query filter to query: {}", queryFilter); | ||
| } | ||
|
|
||
| queryFilter = addEntityTypeFilter(queryFilter, entityType); | ||
|
|
||
| LOG.info( | ||
| "Search query: {}, index: {}, limit: {}, includeDeleted: {}", | ||
| queryFilter, | ||
| index, | ||
| size, | ||
| includeDeleted); | ||
|
|
||
| SearchRequest searchRequest; | ||
| boolean userProvidedQueryFilter = params.containsKey("queryFilter"); | ||
| SearchRequest searchRequest = | ||
| new SearchRequest() | ||
| .withIndex(Entity.getSearchRepository().getIndexOrAliasName(index)) | ||
| .withSize(size) | ||
| .withFrom(from) | ||
| .withFetchSource(true) | ||
| .withDeleted(includeDeleted); | ||
| if (!nullOrEmpty(queryFilter)) { | ||
| // When queryFilter is provided, use it directly as it's already a transformed OpenSearch | ||
| // query | ||
| searchRequest = | ||
| new SearchRequest() | ||
| .withIndex(Entity.getSearchRepository().getIndexOrAliasName(index)) | ||
| .withQueryFilter(queryFilter) | ||
| .withSize(size) | ||
| .withFrom(from) | ||
| .withFetchSource(true) | ||
| .withDeleted(includeDeleted); | ||
| } else { | ||
| // Fallback to basic query when no queryFilter is provided | ||
| searchRequest = | ||
| new SearchRequest() | ||
| .withQuery(query) | ||
| .withIndex(Entity.getSearchRepository().getIndexOrAliasName(index)) | ||
| .withSize(size) | ||
| .withFrom(from) | ||
| .withFetchSource(true) | ||
| .withDeleted(includeDeleted); | ||
| searchRequest.withQueryFilter(queryFilter); | ||
| } | ||
| if (!userProvidedQueryFilter) { | ||
| searchRequest.withQuery(query); | ||
| } | ||
|
|
||
| SubjectContext subjectContext = getSubjectContext(securityContext); | ||
| Response response; | ||
| if (!nullOrEmpty(queryFilter)) { | ||
| // Use direct query method when queryFilter is provided since it's already a transformed query | ||
| if (userProvidedQueryFilter) { | ||
| response = Entity.getSearchRepository().searchWithDirectQuery(searchRequest, subjectContext); | ||
| } else { | ||
| // Use regular search for basic queries | ||
| response = Entity.getSearchRepository().search(searchRequest, subjectContext); | ||
| } | ||
|
|
||
|
|
@@ -383,6 +375,66 @@ public static Map<String, Object> cleanSearchResponseObject(Map<String, Object> | |
| return object; | ||
| } | ||
|
|
||
| /** | ||
| * Ensures results are constrained to the requested entityType by injecting an explicit | ||
| * {@code term} filter on the {@code entityType} field. Targeting an alias by itself is not | ||
| * always sufficient — for example, when the alias resolves to a multi-entity index or fans | ||
| * out to {@code dataAsset} — so the request can leak documents of other types. Adding the | ||
| * term filter guarantees correctness regardless of how the index alias resolves. | ||
| * | ||
| * @param existingFilter user-provided OpenSearch query JSON, already wrapped under "query", or | ||
| * {@code null} | ||
| * @param entityType requested entity type, or {@code null}/blank to leave the filter untouched | ||
| * @return a JSON string containing the merged query filter, or {@code existingFilter} if no | ||
| * entityType was provided | ||
| */ | ||
| @VisibleForTesting | ||
| static String addEntityTypeFilter(String existingFilter, String entityType) { | ||
| if (entityType == null || entityType.isBlank()) { | ||
| return existingFilter; | ||
| } | ||
| ObjectNode termFilter = JsonUtils.getObjectMapper().createObjectNode(); | ||
| termFilter.putObject("term").put("entityType", entityType); | ||
| if (nullOrEmpty(existingFilter)) { | ||
| ObjectNode bool = JsonUtils.getObjectMapper().createObjectNode(); | ||
| ArrayNode filterArray = bool.putObject("bool").putArray("filter"); | ||
| filterArray.add(termFilter); | ||
| ObjectNode wrapper = JsonUtils.getObjectMapper().createObjectNode(); | ||
| wrapper.set("query", bool); | ||
| return JsonUtils.pojoToJson(wrapper); | ||
| } | ||
| try { | ||
| JsonNode root = JsonUtils.getObjectMapper().readTree(existingFilter); | ||
| JsonNode queryNode = root.get("query"); | ||
| if (queryNode == null || !queryNode.isObject()) { | ||
| return existingFilter; | ||
| } | ||
| ObjectNode queryObject = (ObjectNode) queryNode; | ||
| ObjectNode boolNode; | ||
| if (queryObject.has("bool") && queryObject.get("bool").isObject()) { | ||
| boolNode = (ObjectNode) queryObject.get("bool"); | ||
| } else { | ||
| ObjectNode originalCopy = queryObject.deepCopy(); | ||
| queryObject.removeAll(); | ||
| boolNode = queryObject.putObject("bool"); | ||
| boolNode.putArray("must").add(originalCopy); | ||
| } | ||
| ArrayNode filterArray; | ||
| if (boolNode.has("filter") && boolNode.get("filter").isArray()) { | ||
| filterArray = (ArrayNode) boolNode.get("filter"); | ||
| } else { | ||
| filterArray = boolNode.putArray("filter"); | ||
| } | ||
| filterArray.add(termFilter); | ||
|
Comment on lines
+422
to
+428
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 💡 Edge Case: Duplicate entityType filter if user's queryFilter already has oneIf a caller passes a Was this helpful? React with 👍 / 👎 | Reply |
||
| return JsonUtils.pojoToJson(root); | ||
| } catch (IOException e) { | ||
| LOG.warn( | ||
| "Unable to merge entityType filter into provided queryFilter, leaving it unchanged: {}", | ||
| e.getMessage()); | ||
| return existingFilter; | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Truncates aggregation buckets to prevent excessive response size that could overwhelm LLM | ||
| * context windows. Based on industry best practices, LLM performance degrades when context | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
💡 Quality: addEntityTypeFilter exceeds 15-line method limit
The new
addEntityTypeFiltermethod is ~45 lines with 4 levels of nesting, exceeding the project's complexity limits (max 15 lines/method, max 3 nesting levels). The method handles three distinct concerns — creating a fresh filter, merging into an existing bool query, and wrapping a non-bool query — each of which could be a separate private helper.Suggested fix:
Was this helpful? React with 👍 / 👎 | Reply
gitar fixto apply this suggestion