From e2eee79b9b1a409722c18c70e62d495b8c4f7c4b Mon Sep 17 00:00:00 2001 From: Gal Lalouche Date: Wed, 15 Apr 2026 13:11:03 +0300 Subject: [PATCH 01/20] ESQL: Create for type-conflicted PUNKs during resolution MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `EsIndex#fieldToUnmappedIndices` was an `O(numFields * numIndices)` map tracking which indices each field was unmapped in. For indices with many fields (e.g. `.ds-metrics-*`) this caused OutOfMemory errors, as reported in #145920. Partially-unmapped field wrapping (into `PotentiallyUnmappedKeywordEsField` / `InvalidMappedField.potentiallyUnmapped`) happened in the `Analyzer`, using `fieldToUnmappedIndices` as a separate side-channel. Move the wrapping into `IndexResolver#mergedMappings`, encoding the partial-mapping signal directly into the `EsField` hierarchy. Only applied when `trackUnmappedFieldIndices` is on (i.e. `unmapped_fields="load"` or an `INSIST` clause). `EsIndex` no longer carries `fieldToUnmappedIndices`. The `Analyzer` no longer wraps fields. The `Verifier` now inspects the `IndexResolution` mapping directly to detect partially-unmapped non-keyword fields, since `UnionTypesCleanup.cleanTypeConflicts` reverts single-type `potentiallyUnmapped` wrappers in the plan. Additional fixes uncovered along the way: - `wrapPartiallyUnmappedField` skips `OBJECT` fields, which are containers rather than leaves — wrapping them triggered `illegal data type [object]` downstream. - `PotentiallyUnmappedKeywordEsField` is constructed with the full dotted path (e.g. `city.country.continent.name`) so that `DefaultShardContextForUnmappedField#fieldType` matches against the path Lucene passes in. Without this, deeply-nested unmapped keyword subfields returned `null` instead of loading from `_source`. 
- `GoldenTestCase#mergeMappings` applies the same wrapping as the production path (gated on the same flag), so golden outputs for `unmapped_fields="load"` stay stable without any plan-shape changes. Made-with: Cursor --- .../_nightly/esql/QueryPlanningBenchmark.java | 2 +- .../esql/ViewResolutionBenchmarkBase.java | 2 +- .../xpack/esql/TestAnalyzer.java | 5 +- .../xpack/esql/analysis/Analyzer.java | 79 ++----------------- .../xpack/esql/analysis/Verifier.java | 34 ++++++-- .../PotentiallyUnmappedKeywordEsField.java | 4 +- .../xpack/esql/index/EsIndex.java | 14 +--- .../xpack/esql/index/IndexResolution.java | 2 +- .../xpack/esql/session/EsqlSession.java | 1 - .../xpack/esql/session/IndexResolver.java | 42 +++++----- .../esql/analysis/AnalyzerTestUtils.java | 1 - .../xpack/esql/analysis/AnalyzerTests.java | 3 - .../esql/analysis/AnalyzerUnmappedTests.java | 59 +++++++------- .../xpack/esql/index/EsIndexGenerator.java | 8 +- .../AbstractLogicalPlanOptimizerTests.java | 6 +- .../xpack/esql/optimizer/GoldenTestCase.java | 74 ++++++++--------- .../optimizer/LogicalPlanOptimizerTests.java | 1 - .../xpack/esql/session/EsqlSessionTests.java | 4 +- 18 files changed, 143 insertions(+), 198 deletions(-) diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/_nightly/esql/QueryPlanningBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/_nightly/esql/QueryPlanningBenchmark.java index d6280faf38be9..28b890665c221 100644 --- a/benchmarks/src/main/java/org/elasticsearch/benchmark/_nightly/esql/QueryPlanningBenchmark.java +++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/_nightly/esql/QueryPlanningBenchmark.java @@ -107,7 +107,7 @@ public void setup() { mapping.put("field" + i, new EsField("field-" + i, TEXT, emptyMap(), true, EsField.TimeSeriesFieldType.NONE)); } - var esIndex = new EsIndex("test", mapping, Map.of("test", IndexMode.STANDARD), Map.of(), Map.of(), Map.of()); + var esIndex = new EsIndex("test", mapping, Map.of("test", 
IndexMode.STANDARD), Map.of(), Map.of()); var functionRegistry = new EsqlFunctionRegistry(); parser = new EsqlParser(new EsqlConfig(functionRegistry)); diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/esql/ViewResolutionBenchmarkBase.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/esql/ViewResolutionBenchmarkBase.java index 048945c12dbd9..0c16e1fb83244 100644 --- a/benchmarks/src/main/java/org/elasticsearch/benchmark/esql/ViewResolutionBenchmarkBase.java +++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/esql/ViewResolutionBenchmarkBase.java @@ -168,7 +168,7 @@ public void setup() { String name = "col" + i; mapping.put(name, new EsField(name, KEYWORD, emptyMap(), true, EsField.TimeSeriesFieldType.NONE)); } - EsIndex esIndex = new EsIndex("test", mapping, Map.of("test", IndexMode.STANDARD), Map.of(), Map.of(), Map.of()); + EsIndex esIndex = new EsIndex("test", mapping, Map.of("test", IndexMode.STANDARD), Map.of(), Map.of()); Configuration config = new Configuration( DateUtils.UTC, diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/TestAnalyzer.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/TestAnalyzer.java index 1378a991402f9..4f5a2a02d6322 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/TestAnalyzer.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/TestAnalyzer.java @@ -154,8 +154,7 @@ public TestAnalyzer addNoFieldsIndex() { Map.of(), Map.of(noFieldsIndexName, IndexMode.STANDARD), Map.of("", List.of(noFieldsIndexName)), - Map.of("", List.of(noFieldsIndexName)), - Map.of() + Map.of("", List.of(noFieldsIndexName)) ); addIndex(noFieldsIndexName, IndexResolution.valid(noFieldsIndex)); return this; @@ -781,7 +780,7 @@ public AnalyzerContext buildContext() { */ public static IndexResolution loadMapping(String resource, String indexName, IndexMode indexMode) { return 
IndexResolution.valid( - new EsIndex(indexName, EsqlTestUtils.loadMapping(resource), Map.of(indexName, indexMode), Map.of(), Map.of(), Map.of()) + new EsIndex(indexName, EsqlTestUtils.loadMapping(resource), Map.of(indexName, indexMode), Map.of(), Map.of()) ); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java index 5cce6877a3c31..0d248e7725efa 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java @@ -181,7 +181,6 @@ import java.util.List; import java.util.Map; import java.util.Set; -import java.util.TreeSet; import java.util.function.Function; import java.util.function.Predicate; import java.util.regex.Pattern; @@ -351,10 +350,6 @@ private LogicalPlan resolveIndex(UnresolvedRelation plan, IndexResolution indexR var attributes = mappingAsAttributes(plan.source(), esIndex.mapping()); attributes.addAll(metadata.stream().map(NamedExpression::toAttribute).toList()); - if (context.unmappedResolution() == UnmappedResolution.LOAD) { - loadPartiallyUnmappedFields(attributes, esIndex); - } - return new EsRelation( plan.source(), esIndex.name(), @@ -366,34 +361,6 @@ private LogicalPlan resolveIndex(UnresolvedRelation plan, IndexResolution indexR ); } - /** - * When {@code SET unmapped_fields="load"}, convert partially-mapped fields so they can be used across indices where they - * may not exist: - * - */ - private static void loadPartiallyUnmappedFields(List attributes, EsIndex esIndex) { - for (int i = 0; i < attributes.size(); i++) { - if (attributes.get(i) instanceof FieldAttribute fa && isPartiallyUnmappedRegularField(fa, esIndex)) { - if (fa.dataType() == KEYWORD) { - attributes.set(i, ResolveRefs.insistKeyword(fa)); - } else { - attributes.set(i, ResolveRefs.invalidInsistAttribute(fa, esIndex)); - } - } 
- } - } - - private static boolean isPartiallyUnmappedRegularField(FieldAttribute fa, EsIndex esIndex) { - // We ignore proper subclasses of FieldAttribute; these represent unsupported or special attributes. - return fa.getClass().equals(FieldAttribute.class) && esIndex.isPartiallyUnmappedField(fa.fieldName().string()); - } - private List resolveMetadata(List metadata, AnalyzerContext context) { LinkedHashMap resolved = new LinkedHashMap<>(); Set allTags = null; @@ -689,7 +656,7 @@ protected LogicalPlan rule(LogicalPlan plan, AnalyzerContext context) { case MvExpand p -> resolveMvExpand(p, childrenOutput); case Lookup l -> resolveLookup(l, childrenOutput); case LookupJoin j -> resolveLookupJoin(j, context); - case Insist i -> resolveInsist(i, childrenOutput, context); + case Insist i -> resolveInsist(i, childrenOutput); case Fuse fuse -> resolveFuse(fuse, childrenOutput); case Rerank r -> resolveRerank(r, childrenOutput, context); case PromqlCommand promql -> resolvePromql(promql, childrenOutput); @@ -1219,60 +1186,26 @@ private List resolveUsingColumns(List cols, List childrenOutput, AnalyzerContext context) { + private LogicalPlan resolveInsist(Insist insist, List childrenOutput) { List list = new ArrayList<>(); - List resolutions = collectIndexResolutions(insist, context); for (Attribute a : insist.insistedAttributes()) { - list.add(resolveInsistAttribute(a, childrenOutput, resolutions)); + list.add(resolveInsistAttribute(a, childrenOutput)); } return insist.withAttributes(list); } - private static List collectIndexResolutions(LogicalPlan plan, AnalyzerContext context) { - List resolutions = new ArrayList<>(); - plan.forEachDown(EsRelation.class, e -> { - var resolution = context.indexResolution().get(new IndexPattern(e.source(), e.indexPattern())); - if (resolution != null) { - resolutions.add(resolution); - } - }); - return resolutions; - } - - private Attribute resolveInsistAttribute(Attribute attribute, List childrenOutput, List indices) { + private 
Attribute resolveInsistAttribute(Attribute attribute, List childrenOutput) { Attribute resolvedCol = maybeResolveAttribute((UnresolvedAttribute) attribute, childrenOutput); // Field isn't mapped anywhere. if (resolvedCol instanceof UnresolvedAttribute) { return insistKeyword(attribute); } - // Field is partially unmapped. - // TODO: Should the check for partially unmapped fields be done specific to each sub-query in a fork? - if (resolvedCol instanceof FieldAttribute fa && indices.stream().anyMatch(r -> r.get().isPartiallyUnmappedField(fa.name()))) { - // NOTE: We use indices.getFirst() here as a temporary solution. INSIST will be removed after load is in GA anyway. - return fa.dataType() == KEYWORD ? insistKeyword(fa) : invalidInsistAttribute(fa, indices.getFirst().get()); - } - - // Either the field is mapped everywhere and we can just use the resolved column, or the INSIST clause isn't on top of a FROM - // clause—for example, it might be on top of a ROW clause—so the verifier will catch it and fail. + // Partially unmapped fields are already wrapped during index resolution: + // keyword → PotentiallyUnmappedKeywordEsField, non-keyword → InvalidMappedField.potentiallyUnmapped. return resolvedCol; } - static FieldAttribute invalidInsistAttribute(FieldAttribute fa, EsIndex esIndex) { - InvalidMappedField field = InvalidMappedField.potentiallyUnmapped(fa.field().getName(), getTypesToIndices(fa, esIndex)); - return new FieldAttribute(fa.source(), fa.parentName(), fa.qualifier(), fa.name(), field); - } - - private static Map> getTypesToIndices(FieldAttribute fa, EsIndex esIndex) { - if (fa.field() instanceof InvalidMappedField imf) { - return imf.getTypesToIndices(); - } - // Field isn't currently invalid, meaning it's mapped to a single type in all the indices where it's actually mapped. 
- TreeSet indicesWithField = new TreeSet<>(esIndex.concreteQualifiedIndices()); - indicesWithField.removeAll(esIndex.getUnmappedIndices(fa.name())); - return Map.of(fa.dataType().typeName(), indicesWithField); - } - public static FieldAttribute insistKeyword(Attribute attribute) { return new FieldAttribute( attribute.source(), diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java index 4914eb9d30ab1..6fff8a0cb7d26 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java @@ -7,6 +7,7 @@ package org.elasticsearch.xpack.esql.analysis; +import org.elasticsearch.core.Nullable; import org.elasticsearch.index.IndexMode; import org.elasticsearch.index.mapper.flattened.FlattenedFieldMapper; import org.elasticsearch.license.XPackLicenseState; @@ -31,6 +32,8 @@ import org.elasticsearch.xpack.esql.core.expression.predicate.operator.comparison.BinaryComparison; import org.elasticsearch.xpack.esql.core.tree.Node; import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.core.type.EsField; +import org.elasticsearch.xpack.esql.core.type.InvalidMappedField; import org.elasticsearch.xpack.esql.core.type.MultiTypeEsField; import org.elasticsearch.xpack.esql.core.type.PotentiallyUnmappedKeywordEsField; import org.elasticsearch.xpack.esql.core.type.UnsupportedEsField; @@ -42,7 +45,6 @@ import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.Equals; import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.EsqlBinaryComparison; import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.NotEquals; -import org.elasticsearch.xpack.esql.index.EsIndex; import org.elasticsearch.xpack.esql.index.IndexResolution; import 
org.elasticsearch.xpack.esql.plan.IndexPattern; import org.elasticsearch.xpack.esql.plan.logical.Aggregate; @@ -588,12 +590,11 @@ private static AttributeSet partiallyUnmappedNonKeywords(LogicalPlan plan, Map { IndexResolution indexResolution = indexResolutions.get(new IndexPattern(relation.source(), relation.indexPattern())); if (indexResolution != null && indexResolution.isValid()) { - EsIndex index = indexResolution.get(); + Set fieldNames = collectPotentiallyUnmappedNonKeywords(indexResolution.get().mapping()); for (Attribute attr : relation.output()) { - if (attr instanceof FieldAttribute fa - && index.isPartiallyUnmappedField(fa.fieldName().string()) - && fa.dataType() != DataType.KEYWORD // punk_field::long is fine; in this case, the FieldAttribute contains a MultiTypeEsField with the conversions. + if (attr instanceof FieldAttribute fa + && fieldNames.contains(fa.fieldName().string()) && fa.field() instanceof MultiTypeEsField == false) { punks.add(fa); } @@ -604,6 +605,29 @@ private static AttributeSet partiallyUnmappedNonKeywords(LogicalPlan plan, Map collectPotentiallyUnmappedNonKeywords(Map mapping) { + HashSet result = new HashSet<>(); + collectPotentiallyUnmappedNonKeywords(mapping, null, result); + return result; + } + + private static void collectPotentiallyUnmappedNonKeywords( + Map mapping, + @Nullable String prefix, + Set aggregator + ) { + for (Map.Entry entry : mapping.entrySet()) { + String name = prefix == null ? entry.getKey() : prefix + "." 
+ entry.getKey(); + EsField field = entry.getValue(); + if (field instanceof InvalidMappedField imf && imf.isPotentiallyUnmapped()) { + aggregator.add(name); + } + if (field.getProperties() != null && field.getProperties().isEmpty() == false) { + collectPotentiallyUnmappedNonKeywords(field.getProperties(), name, aggregator); + } + } + } + private void licenseCheck(LogicalPlan plan, Failures failures) { Consumer> licenseCheck = n -> { if (n instanceof LicenseAware la && la.licenseCheck(licenseState) == false) { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/PotentiallyUnmappedKeywordEsField.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/PotentiallyUnmappedKeywordEsField.java index aba1c0c8eb190..21ac986912fff 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/PotentiallyUnmappedKeywordEsField.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/PotentiallyUnmappedKeywordEsField.java @@ -10,7 +10,7 @@ import org.elasticsearch.common.io.stream.StreamInput; import java.io.IOException; -import java.util.Collections; +import java.util.HashMap; /** * This class is used as a marker for fields that may be unmapped, where an unmapped field is a field which exists in the _source but is not @@ -19,7 +19,7 @@ */ public class PotentiallyUnmappedKeywordEsField extends KeywordEsField { public PotentiallyUnmappedKeywordEsField(String name) { - super(name, Collections.emptyMap(), true, Short.MAX_VALUE, false, false, TimeSeriesFieldType.UNKNOWN); + super(name, new HashMap<>(), true, Short.MAX_VALUE, false, false, TimeSeriesFieldType.UNKNOWN); } public PotentiallyUnmappedKeywordEsField(StreamInput in) throws IOException { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/index/EsIndex.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/index/EsIndex.java index 351139095cd46..aa2471c9bd97e 100644 --- 
a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/index/EsIndex.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/index/EsIndex.java @@ -9,7 +9,6 @@ import org.elasticsearch.index.IndexMode; import org.elasticsearch.xpack.esql.core.type.EsField; -import java.util.Collections; import java.util.List; import java.util.Map; import java.util.Set; @@ -19,23 +18,12 @@ public record EsIndex( Map mapping, // keyed by field names Map indexNameWithModes, Map> originalIndices, // keyed by cluster alias - Map> concreteIndices, // keyed by cluster alias - Map> fieldToUnmappedIndices // keyed by field name; Set are concrete index names. + Map> concreteIndices // keyed by cluster alias ) { public EsIndex { assert name != null; assert mapping != null; - assert fieldToUnmappedIndices != null; - assert fieldToUnmappedIndices.values().stream().noneMatch(Set::isEmpty); - } - - public boolean isPartiallyUnmappedField(String fieldName) { - return fieldToUnmappedIndices.containsKey(fieldName); - } - - public Set getUnmappedIndices(String fieldName) { - return fieldToUnmappedIndices.getOrDefault(fieldName, Collections.emptySet()); } public Set concreteQualifiedIndices() { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/index/IndexResolution.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/index/IndexResolution.java index 752e95bc1e20f..4576931789d87 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/index/IndexResolution.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/index/IndexResolution.java @@ -40,7 +40,7 @@ public static IndexResolution valid(EsIndex index) { } public static IndexResolution empty(String indexPattern) { - return valid(new EsIndex(indexPattern, Map.of(), Map.of(), Map.of(), Map.of(), Map.of())); + return valid(new EsIndex(indexPattern, Map.of(), Map.of(), Map.of(), Map.of())); } public static IndexResolution invalid(String invalid) { diff 
--git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java index 90f96b2059b9b..6a92f83218831 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java @@ -1224,7 +1224,6 @@ private IndexResolution checkSingleIndex( lookupIndexResolution.get().mapping(), Map.of(indexName, IndexMode.LOOKUP), Map.of(), - Map.of(), Map.of() ); return IndexResolution.valid(newIndex, newIndex.concreteQualifiedIndices(), lookupIndexResolution.failures()); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/IndexResolver.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/IndexResolver.java index 17e053d7327da..b9fff9612647a 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/IndexResolver.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/IndexResolver.java @@ -37,6 +37,7 @@ import org.elasticsearch.xpack.esql.core.type.EsField; import org.elasticsearch.xpack.esql.core.type.InvalidMappedField; import org.elasticsearch.xpack.esql.core.type.KeywordEsField; +import org.elasticsearch.xpack.esql.core.type.PotentiallyUnmappedKeywordEsField; import org.elasticsearch.xpack.esql.core.type.SupportedVersion; import org.elasticsearch.xpack.esql.core.type.TextEsField; import org.elasticsearch.xpack.esql.core.type.UnsupportedEsField; @@ -53,7 +54,6 @@ import java.util.Set; import java.util.TreeMap; import java.util.TreeSet; -import java.util.stream.Collectors; import static org.elasticsearch.xpack.esql.core.type.DataType.DATETIME; import static org.elasticsearch.xpack.esql.core.type.DataType.KEYWORD; @@ -330,15 +330,6 @@ public static IndexResolution mergedMappings( String[] names = fieldsCaps.keySet().toArray(new String[0]); 
Arrays.sort(names); Map rootFields = new HashMap<>(); - Map> fieldToUnmappedIndices; - Set allIndexNames; - if (trackUnmappedFieldIndices) { - fieldToUnmappedIndices = new HashMap<>(); - allIndexNames = indexResponses.stream().map(FieldCapabilitiesIndexResponse::getIndexName).collect(Collectors.toSet()); - } else { - fieldToUnmappedIndices = Map.of(); - allIndexNames = null; - } for (String name : names) { Map fields = rootFields; String fullName = name; @@ -374,14 +365,13 @@ public static IndexResolution mergedMappings( firstUnsupportedParent.getName(), new HashMap<>() ); - fields.put(name, field); - if (trackUnmappedFieldIndices) { - Set unmappedIndices = new TreeSet<>(allIndexNames); - unmappedIndices.removeAll(collectedFieldCaps.fieldToMappedIndices.getOrDefault(fullName, Set.of())); - if (unmappedIndices.isEmpty() == false) { - fieldToUnmappedIndices.put(fullName, unmappedIndices); + if (trackUnmappedFieldIndices && field instanceof UnsupportedEsField == false) { + Set mappedIndices = collectedFieldCaps.fieldToMappedIndices.getOrDefault(fullName, Set.of()); + if (mappedIndices.size() < numberOfIndices) { + field = wrapPartiallyUnmappedField(field, name, fullName, mappedIndices); } } + fields.put(name, field); } boolean allEmpty = true; @@ -409,8 +399,7 @@ public static IndexResolution mergedMappings( // FieldCapabilitiesResponse#resolvedLocally and FieldCapabilitiesResponse#resolvedRemotely // once all remotes support it (v9.3+) originalIndexExtractor.apply(indexPattern, fieldsInfo.caps), - concreteIndices, - fieldToUnmappedIndices + concreteIndices ); var failures = EsqlCCSUtils.groupFailuresPerCluster(fieldsInfo.caps.getFailures()); return IndexResolution.valid(index, indexNameWithModes.keySet(), failures); @@ -528,6 +517,23 @@ private static EsField createField( return new EsField(name, type, new HashMap<>(), aggregatable, isAlias, timeSeriesFieldType); } + // Visible for testing + public static EsField wrapPartiallyUnmappedField(EsField field, String name, 
String fullName, Set mappedIndices) { + return switch (field.getDataType()) { + // OBJECT fields are containers for subfields, not leaf fields that get queried directly. + // Wrapping them would break downstream code that doesn't expect OBJECT as a data type in InvalidMappedField. + case OBJECT -> field; + // PotentiallyUnmappedKeywordEsField needs the full dotted path for DefaultShardContextForUnmappedField.fieldType(). + case KEYWORD -> new PotentiallyUnmappedKeywordEsField(fullName); + default -> InvalidMappedField.potentiallyUnmapped( + name, + field instanceof InvalidMappedField imf + ? imf.getTypesToIndices() + : Map.of(field.getDataType().widenSmallNumeric().typeName(), mappedIndices) + ); + }; + } + private static UnsupportedEsField unsupported(String name, IndexFieldCapabilities fc) { String originalType = fc.metricType() == TimeSeriesParams.MetricType.COUNTER ? "counter" : fc.type(); return new UnsupportedEsField(name, List.of(originalType)); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTestUtils.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTestUtils.java index fc74838cea8b0..5f517e29a1e96 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTestUtils.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTestUtils.java @@ -118,7 +118,6 @@ public static IndexResolution indexWithDateDateNanosUnionType() { Map.of(dateDateNanos, dateDateNanosField, dateDateNanosLong, dateDateNanosLongField), Map.of("index1", IndexMode.STANDARD, "index2", IndexMode.STANDARD, "index3", IndexMode.STANDARD), Map.of(), - Map.of(), Map.of() ); return IndexResolution.valid(index); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java index 1c4af4362b09f..dc9b7e1d99a5f 100644 --- 
a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java @@ -4671,7 +4671,6 @@ public void testProjectionForUnionTypeResolution() { Map.of("id", idField, "foo", fooField), // Updated mapping keys Map.of("union_index_1", IndexMode.STANDARD, "union_index_2", IndexMode.STANDARD), Map.of(), - Map.of(), Map.of() ); IndexResolution resolution = IndexResolution.valid(index); @@ -4714,7 +4713,6 @@ public void testExplicitRetainOriginalFieldWithCast() { Map.of("id", idField), Map.of("test1", IndexMode.STANDARD, "test2", IndexMode.STANDARD), Map.of(), - Map.of(), Map.of() ); IndexResolution resolution = IndexResolution.valid(index); @@ -4987,7 +4985,6 @@ public void testImplicitCastingForAggregateMetricDouble() { mapping, Map.of("k8s", IndexMode.TIME_SERIES, "k8s-downsampled", IndexMode.TIME_SERIES), Map.of(), - Map.of(), Map.of() ); var testAnalyzer = analyzer().addIndex(esIndex); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerUnmappedTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerUnmappedTests.java index 10bc31dd6e63a..3ee5c344b9a6d 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerUnmappedTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerUnmappedTests.java @@ -34,15 +34,16 @@ import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; import org.elasticsearch.xpack.esql.plan.logical.OrderBy; import org.elasticsearch.xpack.esql.plan.logical.Project; +import org.elasticsearch.xpack.esql.session.IndexResolver; import org.hamcrest.Matcher; import org.hamcrest.Matchers; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; import java.util.Locale; import java.util.Map; import java.util.Set; -import java.util.stream.Collectors; import static 
java.util.Collections.emptyMap; import static org.elasticsearch.xpack.esql.EsqlTestUtils.analyzer; @@ -862,23 +863,17 @@ public void testPartiallyMappedNonKeywordFieldsMarkedAsPotentiallyUnmapped() { if (excludedTypes.contains(dataType)) { continue; } - // Build a minimal mapping: one keyword field (emp_no stand-in for SORT) and one field of the type under test + // Build a minimal mapping: one keyword field (emp_no stand-in for SORT) and one field of the type under test, + // with the latter wrapped as InvalidMappedField.potentiallyUnmapped (as IndexResolver would do in production). Map mapping = Map.of( "sort_field", new EsField("sort_field", DataType.INTEGER, Map.of(), true, EsField.TimeSeriesFieldType.NONE), "test_field", - new EsField("test_field", dataType, Map.of(), true, EsField.TimeSeriesFieldType.NONE) + InvalidMappedField.potentiallyUnmapped("test_field", Map.of(dataType.widenSmallNumeric().typeName(), Set.of("test1"))) ); var plan = analyzer().addIndex( - new EsIndex( - "test*", - mapping, - Map.of("test1", IndexMode.STANDARD, "test2", IndexMode.STANDARD), - Map.of(), - Map.of(), - Map.of("test_field", Set.of("test2")) // partially unmapped - ) + new EsIndex("test*", mapping, Map.of("test1", IndexMode.STANDARD, "test2", IndexMode.STANDARD), Map.of(), Map.of()) ).statement(setUnmappedLoad(""" FROM test* | SORT sort_field @@ -1146,13 +1141,16 @@ public void testDisallowLoadWithPartialNonKeywordAndTypeConflictInSameEval() { "conflicted", Map.of(DataType.LONG.typeName(), Set.of("idx_a"), DataType.DOUBLE.typeName(), Set.of("idx_b")) ); + var partialLong = InvalidMappedField.potentiallyUnmapped( + "partial_long", + Map.of(DataType.LONG.typeName(), Set.of("idx_a", "idx_b")) + ); var merged = new EsIndex( "idx*", - Map.of("partial_long", longField("partial_long"), "conflicted", conflicted), + Map.of("partial_long", partialLong, "conflicted", conflicted), Map.of("idx_a", IndexMode.STANDARD, "idx_b", IndexMode.STANDARD, "idx_unmapped", IndexMode.STANDARD), 
Map.of(), - Map.of(), - Map.of("partial_long", Set.of("idx_unmapped")) + Map.of() ); assertUnmappedLoadError( analyzer().addIndex("idx*", IndexResolution.valid(merged)), @@ -1187,13 +1185,13 @@ public void testAllowLoadCommaSeparatedIndicesWhenPartialNonKeywordUnused() { assumeTrue("Requires OPTIONAL_FIELDS_V5", EsqlCapabilities.Cap.OPTIONAL_FIELDS_V5.isEnabled()); var pattern = "idx_a,idx_b"; + var partialLong = InvalidMappedField.potentiallyUnmapped("partial_long", Map.of(DataType.LONG.typeName(), Set.of("idx_a"))); var merged = new EsIndex( pattern, - Map.of("partial_long", longField("partial_long"), "common", keywordField("common")), + Map.of("partial_long", partialLong, "common", keywordField("common")), Map.of("idx_a", IndexMode.STANDARD, "idx_b", IndexMode.STANDARD), Map.of(), - Map.of(), - Map.of("partial_long", Set.of("idx_b")) + Map.of() ); var plan = analyzer().addIndex(pattern, IndexResolution.valid(merged)) .statement(setUnmappedLoad("FROM idx_a, idx_b | KEEP common")); @@ -1204,13 +1202,13 @@ public void testDisallowLoadCommaSeparatedIndicesWhenPartialNonKeywordUsed() { assumeTrue("Requires OPTIONAL_FIELDS_V5", EsqlCapabilities.Cap.OPTIONAL_FIELDS_V5.isEnabled()); var pattern = "idx_a,idx_b"; + var partialLong = InvalidMappedField.potentiallyUnmapped("partial_long", Map.of(DataType.LONG.typeName(), Set.of("idx_a"))); var merged = new EsIndex( pattern, - Map.of("partial_long", longField("partial_long"), "common", keywordField("common")), + Map.of("partial_long", partialLong, "common", keywordField("common")), Map.of("idx_a", IndexMode.STANDARD, "idx_b", IndexMode.STANDARD), Map.of(), - Map.of(), - Map.of("partial_long", Set.of("idx_b")) + Map.of() ); assertUnmappedLoadError( analyzer().addIndex(pattern, IndexResolution.valid(merged)), @@ -1296,9 +1294,9 @@ public void testDisallowLoadWithPartiallyMappedNonKeywordInMvExpand() { public void testDisallowLoadWithPartiallyMappedNonKeywordDottedPath() { assumeTrue("Requires OPTIONAL_FIELDS_V5", 
EsqlCapabilities.Cap.OPTIONAL_FIELDS_V5.isEnabled()); - var sub = longField("sub"); + var sub = InvalidMappedField.potentiallyUnmapped("sub", Map.of(DataType.LONG.typeName(), Set.of("idx_mapped"))); var obj = new EsField("obj", DataType.OBJECT, Map.of("sub", sub), true, EsField.TimeSeriesFieldType.NONE); - var esIndex = partialIndex(Map.of("obj", obj), Set.of("obj.sub")); + var esIndex = new EsIndex("idx*", Map.of("obj", obj), Map.of("idx_mapped", IndexMode.STANDARD), Map.of(), Map.of()); assertUnmappedLoadError(analyzer().addIndex(esIndex), "FROM idx* | SORT `obj.sub`", partiallyUnmappedNonKeywordError("obj.sub")); } @@ -1310,7 +1308,7 @@ public void testDisallowLoadWithPartialUnionTimestampInWhere() { assumeTrue("Requires OPTIONAL_FIELDS_V5", EsqlCapabilities.Cap.OPTIONAL_FIELDS_V5.isEnabled()); var pattern = "sample_data,sample_data_ts_nanos,no_mapping_sample_data"; - var tsField = new InvalidMappedField( + var tsField = InvalidMappedField.potentiallyUnmapped( "@timestamp", Map.of(DataType.DATETIME.typeName(), Set.of("sample_data"), DataType.DATE_NANOS.typeName(), Set.of("sample_data_ts_nanos")) ); @@ -1326,8 +1324,7 @@ public void testDisallowLoadWithPartialUnionTimestampInWhere() { IndexMode.STANDARD ), Map.of(), - Map.of(), - Map.of("@timestamp", Set.of("no_mapping_sample_data")) + Map.of() ); assertUnmappedLoadError( analyzer().addIndex(pattern, IndexResolution.valid(merged)), @@ -1423,7 +1420,7 @@ private static TestAnalyzer test() { private static TestAnalyzer index1() { Map mapping = Map.of("field", new UnsupportedEsField("field", List.of("flattened"))); - return analyzer().addIndex(new EsIndex("test", mapping, Map.of("test", IndexMode.STANDARD), Map.of(), Map.of(), Map.of())); + return analyzer().addIndex(new EsIndex("test", mapping, Map.of("test", IndexMode.STANDARD), Map.of(), Map.of())); } private static void assertUnmappedLoadError(TestAnalyzer analyzer, String query, Matcher matcher) { @@ -1445,9 +1442,13 @@ private void 
typeConflictVerificationFailure(String statement, Map mapping, Set partialFieldNames) { - Map> fieldToUnmappedIndices = partialFieldNames.stream() - .collect(Collectors.toMap(f -> f, f -> Set.of("idx_unmapped"))); - return new EsIndex("idx*", mapping, Map.of("idx_mapped", IndexMode.STANDARD), Map.of(), Map.of(), fieldToUnmappedIndices); + Set mappedIndices = Set.of("idx_mapped"); + Map wrappedMapping = new HashMap<>(mapping); + for (String fieldName : partialFieldNames) { + EsField field = wrappedMapping.get(fieldName); + wrappedMapping.put(fieldName, IndexResolver.wrapPartiallyUnmappedField(field, fieldName, fieldName, mappedIndices)); + } + return new EsIndex("idx*", wrappedMapping, Map.of("idx_mapped", IndexMode.STANDARD), Map.of(), Map.of()); } private static EsField longField(String name) { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/index/EsIndexGenerator.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/index/EsIndexGenerator.java index 2a686260e804a..50af3acdf0735 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/index/EsIndexGenerator.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/index/EsIndexGenerator.java @@ -25,19 +25,19 @@ public class EsIndexGenerator { public static EsIndex esIndex(String name) { - return new EsIndex(name, Map.of(), Map.of(), Map.of(), Map.of(), Map.of()); + return new EsIndex(name, Map.of(), Map.of(), Map.of(), Map.of()); } public static EsIndex esIndex(String name, Map mapping) { - return new EsIndex(name, mapping, Map.of(), Map.of(), Map.of(), Map.of()); + return new EsIndex(name, mapping, Map.of(), Map.of(), Map.of()); } public static EsIndex esIndex(String name, Map mapping, Map indexNameWithModes) { - return new EsIndex(name, mapping, indexNameWithModes, Map.of(), Map.of(), Map.of()); + return new EsIndex(name, mapping, indexNameWithModes, Map.of(), Map.of()); } public static EsIndex randomEsIndex() { - return new 
EsIndex(randomIdentifier(), randomMapping(), randomIndexNameWithModes(), Map.of(), Map.of(), Map.of()); + return new EsIndex(randomIdentifier(), randomMapping(), randomIndexNameWithModes(), Map.of(), Map.of()); } public static Map randomMapping() { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/AbstractLogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/AbstractLogicalPlanOptimizerTests.java index b8f243c6a3f37..1cdc76ed387d9 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/AbstractLogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/AbstractLogicalPlanOptimizerTests.java @@ -104,8 +104,7 @@ protected static TestAnalyzer multiIndexAnalyzer() { multiIndexMapping, Map.of("test1", IndexMode.STANDARD, "test2", IndexMode.STANDARD), Map.of(), - Map.of(), - Map.of("partial_type_keyword", Set.of("test2")) + Map.of() ); return analyzerWithEnrichPolicies().addIndex(multiIndex); } @@ -137,8 +136,7 @@ protected static TestAnalyzer unionIndexAnalyzer() { Map.of("languages", languages, "last_name", lastName, "salary_change", salaryChange, "first_name", firstName, "id", idField), Map.of("union_types_index", IndexMode.STANDARD, "union_types_index_incompatible", IndexMode.STANDARD), Map.of("", List.of("union_types_index*")), - Map.of("", List.of("union_types_index_incompatible", "union_types_index")), - Map.of() + Map.of("", List.of("union_types_index_incompatible", "union_types_index")) ); return analyzerWithEnrichPolicies().addAnalysisTestsInferenceResolution() .addIndex(unionIndex) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/GoldenTestCase.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/GoldenTestCase.java index 070ec723eaa2b..1f8ee51fd8980 100644 --- 
a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/GoldenTestCase.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/GoldenTestCase.java @@ -28,6 +28,7 @@ import org.elasticsearch.xpack.esql.TestAnalyzer; import org.elasticsearch.xpack.esql.analysis.Analyzer; import org.elasticsearch.xpack.esql.analysis.PreAnalyzer; +import org.elasticsearch.xpack.esql.analysis.UnmappedResolution; import org.elasticsearch.xpack.esql.approximation.ApproximationPlan; import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; import org.elasticsearch.xpack.esql.core.expression.FoldContext; @@ -44,6 +45,7 @@ import org.elasticsearch.xpack.esql.plan.EsqlStatement; import org.elasticsearch.xpack.esql.plan.IndexPattern; import org.elasticsearch.xpack.esql.plan.QueryPlan; +import org.elasticsearch.xpack.esql.plan.logical.Insist; import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; import org.elasticsearch.xpack.esql.plan.physical.ExchangeExec; import org.elasticsearch.xpack.esql.plan.physical.ExchangeSinkExec; @@ -58,6 +60,7 @@ import org.elasticsearch.xpack.esql.plugin.QueryPragmas; import org.elasticsearch.xpack.esql.plugin.ReductionPlan; import org.elasticsearch.xpack.esql.session.Configuration; +import org.elasticsearch.xpack.esql.session.IndexResolver; import org.elasticsearch.xpack.esql.session.Versioned; import org.elasticsearch.xpack.esql.stats.SearchStats; import org.junit.internal.AssumptionViolatedException; @@ -85,7 +88,6 @@ import java.util.regex.Pattern; import java.util.stream.Collectors; -import static java.util.stream.Collectors.toSet; import static org.elasticsearch.xpack.esql.CsvTestsDataLoader.CSV_DATASET; import static org.elasticsearch.xpack.esql.EsqlTestUtils.TEST_PARSER; import static org.elasticsearch.xpack.esql.EsqlTestUtils.analyzer; @@ -254,13 +256,16 @@ private List> doTests() throws IOException { Path queryPath = PathUtils.get(basePath.toString(), queryPathParts); 
Files.createDirectories(queryPath.getParent()); Files.writeString(queryPath, esqlQuery); + UnmappedResolution unmappedResolution = statement.setting(UNMAPPED_FIELDS); TestAnalyzer testAnalyzer = analyzer().addLanguagesLookup() .addTestLookup() .addAnalysisTestsEnrichResolution() .addAnalysisTestsInferenceResolution() .minimumTransportVersion(transportVersion) - .unmappedResolution(statement.setting(UNMAPPED_FIELDS)); - loadIndexResolution(testDatasets(parsedPlan)).forEach( + .unmappedResolution(unmappedResolution); + boolean trackUnmappedFieldIndices = unmappedResolution == UnmappedResolution.LOAD + || parsedPlan.anyMatch(p -> p instanceof Insist); + loadIndexResolution(testDatasets(parsedPlan), trackUnmappedFieldIndices).forEach( (pattern, resolution) -> testAnalyzer.addIndex(pattern.indexPattern(), resolution) ); Analyzer analyzer = testAnalyzer.buildAnalyzer(); @@ -789,33 +794,38 @@ private static Map testD } public static Map loadIndexResolution( - Map datasets + Map datasets, + boolean trackUnmappedFieldIndices ) { Map indexResolutions = new HashMap<>(); for (var entry : datasets.entrySet()) { - indexResolutions.put(entry.getKey(), loadIndexResolution(entry.getValue())); + indexResolutions.put(entry.getKey(), loadIndexResolution(entry.getValue(), trackUnmappedFieldIndices)); } return indexResolutions; } + public static Map loadIndexResolution( + Map datasets + ) { + return loadIndexResolution(datasets, false); + } + public static IndexResolution loadIndexResolution(CsvTestsDataLoader.MultiIndexTestDataset datasets) { + return loadIndexResolution(datasets, false); + } + + public static IndexResolution loadIndexResolution( + CsvTestsDataLoader.MultiIndexTestDataset datasets, + boolean trackUnmappedFieldIndices + ) { var indexNames = datasets.datasets().stream().map(CsvTestsDataLoader.TestDataset::indexName); Map indexModes = indexNames.collect(Collectors.toMap(x -> x, x -> IndexMode.STANDARD)); List mappings = datasets.datasets() .stream() .map(ds -> new 
MappingPerIndex(ds.indexName(), createMappingForIndex(ds))) .toList(); - var mergedMappings = mergeMappings(mappings); - return IndexResolution.valid( - new EsIndex( - datasets.indexPattern(), - mergedMappings.mapping, - indexModes, - Map.of(), - Map.of(), - mergedMappings.fieldToUnmappedIndices - ) - ); + var mergedMappings = mergeMappings(mappings, trackUnmappedFieldIndices); + return IndexResolution.valid(new EsIndex(datasets.indexPattern(), mergedMappings.mapping, indexModes, Map.of(), Map.of())); } // TODO should de-duplicate, strong overlap with CsvTestsDataLoader#readMappingFile @@ -869,33 +879,25 @@ private static Map createMappingForIndex(CsvTestsDataLoader.Tes record MappingPerIndex(String index, Map mapping) {} - record MergedResult(Map mapping, Map> fieldToUnmappedIndices) {} + record MergedResult(Map mapping) {} - private static MergedResult mergeMappings(List mappingsPerIndex) { - int numberOfIndices = mappingsPerIndex.size(); - Set allIndices = mappingsPerIndex.stream().map(MappingPerIndex::index).collect(toSet()); + private static MergedResult mergeMappings(List mappingsPerIndex, boolean trackUnmappedFieldIndices) { Map> columnNamesToFieldByIndices = new HashMap<>(); for (var mappingPerIndex : mappingsPerIndex) { for (var entry : mappingPerIndex.mapping().entrySet()) { - String columnName = entry.getKey(); - EsField field = entry.getValue(); - columnNamesToFieldByIndices.computeIfAbsent(columnName, k -> new HashMap<>()).put(mappingPerIndex.index(), field); + columnNamesToFieldByIndices.computeIfAbsent(entry.getKey(), k -> new HashMap<>()) + .put(mappingPerIndex.index(), entry.getValue()); } } - - Map> fieldToUnmappedIndices = new HashMap<>(); - for (var e : columnNamesToFieldByIndices.entrySet()) { - if (e.getValue().size() < numberOfIndices) { - Set unmappedIndices = allIndices.stream().filter(i -> e.getValue().containsKey(i) == false).collect(toSet()); - if (unmappedIndices.isEmpty() == false) { - fieldToUnmappedIndices.put(e.getKey(), 
unmappedIndices); - } + int numberOfIndices = mappingsPerIndex.size(); + var mappings = columnNamesToFieldByIndices.entrySet().stream().collect(Collectors.toMap(Map.Entry::getKey, e -> { + EsField field = mergeFields(e.getKey(), e.getValue()); + if (trackUnmappedFieldIndices && e.getValue().size() < numberOfIndices) { + field = IndexResolver.wrapPartiallyUnmappedField(field, e.getKey(), e.getKey(), e.getValue().keySet()); } - } - var mappings = columnNamesToFieldByIndices.entrySet() - .stream() - .collect(Collectors.toMap(Map.Entry::getKey, e -> mergeFields(e.getKey(), e.getValue()))); - return new MergedResult(mappings, fieldToUnmappedIndices); + return field; + })); + return new MergedResult(mappings); } private static EsField mergeFields(String index, Map columnNameToField) { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java index f68caae1df5d6..600afa4d377cb 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java @@ -11176,7 +11176,6 @@ public void testTsWildcardStatsWithMixedIndexModes() { mapping, Map.of("ts_index", IndexMode.TIME_SERIES, "standard_index", IndexMode.STANDARD), Map.of(), - Map.of(), Map.of() ); var testAnalyzer = EsqlTestUtils.analyzer().addIndex(IndexResolution.valid(mixedIndex)); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/EsqlSessionTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/EsqlSessionTests.java index 28269a5d87f2d..6907856ad932e 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/EsqlSessionTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/EsqlSessionTests.java 
@@ -60,7 +60,7 @@ public void testRefineConcreteTimeSeriesResolutionReturnsHelpfulError() { public void testRefineConcreteTimeSeriesResolutionKeepsOriginalFailures() { FieldCapabilitiesFailure failure = new FieldCapabilitiesFailure(new String[] { "logs" }, new ElasticsearchException("boom")); IndexResolution originalResolution = IndexResolution.valid( - new EsIndex("logs", Map.of(), Map.of(), Map.of(), Map.of(), Map.of()), + new EsIndex("logs", Map.of(), Map.of(), Map.of(), Map.of()), Set.of(), Map.of("remote", List.of(failure)) ); @@ -76,7 +76,7 @@ public void testRefineConcreteTimeSeriesResolutionKeepsOriginalFailures() { private static IndexResolution resolvedIndex(String indexName) { return IndexResolution.valid( - new EsIndex(indexName, Map.of(), Map.of(indexName, IndexMode.STANDARD), Map.of(), Map.of(), Map.of()), + new EsIndex(indexName, Map.of(), Map.of(indexName, IndexMode.STANDARD), Map.of(), Map.of()), Set.of(indexName), Map.of() ); From ff9b59175a0b788597ea36285c713d97fa7c0f39 Mon Sep 17 00:00:00 2001 From: Gal Lalouche Date: Fri, 17 Apr 2026 20:49:44 +0300 Subject: [PATCH 02/20] ESQL: Fix INSIST after partial-keyword wrapping move Two related fixes after moving partially-mapped keyword wrapping from the Analyzer into IndexResolver: - multiIndexAnalyzer test helper used to mark partial_type_keyword via the (now removed) fieldToUnmappedIndices map. Wrap it via IndexResolver.wrapPartiallyUnmappedField, mirroring production. - PropgateUnmappedFields had an all-or-nothing short-circuit: if the EsRelation already contained any PUK, the rule skipped propagation entirely. That worked when wrapping happened in the Analyzer for load mode only, but now any partially-mapped keyword is pre-wrapped, so the short-circuit dropped new PUKs introduced by INSIST on a field absent from the index. Filter to PUKs not already in the relation's output and only merge the missing ones. 
Made-with: Cursor --- .../rules/logical/PropgateUnmappedFields.java | 42 +++++++++++-------- .../AbstractLogicalPlanOptimizerTests.java | 4 +- 2 files changed, 27 insertions(+), 19 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PropgateUnmappedFields.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PropgateUnmappedFields.java index 23f2110d628cc..b1a28c2ea8c05 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PropgateUnmappedFields.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PropgateUnmappedFields.java @@ -7,6 +7,7 @@ package org.elasticsearch.xpack.esql.optimizer.rules.logical; +import org.elasticsearch.xpack.esql.core.expression.Attribute; import org.elasticsearch.xpack.esql.core.expression.AttributeSet; import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; import org.elasticsearch.xpack.esql.core.type.PotentiallyUnmappedKeywordEsField; @@ -16,6 +17,9 @@ import org.elasticsearch.xpack.esql.rule.Rule; import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; /** * Merges unmapped fields into the output of the ES relation. This marking is necessary for the block loaders to force loading from _source @@ -34,24 +38,26 @@ public LogicalPlan apply(LogicalPlan logicalPlan) { } }); var unmappedFields = unmappedFieldsBuilder.build(); - return unmappedFields.isEmpty() - ? logicalPlan - : logicalPlan.transformUp( - EsRelation.class, - er -> hasPotentiallyUnmappedKeywordEsField(er) - ? er - : er.withAttributes(NamedExpressions.mergeOutputAttributes(new ArrayList<>(unmappedFields), er.output())) - ); - } - - // Checks if the EsRelation already has a PotentiallyUnmappedKeywordEsField. If true SET load_unmapped="load" is applied. 
- // This is used to practically disable the rule, since it changes the output order (mergeOutputAttributes()). - private static boolean hasPotentiallyUnmappedKeywordEsField(EsRelation er) { - for (var attr : er.output()) { - if (attr instanceof FieldAttribute fa && fa.field() instanceof PotentiallyUnmappedKeywordEsField) { - return true; - } + if (unmappedFields.isEmpty()) { + return logicalPlan; } - return false; + return logicalPlan.transformUp(EsRelation.class, er -> { + Set existingPuks = new HashSet<>(); + for (Attribute attr : er.output()) { + if (attr instanceof FieldAttribute fa && fa.field() instanceof PotentiallyUnmappedKeywordEsField) { + existingPuks.add(fa.fieldName().string()); + } + } + // Only propagate PUK fields that are not already in the relation's output, so we preserve the existing order. + // Partially-mapped keyword fields are already wrapped as PUKs by the index resolver; this rule just merges in + // PUKs introduced elsewhere (e.g. by INSIST on a field that is not in the index). + List missing = new ArrayList<>(); + for (Attribute attr : unmappedFields) { + if (attr instanceof FieldAttribute fa && existingPuks.contains(fa.fieldName().string()) == false) { + missing.add(fa); + } + } + return missing.isEmpty() ? 
er : er.withAttributes(NamedExpressions.mergeOutputAttributes(missing, er.output())); + }); } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/AbstractLogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/AbstractLogicalPlanOptimizerTests.java index 1cdc76ed387d9..91c634b3a6d2f 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/AbstractLogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/AbstractLogicalPlanOptimizerTests.java @@ -17,6 +17,7 @@ import org.elasticsearch.xpack.esql.index.EsIndex; import org.elasticsearch.xpack.esql.plan.logical.Enrich; import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; +import org.elasticsearch.xpack.esql.session.IndexResolver; import org.junit.BeforeClass; import java.util.LinkedHashMap; @@ -95,9 +96,10 @@ protected static TestAnalyzer metricsAnalyzer() { protected static TestAnalyzer multiIndexAnalyzer() { var multiIndexMapping = loadMapping("mapping-basic.json"); + EsField partialTypeKeyword = new EsField("partial_type_keyword", KEYWORD, emptyMap(), true, EsField.TimeSeriesFieldType.NONE); multiIndexMapping.put( "partial_type_keyword", - new EsField("partial_type_keyword", KEYWORD, emptyMap(), true, EsField.TimeSeriesFieldType.NONE) + IndexResolver.wrapPartiallyUnmappedField(partialTypeKeyword, "partial_type_keyword", "partial_type_keyword", Set.of("test1")) ); var multiIndex = new EsIndex( "multi_index", From 6c0daf5349854a686cec91d4763ae11c00e7b02f Mon Sep 17 00:00:00 2001 From: Gal Lalouche Date: Fri, 24 Apr 2026 13:46:20 +0300 Subject: [PATCH 03/20] Some PR fixes --- .../xpack/esql/analysis/Verifier.java | 4 +- .../PotentiallyUnmappedKeywordEsField.java | 4 +- .../esql/optimizer/LogicalPlanOptimizer.java | 4 +- ...elds.java => PropagateUnmappedFields.java} | 37 +++++++++++-------- 4 files changed, 27 insertions(+), 22 deletions(-) rename 
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/{PropgateUnmappedFields.java => PropagateUnmappedFields.java} (65%) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java index 6fff8a0cb7d26..f386fa9b9402b 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java @@ -590,11 +590,11 @@ private static AttributeSet partiallyUnmappedNonKeywords(LogicalPlan plan, Map { IndexResolution indexResolution = indexResolutions.get(new IndexPattern(relation.source(), relation.indexPattern())); if (indexResolution != null && indexResolution.isValid()) { - Set fieldNames = collectPotentiallyUnmappedNonKeywords(indexResolution.get().mapping()); + Set punkFieldNames = collectPotentiallyUnmappedNonKeywords(indexResolution.get().mapping()); for (Attribute attr : relation.output()) { // punk_field::long is fine; in this case, the FieldAttribute contains a MultiTypeEsField with the conversions. 
if (attr instanceof FieldAttribute fa - && fieldNames.contains(fa.fieldName().string()) + && punkFieldNames.contains(fa.fieldName().string()) && fa.field() instanceof MultiTypeEsField == false) { punks.add(fa); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/PotentiallyUnmappedKeywordEsField.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/PotentiallyUnmappedKeywordEsField.java index 21ac986912fff..aba1c0c8eb190 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/PotentiallyUnmappedKeywordEsField.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/PotentiallyUnmappedKeywordEsField.java @@ -10,7 +10,7 @@ import org.elasticsearch.common.io.stream.StreamInput; import java.io.IOException; -import java.util.HashMap; +import java.util.Collections; /** * This class is used as a marker for fields that may be unmapped, where an unmapped field is a field which exists in the _source but is not @@ -19,7 +19,7 @@ */ public class PotentiallyUnmappedKeywordEsField extends KeywordEsField { public PotentiallyUnmappedKeywordEsField(String name) { - super(name, new HashMap<>(), true, Short.MAX_VALUE, false, false, TimeSeriesFieldType.UNKNOWN); + super(name, Collections.emptyMap(), true, Short.MAX_VALUE, false, false, TimeSeriesFieldType.UNKNOWN); } public PotentiallyUnmappedKeywordEsField(StreamInput in) throws IOException { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java index cf0f5f9cc053e..1976d22f832d2 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java @@ -33,7 +33,7 @@ import 
org.elasticsearch.xpack.esql.optimizer.rules.logical.PropagateEvalFoldables; import org.elasticsearch.xpack.esql.optimizer.rules.logical.PropagateInlineEvals; import org.elasticsearch.xpack.esql.optimizer.rules.logical.PropagateNullable; -import org.elasticsearch.xpack.esql.optimizer.rules.logical.PropgateUnmappedFields; +import org.elasticsearch.xpack.esql.optimizer.rules.logical.PropagateUnmappedFields; import org.elasticsearch.xpack.esql.optimizer.rules.logical.PruneColumns; import org.elasticsearch.xpack.esql.optimizer.rules.logical.PruneEmptyAggregates; import org.elasticsearch.xpack.esql.optimizer.rules.logical.PruneEmptyForkBranches; @@ -269,7 +269,7 @@ protected static Batch cleanup() { new ReplaceLimitAndSortAsTopN(), new HoistRemoteEnrichTopN(), new ReplaceRowAsLocalRelation(), - new PropgateUnmappedFields(), + new PropagateUnmappedFields(), new CombineLimitTopN(), new ReorderLimitProjectAndOrderBy() ); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PropgateUnmappedFields.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PropagateUnmappedFields.java similarity index 65% rename from x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PropgateUnmappedFields.java rename to x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PropagateUnmappedFields.java index b1a28c2ea8c05..fcfc9623f2e81 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PropgateUnmappedFields.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PropagateUnmappedFields.java @@ -16,16 +16,18 @@ import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; import org.elasticsearch.xpack.esql.rule.Rule; -import java.util.ArrayList; -import java.util.HashSet; import java.util.List; import java.util.Set; +import java.util.stream.Collectors; 
+import java.util.stream.Stream; /** * Merges unmapped fields into the output of the ES relation. This marking is necessary for the block loaders to force loading from _source * if the field is unmapped. + * + * N.B. This is only used for INSIST keyword, so when INSIST is sunset, we can get rid of this rule! */ -public class PropgateUnmappedFields extends Rule { +public class PropagateUnmappedFields extends Rule { @Override public LogicalPlan apply(LogicalPlan logicalPlan) { if (logicalPlan instanceof EsRelation) { @@ -42,21 +44,24 @@ public LogicalPlan apply(LogicalPlan logicalPlan) { return logicalPlan; } return logicalPlan.transformUp(EsRelation.class, er -> { - Set existingPuks = new HashSet<>(); - for (Attribute attr : er.output()) { - if (attr instanceof FieldAttribute fa && fa.field() instanceof PotentiallyUnmappedKeywordEsField) { - existingPuks.add(fa.fieldName().string()); - } - } + Set existingPuks = er.output() + .stream() + .flatMap( + attr -> attr instanceof FieldAttribute fa && fa.field() instanceof PotentiallyUnmappedKeywordEsField + ? Stream.of(fa.fieldName().string()) + : Stream.empty() + ) + .collect(Collectors.toSet()); // Only propagate PUK fields that are not already in the relation's output, so we preserve the existing order. // Partially-mapped keyword fields are already wrapped as PUKs by the index resolver; this rule just merges in - // PUKs introduced elsewhere (e.g. by INSIST on a field that is not in the index). - List missing = new ArrayList<>(); - for (Attribute attr : unmappedFields) { - if (attr instanceof FieldAttribute fa && existingPuks.contains(fa.fieldName().string()) == false) { - missing.add(fa); - } - } + // PUKs introduced elsewhere (e.g., by INSIST on a field that is not in the index). + List missing = unmappedFields.stream() + .flatMap( + attr -> attr instanceof FieldAttribute fa && existingPuks.contains(fa.fieldName().string()) == false + ? 
Stream.of(attr) + : Stream.empty() + ) + .collect(Collectors.toList()); return missing.isEmpty() ? er : er.withAttributes(NamedExpressions.mergeOutputAttributes(missing, er.output())); }); } From eb86e49ae0f5ed1debb35f01b4e814fb4d79d7b0 Mon Sep 17 00:00:00 2001 From: Gal Lalouche Date: Fri, 24 Apr 2026 14:09:18 +0300 Subject: [PATCH 04/20] More PR fixes --- .../logical/PropagateUnmappedFields.java | 47 +++++++++---------- .../xpack/esql/session/IndexResolver.java | 19 ++++++-- .../xpack/esql/optimizer/GoldenTestCase.java | 33 ++++++------- 3 files changed, 55 insertions(+), 44 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PropagateUnmappedFields.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PropagateUnmappedFields.java index fcfc9623f2e81..b96b777f0a19c 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PropagateUnmappedFields.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PropagateUnmappedFields.java @@ -40,29 +40,28 @@ public LogicalPlan apply(LogicalPlan logicalPlan) { } }); var unmappedFields = unmappedFieldsBuilder.build(); - if (unmappedFields.isEmpty()) { - return logicalPlan; - } - return logicalPlan.transformUp(EsRelation.class, er -> { - Set existingPuks = er.output() - .stream() - .flatMap( - attr -> attr instanceof FieldAttribute fa && fa.field() instanceof PotentiallyUnmappedKeywordEsField - ? Stream.of(fa.fieldName().string()) - : Stream.empty() - ) - .collect(Collectors.toSet()); - // Only propagate PUK fields that are not already in the relation's output, so we preserve the existing order. - // Partially-mapped keyword fields are already wrapped as PUKs by the index resolver; this rule just merges in - // PUKs introduced elsewhere (e.g., by INSIST on a field that is not in the index). 
- List missing = unmappedFields.stream() - .flatMap( - attr -> attr instanceof FieldAttribute fa && existingPuks.contains(fa.fieldName().string()) == false - ? Stream.of(attr) - : Stream.empty() - ) - .collect(Collectors.toList()); - return missing.isEmpty() ? er : er.withAttributes(NamedExpressions.mergeOutputAttributes(missing, er.output())); - }); + return unmappedFields.isEmpty() ? logicalPlan : logicalPlan.transformUp(EsRelation.class, er -> mergeMissing(er, unmappedFields)); + } + + private static EsRelation mergeMissing(EsRelation er, AttributeSet unmappedFields) { + Set existingPuks = er.output() + .stream() + .flatMap( + attr -> attr instanceof FieldAttribute fa && fa.field() instanceof PotentiallyUnmappedKeywordEsField + ? Stream.of(fa.fieldName().string()) + : Stream.empty() + ) + .collect(Collectors.toUnmodifiableSet()); + // Only propagate PUK fields that are not already in the relation's output, so we preserve the existing order. + // Partially-mapped keyword fields are already wrapped as PUKs by the index resolver; this rule just merges in + // PUKs introduced elsewhere (e.g., by INSIST on a field that is not in the index). + List missing = unmappedFields.stream() + .flatMap( + attr -> attr instanceof FieldAttribute fa && existingPuks.contains(fa.fieldName().string()) == false + ? Stream.of(attr) + : Stream.empty() + ) + .toList(); + return missing.isEmpty() ? 
er : er.withAttributes(NamedExpressions.mergeOutputAttributes(missing, er.output())); } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/IndexResolver.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/IndexResolver.java index b9fff9612647a..482b01a5f3e3d 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/IndexResolver.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/IndexResolver.java @@ -365,11 +365,9 @@ public static IndexResolution mergedMappings( firstUnsupportedParent.getName(), new HashMap<>() ); - if (trackUnmappedFieldIndices && field instanceof UnsupportedEsField == false) { + if (trackUnmappedFieldIndices) { Set mappedIndices = collectedFieldCaps.fieldToMappedIndices.getOrDefault(fullName, Set.of()); - if (mappedIndices.size() < numberOfIndices) { - field = wrapPartiallyUnmappedField(field, name, fullName, mappedIndices); - } + field = wrapIfPartiallyUnmapped(field, name, fullName, mappedIndices, numberOfIndices); } fields.put(name, field); } @@ -517,6 +515,19 @@ private static EsField createField( return new EsField(name, type, new HashMap<>(), aggregatable, isAlias, timeSeriesFieldType); } + // Visible for testing. + public static EsField wrapIfPartiallyUnmapped( + EsField field, + String name, + String fullName, + Set mappedIndices, + int numberOfIndices + ) { + return field instanceof UnsupportedEsField == false && mappedIndices.size() < numberOfIndices + ? 
wrapPartiallyUnmappedField(field, name, fullName, mappedIndices) + : field; + } + // Visible for testing public static EsField wrapPartiallyUnmappedField(EsField field, String name, String fullName, Set mappedIndices) { return switch (field.getDataType()) { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/GoldenTestCase.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/GoldenTestCase.java index 1f8ee51fd8980..dc194a6f9f467 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/GoldenTestCase.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/GoldenTestCase.java @@ -882,35 +882,36 @@ record MappingPerIndex(String index, Map mapping) {} record MergedResult(Map mapping) {} private static MergedResult mergeMappings(List mappingsPerIndex, boolean trackUnmappedFieldIndices) { - Map> columnNamesToFieldByIndices = new HashMap<>(); + Map> fieldNamesToFieldByIndices = new HashMap<>(); for (var mappingPerIndex : mappingsPerIndex) { for (var entry : mappingPerIndex.mapping().entrySet()) { - columnNamesToFieldByIndices.computeIfAbsent(entry.getKey(), k -> new HashMap<>()) + fieldNamesToFieldByIndices.computeIfAbsent(entry.getKey(), k -> new HashMap<>()) .put(mappingPerIndex.index(), entry.getValue()); } } int numberOfIndices = mappingsPerIndex.size(); - var mappings = columnNamesToFieldByIndices.entrySet().stream().collect(Collectors.toMap(Map.Entry::getKey, e -> { - EsField field = mergeFields(e.getKey(), e.getValue()); - if (trackUnmappedFieldIndices && e.getValue().size() < numberOfIndices) { - field = IndexResolver.wrapPartiallyUnmappedField(field, e.getKey(), e.getKey(), e.getValue().keySet()); - } - return field; + var mappings = fieldNamesToFieldByIndices.entrySet().stream().collect(Collectors.toMap(Map.Entry::getKey, e -> { + String fieldName = e.getKey(); + Map indexToFields = e.getValue(); + EsField field = mergeFields(fieldName, 
indexToFields); + return trackUnmappedFieldIndices + ? IndexResolver.wrapIfPartiallyUnmapped(field, fieldName, fieldName, indexToFields.keySet(), numberOfIndices) + : field; })); return new MergedResult(mappings); } - private static EsField mergeFields(String index, Map columnNameToField) { - var indexFields = columnNameToField.values(); - if (indexFields.stream().distinct().count() > 1) { + private static EsField mergeFields(String fieldName, Map indexToFields) { + var fields = indexToFields.values(); + if (fields.stream().distinct().count() > 1) { var typesToIndices = new HashMap>(); - for (var typeToIndex : columnNameToField.entrySet()) { - typesToIndices.computeIfAbsent(typeToIndex.getValue().getDataType().typeName(), k -> new HashSet<>()) - .add(typeToIndex.getKey()); + for (var indexToField : indexToFields.entrySet()) { + typesToIndices.computeIfAbsent(indexToField.getValue().getDataType().typeName(), k -> new HashSet<>()) + .add(indexToField.getKey()); } - return new InvalidMappedField(index, typesToIndices); + return new InvalidMappedField(fieldName, typesToIndices); } else { - return indexFields.iterator().next(); + return fields.iterator().next(); } } } From 1b0e7ce302257a70aa71e1e785ddd5c0cc7e90e3 Mon Sep 17 00:00:00 2001 From: Gal Lalouche Date: Mon, 27 Apr 2026 15:31:54 +0300 Subject: [PATCH 05/20] Add failing tests --- .../src/main/resources/unmapped-load.csv-spec | 16 ++++++++++++++++ .../esql/analysis/AnalyzerUnmappedTests.java | 14 ++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/unmapped-load.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/unmapped-load.csv-spec index 903498dc87ef4..07e468bf88b64 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/unmapped-load.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/unmapped-load.csv-spec @@ -537,6 +537,22 @@ sample_data | Connected to 10.1.0.2 sample_data | Connected to 10.1.0.1 ; 
+partiallyUnmappedSmallNumericFieldIsWidened +required_capability: optional_fields_v5 + +SET unmapped_fields="load"\; +FROM apps_short, partial_mapping_sample_data +| KEEP id, name +| SORT name DESC NULLS LAST +| LIMIT 3 +; + +id:integer | name:keyword +14 | mmmmm +13 | lllll +11 | kkkkk +; + fieldIsPartiallyUnmappedAndRenamedMultiIndex required_capability: optional_fields_v5 diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerUnmappedTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerUnmappedTests.java index 3ee5c344b9a6d..2b4c9ef3c8fc1 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerUnmappedTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerUnmappedTests.java @@ -56,6 +56,7 @@ import static org.elasticsearch.xpack.esql.analysis.AnalyzerTests.withInlinestatsWarning; import static org.hamcrest.Matchers.allOf; import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.hasSize; import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.is; @@ -898,6 +899,19 @@ public void testPartiallyMappedNonKeywordFieldsMarkedAsPotentiallyUnmapped() { } } + public void testWrapPartiallyUnmappedFieldWidensSmallNumerics() { + Set mappedIndices = Set.of("idx_mapped"); + for (DataType smallNumeric : List.of(DataType.SHORT, DataType.BYTE, DataType.FLOAT, DataType.HALF_FLOAT, DataType.SCALED_FLOAT)) { + EsField field = new EsField("f", smallNumeric, emptyMap(), true, EsField.TimeSeriesFieldType.NONE); + InvalidMappedField wrapped = (InvalidMappedField) IndexResolver.wrapPartiallyUnmappedField(field, "f", "f", mappedIndices); + assertThat( + "Partially-unmapped " + smallNumeric + " field should be stored under its widened type name", + wrapped.getTypesToIndices(), + 
equalTo(Map.of(smallNumeric.widenSmallNumeric().typeName(), mappedIndices)) + ); + } + } + public void testTbucketWithUnmappedTimestampWithLookupJoin() { var query = """ FROM test From b7ba0f57ed14950e8c2817a9e9f7161f01c539a0 Mon Sep 17 00:00:00 2001 From: Gal Lalouche Date: Mon, 27 Apr 2026 23:35:27 +0300 Subject: [PATCH 06/20] ESQL: Compact union type representation in plans Introduce two memory-frugal variants of the EsField subtypes used for union-typed fields and route the analyzer to them on capable transport versions: * MultiTypeEsField2 keys conversion expressions by data type (plus an optional unmapped conversion) instead of per-index, dropping the conversion map from O(num_indices) to O(num_types) per field. * InvalidMappedField2 keeps at most three indices per type plus a "..." sentinel; the error message is rendered from the full input before truncation, so users still get "[a, b, c] and [N] other indices". * A common UnionTypeEsField interface lets call sites work with either flavor without instanceof chains. * The analyzer threads the minimum transport version through and produces the new representations when ESQL_MULTI_TYPE_ES_FIELD_2 is supported, falling back to the legacy classes otherwise. For a plan with 50 union-typed fields each conflicting across 5,000 indices, retained memory drops ~35x (14.4 MB -> 407 KB), as exercised by the new MultiTypeEsFieldMemoryTests. 
Made-with: Cursor --- .../referable/esql_multi_type_es_field_2.csv | 1 + .../resources/transport/upper_bounds/9.5.csv | 2 +- .../xpack/esql/analysis/Analyzer.java | 158 +++++++++++----- .../xpack/esql/analysis/Verifier.java | 4 +- .../xpack/esql/core/type/EsField.java | 1 + .../esql/core/type/InvalidMappedField.java | 4 +- .../esql/core/type/InvalidMappedField2.java | 79 ++++++++ .../esql/core/type/MultiTypeEsField.java | 7 +- .../esql/core/type/MultiTypeEsField2.java | 177 ++++++++++++++++++ .../esql/core/type/UnionTypeEsField.java | 28 +++ .../function/fulltext/FullTextFunction.java | 12 +- .../convert/FromAggregateMetricDouble.java | 4 +- .../ReplaceDateTruncBucketWithRoundTo.java | 4 +- .../local/ReplaceFieldWithConstantOrNull.java | 4 +- .../planner/EsPhysicalOperationProviders.java | 31 ++- .../xpack/esql/analysis/AnalyzerTests.java | 4 +- .../LocalPhysicalPlanOptimizerTests.java | 4 +- .../esql/type/InvalidMappedField2Tests.java | 112 +++++++++++ .../esql/type/MultiTypeEsField2Tests.java | 168 +++++++++++++++++ .../type/MultiTypeEsFieldMemoryTests.java | 129 +++++++++++++ 20 files changed, 851 insertions(+), 82 deletions(-) create mode 100644 server/src/main/resources/transport/definitions/referable/esql_multi_type_es_field_2.csv create mode 100644 x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/InvalidMappedField2.java create mode 100644 x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/MultiTypeEsField2.java create mode 100644 x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/UnionTypeEsField.java create mode 100644 x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/InvalidMappedField2Tests.java create mode 100644 x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsField2Tests.java create mode 100644 x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsFieldMemoryTests.java diff --git 
a/server/src/main/resources/transport/definitions/referable/esql_multi_type_es_field_2.csv b/server/src/main/resources/transport/definitions/referable/esql_multi_type_es_field_2.csv new file mode 100644 index 0000000000000..1e122475cc639 --- /dev/null +++ b/server/src/main/resources/transport/definitions/referable/esql_multi_type_es_field_2.csv @@ -0,0 +1 @@ +9366000 diff --git a/server/src/main/resources/transport/upper_bounds/9.5.csv b/server/src/main/resources/transport/upper_bounds/9.5.csv index e16cac0347b3d..f04765e78d0b6 100644 --- a/server/src/main/resources/transport/upper_bounds/9.5.csv +++ b/server/src/main/resources/transport/upper_bounds/9.5.csv @@ -1 +1 @@ -inference_api_audio_video_pdf_support,9365000 +esql_multi_type_es_field_2,9366000 diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java index d951e6a8f0996..cda937e1fcaa7 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java @@ -57,7 +57,9 @@ import org.elasticsearch.xpack.esql.core.type.EsField; import org.elasticsearch.xpack.esql.core.type.InvalidMappedField; import org.elasticsearch.xpack.esql.core.type.MultiTypeEsField; +import org.elasticsearch.xpack.esql.core.type.MultiTypeEsField2; import org.elasticsearch.xpack.esql.core.type.PotentiallyUnmappedKeywordEsField; +import org.elasticsearch.xpack.esql.core.type.UnionTypeEsField; import org.elasticsearch.xpack.esql.core.type.UnsupportedEsField; import org.elasticsearch.xpack.esql.core.util.CollectionUtils; import org.elasticsearch.xpack.esql.core.util.Holder; @@ -2243,23 +2245,45 @@ private static Expression processVectorFunction( * Any fields which could not be resolved by conversion functions will be converted to UnresolvedAttribute instances in a later rule * (See {@link 
UnionTypesCleanup} below). */ - private static class ResolveUnionTypes extends Rule { + private static class ResolveUnionTypes extends ParameterizedRule { record TypeResolutionKey(String fieldName, DataType fieldType) {} + static boolean isMultiType(EsField field) { + return field instanceof MultiTypeEsField || field instanceof MultiTypeEsField2; + } + + /** + * Picks between the legacy {@link MultiTypeEsField} (index-keyed) and the new {@link MultiTypeEsField2} + * (type-keyed) based on the cluster minimum transport version, so that newly-built plans remain + * deserializable on older nodes during a rolling upgrade. + */ + static EsField buildMultiTypeEsField( + InvalidMappedField imf, + Map typesToConversionExpressions, + @Nullable Expression unmappedConversionExpression, + AnalyzerContext context + ) { + if (context.minimumVersion().supports(MultiTypeEsField2.ESQL_MULTI_TYPE_ES_FIELD_2)) { + return MultiTypeEsField2.resolveFrom(imf, typesToConversionExpressions, unmappedConversionExpression); + } + return MultiTypeEsField.resolveFrom(imf, typesToConversionExpressions) + .withPotentiallyUnmappedExpression(unmappedConversionExpression); + } + @Override - public LogicalPlan apply(LogicalPlan plan) { + public LogicalPlan apply(LogicalPlan plan, AnalyzerContext context) { List unionFieldAttributes = new ArrayList<>(); - return plan.transformUp(LogicalPlan.class, p -> p.childrenResolved() == false ? p : doRule(p, unionFieldAttributes)); + return plan.transformUp(LogicalPlan.class, p -> p.childrenResolved() == false ? 
p : doRule(p, unionFieldAttributes, context)); } - private LogicalPlan doRule(LogicalPlan plan, List unionFieldAttributes) { + private LogicalPlan doRule(LogicalPlan plan, List unionFieldAttributes, AnalyzerContext context) { Holder alreadyAddedUnionFieldAttributes = new Holder<>(unionFieldAttributes.size()); // Collect field attributes from previous runs if (plan instanceof EsRelation rel) { unionFieldAttributes.clear(); for (Attribute attr : rel.output()) { - if (attr instanceof FieldAttribute fa && fa.field() instanceof MultiTypeEsField && fa.synthetic()) { + if (attr instanceof FieldAttribute fa && isMultiType(fa.field()) && fa.synthetic()) { unionFieldAttributes.add(fa.ignoreId()); } } @@ -2269,7 +2293,7 @@ private LogicalPlan doRule(LogicalPlan plan, List u // Replace the entire convert function with a new FieldAttribute (containing type conversion knowledge) plan = plan.transformExpressionsOnly(e -> { if (e instanceof ConvertFunction convert) { - return resolveConvertFunction(convert, unionFieldAttributes); + return resolveConvertFunction(convert, unionFieldAttributes, context); } return e; }); @@ -2322,7 +2346,11 @@ private static LogicalPlan addGeneratedFieldsToEsRelations(LogicalPlan plan, Lis return res; } - private Expression resolveConvertFunction(ConvertFunction convert, List unionFieldAttributes) { + private Expression resolveConvertFunction( + ConvertFunction convert, + List unionFieldAttributes, + AnalyzerContext context + ) { Expression convertExpression = (Expression) convert; if (convert.field() instanceof FieldAttribute fa && fa.field() instanceof InvalidMappedField imf) { HashMap typeResolutions = new HashMap<>(); @@ -2347,62 +2375,84 @@ private Expression resolveConvertFunction(ConvertFunction convert, List supportedTypes = convert.supportedTypes(); - if (supportedTypes.contains(fa.dataType()) && canConvertOriginalTypes(mtf, supportedTypes)) { - // Build the mapping between index name and conversion expressions - Map 
indexToConversionExpressions = new HashMap<>(); - for (Map.Entry entry : mtf.getIndexToConversionExpressions().entrySet()) { - String indexName = entry.getKey(); - AbstractConvertFunction originalConversionFunction = (AbstractConvertFunction) entry.getValue(); - Expression originalField = originalConversionFunction.field(); - Expression newConvertFunction = convertExpression.replaceChildren(Collections.singletonList(originalField)); - indexToConversionExpressions.put(indexName, newConvertFunction); - } + if (supportedTypes.contains(fa.dataType()) && canConvertOriginalTypes(fa.field(), supportedTypes)) { // The only code that creates MultiTypeEsField with synthetic=false (reaching this branch) is // DateMillisToNanosInEsRelation, which runs in the "Initialize" batch before ResolveUnmapped. At that point, // unmapped fields haven't been detected yet, so potentiallyUnmappedExpression is always null. - if (mtf.getPotentiallyUnmappedExpression() != null) { + if (((UnionTypeEsField) fa.field()).getUnmappedConversionExpression() != null) { throw new IllegalStateException("Unexpected potentially unmapped expression for [" + fa.fieldName() + "]"); } - MultiTypeEsField multiTypeEsField = new MultiTypeEsField( - fa.fieldName().string(), - convertExpression.dataType(), - false, - indexToConversionExpressions, - fa.field().getTimeSeriesFieldType(), - null - ); + EsField multiTypeEsField = rewrapWithCast(fa, convertExpression, context); return createIfDoesNotAlreadyExist(fa, multiTypeEsField, unionFieldAttributes); } } else if (convert.field() instanceof AbstractConvertFunction subConvert) { return convertExpression.replaceChildren( - Collections.singletonList(resolveConvertFunction(subConvert, unionFieldAttributes)) + Collections.singletonList(resolveConvertFunction(subConvert, unionFieldAttributes, context)) ); } return convertExpression; } + private static EsField rewrapWithCast(FieldAttribute fa, Expression convertExpression, AnalyzerContext context) { + // Wraps an existing 
union-type field's per-(index|type) conversions with another conversion expression on top, so the + // composite expression first does the original cast then the additional cast. Works for both legacy and v2 MultiTypeEsField. + if (fa.field() instanceof MultiTypeEsField mtf) { + Map indexToConversionExpressions = new HashMap<>(); + for (Map.Entry entry : mtf.getIndexToConversionExpressions().entrySet()) { + indexToConversionExpressions.put(entry.getKey(), wrapWith(convertExpression, entry.getValue())); + } + return new MultiTypeEsField( + fa.fieldName().string(), + convertExpression.dataType(), + false, + indexToConversionExpressions, + fa.field().getTimeSeriesFieldType(), + null + ); + } else { + MultiTypeEsField2 mtf = (MultiTypeEsField2) fa.field(); + Map typeToConversionExpressions = new HashMap<>(); + for (Map.Entry entry : mtf.getTypeToConversionExpressions().entrySet()) { + typeToConversionExpressions.put(entry.getKey(), wrapWith(convertExpression, entry.getValue())); + } + return new MultiTypeEsField2( + fa.fieldName().string(), + convertExpression.dataType(), + false, + typeToConversionExpressions, + fa.field().getTimeSeriesFieldType(), + null + ); + } + } + + private static Expression wrapWith(Expression convertExpression, Expression originalConvertFunction) { + AbstractConvertFunction inner = (AbstractConvertFunction) originalConvertFunction; + return convertExpression.replaceChildren(Collections.singletonList(inner.field())); + } + private Expression createIfDoesNotAlreadyExist( FieldAttribute fa, - MultiTypeEsField resolvedField, + EsField resolvedField, List unionFieldAttributes ) { // Generate new ID for the field and suffix it with the data type to maintain unique attribute names. 
@@ -2429,10 +2479,11 @@ private Expression createIfDoesNotAlreadyExist( } } - private static MultiTypeEsField resolvedMultiTypeEsField( + static EsField resolvedMultiTypeEsField( FieldAttribute fa, Map typeResolutions, - @Nullable Expression potentiallyUnmappedConversion + @Nullable Expression potentiallyUnmappedConversion, + AnalyzerContext context ) { Map typesToConversionExpressions = new HashMap<>(); InvalidMappedField imf = (InvalidMappedField) fa.field(); @@ -2443,13 +2494,14 @@ private static MultiTypeEsField resolvedMultiTypeEsField( typesToConversionExpressions.put(typeName, typeResolutions.get(key)); } }); - return MultiTypeEsField.resolveFrom(imf, typesToConversionExpressions) - .withPotentiallyUnmappedExpression(potentiallyUnmappedConversion); + return buildMultiTypeEsField(imf, typesToConversionExpressions, potentiallyUnmappedConversion, context); } - private static boolean canConvertOriginalTypes(MultiTypeEsField multiTypeEsField, Set supportedTypes) { - return multiTypeEsField.getIndexToConversionExpressions() - .values() + private static boolean canConvertOriginalTypes(EsField multiTypeEsField, Set supportedTypes) { + Map conversionExpressions = multiTypeEsField instanceof MultiTypeEsField legacy + ? legacy.getIndexToConversionExpressions() + : ((MultiTypeEsField2) multiTypeEsField).getTypeToConversionExpressions(); + return conversionExpressions.values() .stream() .allMatch( e -> e instanceof AbstractConvertFunction convertFunction @@ -2571,7 +2623,7 @@ public LogicalPlan apply(LogicalPlan plan, AnalyzerContext context) { // potentiallyUnmapped fields. This assertion guards against future changes breaking that invariant. 
assert imf.isPotentiallyUnmapped() == false : "Unexpected potentially unmapped field [" + imf.getName() + "] in DateMillisToNanosInEsRelation"; - var resolvedField = ResolveUnionTypes.resolvedMultiTypeEsField(f, typeResolutions, null); + var resolvedField = ResolveUnionTypes.resolvedMultiTypeEsField(f, typeResolutions, null, context); return new FieldAttribute( f.source(), f.parentName(), @@ -2618,19 +2670,19 @@ private static void typeResolutions( * are aggregate metric double + any combination of numerics, implicitly cast them to the same type: aggregate metric * double for count, and double for min, max, and sum. Avg gets replaced with its surrogate (Div(Sum, Count)) */ - private static class ImplicitCastAggregateMetricDoubles extends Rule { + private static class ImplicitCastAggregateMetricDoubles extends ParameterizedRule { private boolean isTimeSeries = false; @Override - public LogicalPlan apply(LogicalPlan plan) { + public LogicalPlan apply(LogicalPlan plan, AnalyzerContext context) { Holder indexMode = new Holder<>(IndexMode.STANDARD); plan.forEachUp(EsRelation.class, esRelation -> { indexMode.set(esRelation.indexMode()); }); isTimeSeries = indexMode.get() == IndexMode.TIME_SERIES; - return plan.transformUp(LogicalPlan.class, this::doRule); + return plan.transformUp(LogicalPlan.class, p -> doRule(p, context)); } - private LogicalPlan doRule(LogicalPlan plan) { + private LogicalPlan doRule(LogicalPlan plan, AnalyzerContext context) { if (plan instanceof EsRelation || plan instanceof Project || plan.childrenResolved() == false) { return plan; } @@ -2639,17 +2691,17 @@ private LogicalPlan doRule(LogicalPlan plan) { var newPlan = plan.transformExpressionsOnly(AggregateFunction.class, aggFunc -> { Expression child; if (aggFunc.field() instanceof ToAggregateMetricDouble toAMD) { - child = tryToTransformFunction(aggFunc, toAMD.field(), aborted, unionFields); + child = tryToTransformFunction(aggFunc, toAMD.field(), aborted, unionFields, context); } else { - child 
= tryToTransformFunction(aggFunc, aggFunc.field(), aborted, unionFields); + child = tryToTransformFunction(aggFunc, aggFunc.field(), aborted, unionFields, context); } return child; }).transformExpressionsOnly(EsqlBinaryComparison.class, comparison -> { Expression left = comparison.left(); Expression right = comparison.right(); Holder modified = new Holder<>(Boolean.FALSE); - left = tryToTransformBinaryComparison(comparison, left, modified, unionFields); - right = tryToTransformBinaryComparison(comparison, right, modified, unionFields); + left = tryToTransformBinaryComparison(comparison, left, modified, unionFields, context); + right = tryToTransformBinaryComparison(comparison, right, modified, unionFields, context); if (modified.get() == false) { return comparison; } @@ -2665,7 +2717,8 @@ private Expression tryToTransformBinaryComparison( EsqlBinaryComparison comparison, Expression original, Holder modified, - Map unionFields + Map unionFields, + AnalyzerContext context ) { if (original instanceof FieldAttribute fa && fa.field() instanceof InvalidMappedField imf && canBeCasted(imf)) { Map typeConverters = new HashMap<>(); @@ -2683,7 +2736,7 @@ private Expression tryToTransformBinaryComparison( fa.parentName(), fa.qualifier(), newName, - MultiTypeEsField.resolveFrom(imf, typeConverters), + ResolveUnionTypes.buildMultiTypeEsField(imf, typeConverters, null, context), fa.nullable(), null, true @@ -2704,7 +2757,8 @@ private Expression tryToTransformFunction( AggregateFunction aggFunc, Expression field, Holder aborted, - Map unionFields + Map unionFields, + AnalyzerContext context ) { if (field instanceof FieldAttribute fa && fa.field() instanceof InvalidMappedField imf) { if (canBeCasted(imf) == false) { @@ -2736,7 +2790,7 @@ private Expression tryToTransformFunction( fa.parentName(), fa.qualifier(), newName, - MultiTypeEsField.resolveFrom(imf, typeConverters), + ResolveUnionTypes.buildMultiTypeEsField(imf, typeConverters, null, context), fa.nullable(), null, true diff 
--git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java index f2d4db5c261f2..2cd7a7f60f4eb 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java @@ -34,8 +34,8 @@ import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.core.type.EsField; import org.elasticsearch.xpack.esql.core.type.InvalidMappedField; -import org.elasticsearch.xpack.esql.core.type.MultiTypeEsField; import org.elasticsearch.xpack.esql.core.type.PotentiallyUnmappedKeywordEsField; +import org.elasticsearch.xpack.esql.core.type.UnionTypeEsField; import org.elasticsearch.xpack.esql.core.type.UnsupportedEsField; import org.elasticsearch.xpack.esql.core.util.Holder; import org.elasticsearch.xpack.esql.expression.function.TimestampAware; @@ -620,7 +620,7 @@ private static AttributeSet partiallyUnmappedNonKeywords(LogicalPlan plan, Map properties, @@ -142,7 +142,7 @@ public boolean isPotentiallyUnmapped() { return isPotentiallyUnmapped; } - private static String makeErrorMessage(Map> typesToIndices, boolean includeInsistKeyword) { + static String makeErrorMessage(Map> typesToIndices, boolean includeInsistKeyword) { StringBuilder errorMessage = new StringBuilder(); var isInsistKeywordOnlyKeyword = includeInsistKeyword && typesToIndices.containsKey(DataType.KEYWORD.typeName()) == false; errorMessage.append("mapped as ["); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/InvalidMappedField2.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/InvalidMappedField2.java new file mode 100644 index 0000000000000..84c906946a68d --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/InvalidMappedField2.java @@ -0,0 +1,79 @@ +/* + 
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.core.type; + +import java.util.LinkedHashSet; +import java.util.Map; +import java.util.Set; +import java.util.TreeMap; + +/** + * Memory-frugal variant of {@link InvalidMappedField}: stores at most {@value #MAX_INDICES_PER_TYPE} concrete index names per source type + * (plus the {@value #ELLIPSIS} sentinel when more existed) instead of the full per-type index list. Wide union-typed fields routinely span + * thousands of indices but the only consumers that need the full list are the legacy index-keyed conversion structures, and they aren't + * used on transport versions that support {@link MultiTypeEsField2}. Truncating here lets the analyzed plan stay small while still + * producing a good "[a, b, c, ...]" error message: the message itself is rendered from the full input map at construction time and then + * stored as a string, so we lose only the post-construction ability to enumerate every index. + * + *
<p>
Just like its parent, the {@code typesToIndices} map is not sent over the wire (it only matters during analysis on the coordinator), + * so {@code InvalidMappedField2} serializes identically to {@link InvalidMappedField}. Round-tripping through the wire therefore yields a + * plain {@link InvalidMappedField} on the receiving side — that's fine because {@code typesToIndices} is empty after deserialization + * anyway, so the truncation no longer matters. + */ +public class InvalidMappedField2 extends InvalidMappedField { + + public static final String ELLIPSIS = "..."; + public static final int MAX_INDICES_PER_TYPE = 3; + + public InvalidMappedField2(String name, Map> typesToIndices) { + super(name, makeErrorMessage(typesToIndices, false), new TreeMap<>(), truncate(typesToIndices), false, TimeSeriesFieldType.UNKNOWN); + } + + public static InvalidMappedField2 potentiallyUnmapped(String name, Map> typesToIndices) { + return new InvalidMappedField2( + name, + makeErrorMessage(typesToIndices, true), + new TreeMap<>(), + truncate(typesToIndices), + true, + TimeSeriesFieldType.UNKNOWN + ); + } + + private InvalidMappedField2( + String name, + String errorMessage, + Map properties, + Map> typesToIndices, + boolean isPotentiallyUnmapped, + TimeSeriesFieldType type + ) { + super(name, errorMessage, properties, typesToIndices, isPotentiallyUnmapped, type); + } + + /** + * Cap each per-type index set at {@value #MAX_INDICES_PER_TYPE} entries, appending the {@value #ELLIPSIS} sentinel iff anything was + * dropped. The retained 3 are picked by sorted order so that the (already truncated) error message and the stored set stay + * consistent. 
+ */ + static Map> truncate(Map> typesToIndices) { + Map> result = new TreeMap<>(); + for (Map.Entry> entry : typesToIndices.entrySet()) { + Set indices = entry.getValue(); + if (indices.size() <= MAX_INDICES_PER_TYPE) { + result.put(entry.getKey(), Set.copyOf(indices)); + } else { + Set truncated = new LinkedHashSet<>(MAX_INDICES_PER_TYPE + 1); + indices.stream().sorted().limit(MAX_INDICES_PER_TYPE).forEach(truncated::add); + truncated.add(ELLIPSIS); + result.put(entry.getKey(), Set.copyOf(truncated)); + } + } + return result; + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/MultiTypeEsField.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/MultiTypeEsField.java index d825d13c32763..42cac052be5aa 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/MultiTypeEsField.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/MultiTypeEsField.java @@ -31,7 +31,7 @@ * this class instead of the {@link InvalidMappedField}. * This class is sent to the data nodes to inform them that they have to convert the type directly during field extraction. 
*/ -public class MultiTypeEsField extends EsField { +public class MultiTypeEsField extends EsField implements UnionTypeEsField { private static final TransportVersion POTENTIALLY_UNMAPPED_EXPRESSION = TransportVersion.fromName( "esql_potentially_unmapped_expression" ); @@ -94,6 +94,11 @@ public String getNodeStringName() { return potentiallyUnmappedExpression; } + @Override + public @Nullable Expression getUnmappedConversionExpression() { + return potentiallyUnmappedExpression; + } + public Map getIndexToConversionExpressions() { return indexToConversionExpressions; } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/MultiTypeEsField2.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/MultiTypeEsField2.java new file mode 100644 index 0000000000000..58791a3ff0156 --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/MultiTypeEsField2.java @@ -0,0 +1,177 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.esql.core.type; + +import org.elasticsearch.TransportVersion; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.core.Strings; +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; +import org.elasticsearch.xpack.esql.io.stream.PlanStreamOutput; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; +import java.util.Set; + +/** + * Memory-efficient variant of {@link MultiTypeEsField} that stores the per-source-type conversion + * expressions directly, rather than expanding them to one entry per index. Plus an optional + * {@code unmappedConversionExpression} for indices in which the field is unmapped (treated as + * {@link DataType#KEYWORD}). On a data node, the conversion expression is looked up by the field's + * locally-resolved data type. + * + *
<p>
This is the on-the-wire successor to {@link MultiTypeEsField}; the analyzer falls back to the + * legacy {@link MultiTypeEsField} when the cluster minimum transport version does not yet support + * {@code esql_multi_type_es_field_2}. + */ +public class MultiTypeEsField2 extends EsField implements UnionTypeEsField { + + public static final TransportVersion ESQL_MULTI_TYPE_ES_FIELD_2 = TransportVersion.fromName("esql_multi_type_es_field_2"); + + private final Map typeToConversionExpressions; + + @Nullable + private final Expression unmappedConversionExpression; + + public MultiTypeEsField2( + String name, + DataType dataType, + boolean aggregatable, + Map typeToConversionExpressions, + TimeSeriesFieldType timeSeriesFieldType, + @Nullable Expression unmappedConversionExpression + ) { + super(name, dataType, Map.of(), aggregatable, timeSeriesFieldType); + this.typeToConversionExpressions = typeToConversionExpressions; + this.unmappedConversionExpression = unmappedConversionExpression; + } + + protected MultiTypeEsField2(StreamInput in) throws IOException { + this( + ((PlanStreamInput) in).readCachedString(), + DataType.readFrom(in), + in.readBoolean(), + in.readImmutableMap(i -> i.readNamedWriteable(Expression.class)), + readTimeSeriesFieldType(in), + in.readOptionalNamedWriteable(Expression.class) + ); + } + + @Override + public void writeContent(StreamOutput out) throws IOException { + ((PlanStreamOutput) out).writeCachedString(getName()); + getDataType().writeTo(out); + out.writeBoolean(isAggregatable()); + out.writeMap(typeToConversionExpressions, (o, v) -> out.writeNamedWriteable(v)); + writeTimeSeriesFieldType(out); + out.writeOptionalNamedWriteable(unmappedConversionExpression); + } + + @Override + public String getWriteableName(TransportVersion transportVersion) { + return "MultiTypeEsField2"; + } + + @Override + public String getNodeStringName() { + return "MultiTypeEsField2"; + } + + public Map getTypeToConversionExpressions() { + return 
typeToConversionExpressions; + } + + /** + * Returns the conversion expression to apply for the given source {@link DataType}, or {@code null} + * if no conversion is registered for that type. Callers should fall back to + * {@link #getUnmappedConversionExpression()} when the field is unmapped in the local index. + */ + public @Nullable Expression getConversionExpressionForType(DataType type) { + return typeToConversionExpressions.get(type.typeName()); + } + + @Override + public @Nullable Expression getUnmappedConversionExpression() { + return unmappedConversionExpression; + } + + public MultiTypeEsField2 withUnmappedConversionExpression(@Nullable Expression unmappedConversionExpression) { + return new MultiTypeEsField2( + getName(), + getDataType(), + isAggregatable(), + typeToConversionExpressions, + getTimeSeriesFieldType(), + unmappedConversionExpression + ); + } + + /** + * Build a {@link MultiTypeEsField2} from the per-type resolutions previously computed against an + * {@link InvalidMappedField}. Only types present in {@code imf.getTypesToIndices()} for which a + * conversion was supplied are included. 
+ */ + public static MultiTypeEsField2 resolveFrom( + InvalidMappedField imf, + Map typesToConversionExpressions, + @Nullable Expression unmappedConversionExpression + ) { + Map> typesToIndices = imf.getTypesToIndices(); + DataType resolvedDataType = DataType.UNSUPPORTED; + Map filtered = new HashMap<>(); + for (String typeName : typesToIndices.keySet()) { + Expression convertExpr = typesToConversionExpressions.get(typeName); + if (convertExpr == null) { + continue; + } + if (resolvedDataType == DataType.UNSUPPORTED) { + resolvedDataType = convertExpr.dataType(); + } else if (resolvedDataType != convertExpr.dataType()) { + throw new IllegalArgumentException("Resolved data type mismatch: " + resolvedDataType + " != " + convertExpr.dataType()); + } + filtered.put(typeName, convertExpr); + } + if (resolvedDataType == DataType.UNSUPPORTED && unmappedConversionExpression != null) { + resolvedDataType = unmappedConversionExpression.dataType(); + } + return new MultiTypeEsField2( + imf.getName(), + resolvedDataType, + false, + filtered, + imf.getTimeSeriesFieldType(), + unmappedConversionExpression + ); + } + + @Override + public boolean equals(Object obj) { + if (super.equals(obj) == false) { + return false; + } + if (obj instanceof MultiTypeEsField2 other) { + return typeToConversionExpressions.equals(other.typeToConversionExpressions) + && Objects.equals(unmappedConversionExpression, other.unmappedConversionExpression); + } + return false; + } + + @Override + public int hashCode() { + return Objects.hash(super.hashCode(), typeToConversionExpressions, unmappedConversionExpression); + } + + @Override + public String toString() { + return Strings.format("%s (%s, %s)", super.toString(), typeToConversionExpressions, unmappedConversionExpression); + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/UnionTypeEsField.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/UnionTypeEsField.java new file mode 100644 
index 0000000000000..e4d9008ad09fe --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/UnionTypeEsField.java @@ -0,0 +1,28 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.core.type; + +import org.elasticsearch.core.Nullable; +import org.elasticsearch.xpack.esql.core.expression.Expression; + +/** + * Common interface implemented by both {@link MultiTypeEsField} (legacy, keyed by index name) and + * {@link MultiTypeEsField2} (newer, keyed by source data type) so that callers that only care about + * the existence of a per-(index|type) conversion or about the unmapped-side conversion can treat the + * two implementations uniformly. + */ +public interface UnionTypeEsField { + + /** + * Conversion expression to apply when the field is unmapped in the index, treating it as + * {@link DataType#KEYWORD}, or {@code null} if there is no such conversion (i.e. unmapped indices + * should produce {@code null}). 
+ */ + @Nullable + Expression getUnmappedConversionExpression(); +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java index ced68bff788e8..10f474f4f26ae 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java @@ -36,7 +36,7 @@ import org.elasticsearch.xpack.esql.core.tree.Node; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; -import org.elasticsearch.xpack.esql.core.type.MultiTypeEsField; +import org.elasticsearch.xpack.esql.core.type.UnionTypeEsField; import org.elasticsearch.xpack.esql.core.util.Holder; import org.elasticsearch.xpack.esql.evaluator.mapper.EvaluatorMapper; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.AbstractConvertFunction; @@ -516,13 +516,13 @@ private IndexedByShardId toShardConfigs(IndexedByShardId new ShardConfig(sc.toQuery(evaluatorQueryBuilder()), sc.searcher())); } - // TODO: this should likely be replaced by calls to FieldAttribute#fieldName; the MultiTypeEsField case looks - // wrong if `fieldAttribute` is a subfield, e.g. `parent.child` - multiTypeEsField#getName will just return `child`. + // TODO: this should likely be replaced by calls to FieldAttribute#fieldName; the UnionTypeEsField case looks + // wrong if `fieldAttribute` is a subfield, e.g. `parent.child` - EsField#getName will just return `child`. 
protected String getNameFromFieldAttribute(FieldAttribute fieldAttribute) { String fieldName = fieldAttribute.name(); - if (fieldAttribute.field() instanceof MultiTypeEsField multiTypeEsField) { - // If we have multiple field types, we allow the query to be done, but getting the underlying field name - fieldName = multiTypeEsField.getName(); + if (fieldAttribute.field() instanceof UnionTypeEsField) { + // If we have multiple field types, we allow the query to be done, but get the underlying field name + fieldName = fieldAttribute.field().getName(); } return fieldName; } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/FromAggregateMetricDouble.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/FromAggregateMetricDouble.java index 96ee4d9030b95..024a99abfd7e4 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/FromAggregateMetricDouble.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/FromAggregateMetricDouble.java @@ -29,7 +29,7 @@ import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; -import org.elasticsearch.xpack.esql.core.type.MultiTypeEsField; +import org.elasticsearch.xpack.esql.core.type.UnionTypeEsField; import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; import org.elasticsearch.xpack.esql.expression.function.Param; import org.elasticsearch.xpack.esql.expression.function.blockloader.BlockLoaderExpression; @@ -201,7 +201,7 @@ public Set supportedTypes() { public PushedBlockLoaderExpression tryPushToFieldLoading(SearchStats stats) { if (field() instanceof FieldAttribute f && f.dataType() == AGGREGATE_METRIC_DOUBLE - && (f.field() instanceof MultiTypeEsField) == false) { + && (f.field() instanceof 
UnionTypeEsField) == false) { var folded = subfieldIndex.fold(FoldContext.small()); if (folded == null) { throw new IllegalArgumentException("Subfield Index was null"); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceDateTruncBucketWithRoundTo.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceDateTruncBucketWithRoundTo.java index 3fa7c1b57a9a7..f1ada34dfd382 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceDateTruncBucketWithRoundTo.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceDateTruncBucketWithRoundTo.java @@ -19,7 +19,7 @@ import org.elasticsearch.xpack.esql.core.expression.function.Function; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; -import org.elasticsearch.xpack.esql.core.type.MultiTypeEsField; +import org.elasticsearch.xpack.esql.core.type.UnionTypeEsField; import org.elasticsearch.xpack.esql.core.util.Holder; import org.elasticsearch.xpack.esql.expression.function.grouping.Bucket; import org.elasticsearch.xpack.esql.expression.function.scalar.date.DateTrunc; @@ -96,7 +96,7 @@ private RoundTo maybeSubstituteWithRoundTo( Eval eval, TriFunction roundingFunction ) { - if (field instanceof FieldAttribute fa && fa.field() instanceof MultiTypeEsField == false && isDateTime(fa.dataType())) { + if (field instanceof FieldAttribute fa && fa.field() instanceof UnionTypeEsField == false && isDateTime(fa.dataType())) { DataType fieldType = fa.dataType(); FieldAttribute.FieldName fieldName = fa.fieldName(); // Extract min/max from SearchStats diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceFieldWithConstantOrNull.java 
b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceFieldWithConstantOrNull.java index 67a5f2965a888..e92f6d600c6bf 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceFieldWithConstantOrNull.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceFieldWithConstantOrNull.java @@ -17,8 +17,8 @@ import org.elasticsearch.xpack.esql.core.expression.MetadataAttribute; import org.elasticsearch.xpack.esql.core.expression.TimeSeriesMetadataAttribute; import org.elasticsearch.xpack.esql.core.type.MissingEsField; -import org.elasticsearch.xpack.esql.core.type.MultiTypeEsField; import org.elasticsearch.xpack.esql.core.type.PotentiallyUnmappedKeywordEsField; +import org.elasticsearch.xpack.esql.core.type.UnionTypeEsField; import org.elasticsearch.xpack.esql.expression.function.fulltext.FullTextFunction; import org.elasticsearch.xpack.esql.optimizer.LocalLogicalOptimizerContext; import org.elasticsearch.xpack.esql.optimizer.rules.RuleUtils; @@ -99,7 +99,7 @@ else if (esRelation.indexMode() == IndexMode.STANDARD) { private static boolean isPotentiallyUnmapped(FieldAttribute f) { return f.field() instanceof PotentiallyUnmappedKeywordEsField - || (f.field() instanceof MultiTypeEsField mtf && mtf.getPotentiallyUnmappedExpression() != null); + || (f.field() instanceof UnionTypeEsField utf && utf.getUnmappedConversionExpression() != null); } private LogicalPlan replaceWithNullOrConstant( diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java index 85bedb48fcf7b..da72a87016d22 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java +++ 
b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java @@ -84,7 +84,9 @@ import org.elasticsearch.xpack.esql.core.type.FunctionEsField; import org.elasticsearch.xpack.esql.core.type.KeywordEsField; import org.elasticsearch.xpack.esql.core.type.MultiTypeEsField; +import org.elasticsearch.xpack.esql.core.type.MultiTypeEsField2; import org.elasticsearch.xpack.esql.core.type.PotentiallyUnmappedKeywordEsField; +import org.elasticsearch.xpack.esql.core.type.UnionTypeEsField; import org.elasticsearch.xpack.esql.expression.function.BlockLoaderWarnings; import org.elasticsearch.xpack.esql.expression.function.blockloader.BlockLoaderExpression; import org.elasticsearch.xpack.esql.expression.function.scalar.EsqlScalarFunction; @@ -99,6 +101,7 @@ import org.elasticsearch.xpack.esql.planner.LocalExecutionPlanner.PhysicalOperation; import org.elasticsearch.xpack.esql.plugin.EsqlPlugin; import org.elasticsearch.xpack.esql.stats.SearchStats; +import org.elasticsearch.xpack.esql.type.EsqlDataTypeRegistry; import java.io.IOException; import java.util.ArrayList; @@ -249,7 +252,7 @@ private ValuesSourceReaderOperator.LoaderAndConverter blockLoaderAndConverter( functionConfig = functionEsField.functionConfig(); } boolean isUnsupported = attr.dataType() == DataType.UNSUPPORTED; - MultiTypeEsField unionTypes = findUnionTypes(attr); + UnionTypeEsField unionTypes = findUnionTypes(attr); if (unionTypes == null) { BlockLoader blockLoader = shardContext.blockLoader( fieldName, @@ -262,11 +265,23 @@ private ValuesSourceReaderOperator.LoaderAndConverter blockLoaderAndConverter( ); return ValuesSourceReaderOperator.load(blockLoader); } - // Use the fully qualified name `cluster:index-name` because multiple types are resolved on coordinator with the cluster prefix - String indexName = shardContext.ctx.getFullyQualifiedIndex().getName(); - Expression conversion = unionTypes.getConversionExpressionForIndex(indexName); + Expression conversion; + if 
(unionTypes instanceof MultiTypeEsField legacy) { + // Use the fully qualified name `cluster:index-name` because multiple types are resolved on coordinator with the cluster prefix + String indexName = shardContext.ctx.getFullyQualifiedIndex().getName(); + conversion = legacy.getConversionExpressionForIndex(indexName); + } else { + // Type-keyed lookup: resolve the field's local data type from the shard context, falling back to "unmapped" when the field has + // no local mapping. The conversion map is keyed by DataType.typeName(). + MappedFieldType mft = shardContext.fieldType(fieldName); + conversion = mft == null + ? null + : ((MultiTypeEsField2) unionTypes).getConversionExpressionForType( + EsqlDataTypeRegistry.INSTANCE.fromEs(mft.typeName(), mft.getMetricType()) + ); + } if (conversion == null) { - Expression potentiallyUnmapped = unionTypes.getPotentiallyUnmappedExpression(); + Expression potentiallyUnmapped = unionTypes.getUnmappedConversionExpression(); if (!(potentiallyUnmapped instanceof AbstractConvertFunction convert)) { return ValuesSourceReaderOperator.LOAD_CONSTANT_NULLS; } @@ -394,9 +409,9 @@ static MappedFieldType createUnmappedFieldType(String name, DefaultShardContext } } - private static @Nullable MultiTypeEsField findUnionTypes(Attribute attr) { - if (attr instanceof FieldAttribute fa && fa.field() instanceof MultiTypeEsField multiTypeEsField) { - return multiTypeEsField; + private static @Nullable UnionTypeEsField findUnionTypes(Attribute attr) { + if (attr instanceof FieldAttribute fa && fa.field() instanceof UnionTypeEsField unionTypeEsField) { + return unionTypeEsField; } return null; } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java index a4e4545ce0e08..d2f72fc2ac6d9 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java +++ 
b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java @@ -49,8 +49,8 @@ import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.core.type.EsField; import org.elasticsearch.xpack.esql.core.type.InvalidMappedField; -import org.elasticsearch.xpack.esql.core.type.MultiTypeEsField; import org.elasticsearch.xpack.esql.core.type.PotentiallyUnmappedKeywordEsField; +import org.elasticsearch.xpack.esql.core.type.UnionTypeEsField; import org.elasticsearch.xpack.esql.enrich.ResolvedEnrichPolicy; import org.elasticsearch.xpack.esql.expression.Order; import org.elasticsearch.xpack.esql.expression.function.aggregate.Count; @@ -6737,7 +6737,7 @@ private void verifyNameAndTypeAndMultiTypeEsField( } private boolean isMultiTypeEsField(Expression e) { - return e instanceof FieldAttribute fa && fa.field() instanceof MultiTypeEsField; + return e instanceof FieldAttribute fa && fa.field() instanceof UnionTypeEsField; } @Override diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java index f5c82f900cbb3..e91272b2a7ac8 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java @@ -42,7 +42,7 @@ import org.elasticsearch.xpack.esql.core.expression.UnsupportedAttribute; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; -import org.elasticsearch.xpack.esql.core.type.MultiTypeEsField; +import org.elasticsearch.xpack.esql.core.type.UnionTypeEsField; import org.elasticsearch.xpack.esql.core.util.Holder; import org.elasticsearch.xpack.esql.expression.Order; import 
org.elasticsearch.xpack.esql.expression.function.aggregate.Count; @@ -2615,7 +2615,7 @@ public void testSortWithLimitBy() { } private boolean isMultiTypeEsField(Expression e) { - return e instanceof FieldAttribute fa && fa.field() instanceof MultiTypeEsField; + return e instanceof FieldAttribute fa && fa.field() instanceof UnionTypeEsField; } private Stat queryStatsFor(PhysicalPlan plan) { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/InvalidMappedField2Tests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/InvalidMappedField2Tests.java new file mode 100644 index 0000000000000..817ba7ebf721e --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/InvalidMappedField2Tests.java @@ -0,0 +1,112 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.esql.type; + +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.core.type.InvalidMappedField; +import org.elasticsearch.xpack.esql.core.type.InvalidMappedField2; + +import java.util.LinkedHashSet; +import java.util.Map; +import java.util.Set; +import java.util.TreeMap; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +import static org.hamcrest.Matchers.containsInAnyOrder; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.hasEntry; +import static org.hamcrest.Matchers.not; + +public class InvalidMappedField2Tests extends ESTestCase { + + public void testKeepsAllIndicesWhenAtOrBelowLimit() { + Map> input = Map.of( + DataType.KEYWORD.typeName(), + new LinkedHashSet<>(Set.of("idx_a", "idx_b")), + DataType.LONG.typeName(), + new LinkedHashSet<>(Set.of("idx_c", "idx_d", "idx_e")) + ); + + InvalidMappedField2 field = new InvalidMappedField2("f", input); + + Map> stored = field.getTypesToIndices(); + assertThat(stored.keySet(), containsInAnyOrder(DataType.KEYWORD.typeName(), DataType.LONG.typeName())); + assertThat(stored.get(DataType.KEYWORD.typeName()), containsInAnyOrder("idx_a", "idx_b")); + assertThat(stored.get(DataType.LONG.typeName()), containsInAnyOrder("idx_c", "idx_d", "idx_e")); + for (Set indices : stored.values()) { + assertThat(indices, not(containsInAnyOrder(InvalidMappedField2.ELLIPSIS))); + } + } + + public void testTruncatesAboveLimitAndAddsEllipsisSentinel() { + Set manyIndices = IntStream.range(0, 5_000) + .mapToObj(i -> String.format("idx_%05d", i)) + .collect(Collectors.toCollection(LinkedHashSet::new)); + Map> input = Map.of(DataType.KEYWORD.typeName(), manyIndices); + + InvalidMappedField2 field = new InvalidMappedField2("f", input); + + Set stored = field.getTypesToIndices().get(DataType.KEYWORD.typeName()); + 
assertThat(stored, equalTo(Set.of("idx_00000", "idx_00001", "idx_00002", InvalidMappedField2.ELLIPSIS))); + } + + public void testErrorMessageReflectsFullInputCountEvenAfterTruncation() { + Set manyIndices = IntStream.range(0, 5_000) + .mapToObj(i -> String.format("idx_%05d", i)) + .collect(Collectors.toCollection(LinkedHashSet::new)); + Map> input = new TreeMap<>(Map.of(DataType.KEYWORD.typeName(), manyIndices)); + + String message = new InvalidMappedField2("f", input).errorMessage(); + + assertThat(message, containsString("[1] incompatible types")); + assertThat(message, containsString("[idx_00000, idx_00001, idx_00002]")); + assertThat(message, containsString("[" + (5_000 - 3) + "] other indices")); + } + + public void testErrorMessageMatchesInvalidMappedFieldForSmallInputs() { + Map> input = new TreeMap<>( + Map.of( + DataType.KEYWORD.typeName(), + new LinkedHashSet<>(Set.of("idx_a", "idx_b")), + DataType.LONG.typeName(), + new LinkedHashSet<>(Set.of("idx_c")) + ) + ); + + assertThat(new InvalidMappedField2("f", input).errorMessage(), equalTo(new InvalidMappedField("f", input).errorMessage())); + } + + public void testPotentiallyUnmappedFlagAndMessageInsistOnKeyword() { + Map> input = new TreeMap<>(Map.of(DataType.LONG.typeName(), new LinkedHashSet<>(Set.of("idx_a")))); + + InvalidMappedField2 field = InvalidMappedField2.potentiallyUnmapped("f", input); + + assertThat(field.isPotentiallyUnmapped(), equalTo(true)); + assertThat(field.errorMessage(), containsString("[keyword] due to loading from _source")); + assertThat(field.getTypesToIndices(), hasEntry(DataType.LONG.typeName(), Set.of("idx_a"))); + } + + public void testTypesReflectsKeysOfTruncatedMap() { + Map> input = new TreeMap<>( + Map.of( + DataType.KEYWORD.typeName(), + IntStream.range(0, 100).mapToObj(i -> "k" + i).collect(Collectors.toSet()), + DataType.LONG.typeName(), + Set.of("only") + ) + ); + + InvalidMappedField2 field = new InvalidMappedField2("f", input); + + assertThat(field.types(), 
containsInAnyOrder(DataType.KEYWORD, DataType.LONG)); + } + +} diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsField2Tests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsField2Tests.java new file mode 100644 index 0000000000000..e6c238099ac4e --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsField2Tests.java @@ -0,0 +1,168 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.type; + +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.core.type.EsField; +import org.elasticsearch.xpack.esql.core.type.MultiTypeEsField2; +import org.elasticsearch.xpack.esql.expression.ExpressionWritables; +import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToBoolean; +import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToCartesianPoint; +import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToCartesianShape; +import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDatetime; +import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDouble; +import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToGeoPoint; +import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToGeoShape; +import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToInteger; +import 
org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToIpLeadingZerosRejected; +import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToLong; +import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToString; +import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToVersion; +import org.elasticsearch.xpack.esql.session.Configuration; +import org.junit.Before; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static org.elasticsearch.xpack.esql.ConfigurationTestUtils.randomConfiguration; + +/** + * Mirror of {@link MultiTypeEsFieldTests} for the type-keyed {@link MultiTypeEsField2}. + */ +public class MultiTypeEsField2Tests extends AbstractEsFieldTypeTests { + + private Configuration config; + + @Before + public void initConfig() { + config = randomConfiguration(); + } + + @Override + protected Configuration config() { + return config; + } + + @Override + protected MultiTypeEsField2 createTestInstance() { + String name = randomAlphaOfLength(4); + boolean toString = randomBoolean(); + DataType dataType = randomFrom(types()); + DataType toType = toString ? DataType.KEYWORD : dataType; + Map typeToConvertExpressions = randomConvertExpressions(name, toString, dataType); + Expression unmappedConversionExpression = randomBoolean() ? 
null : createToString(name, dataType); + + EsField.TimeSeriesFieldType tsType = randomFrom(EsField.TimeSeriesFieldType.values()); + + return new MultiTypeEsField2(name, toType, false, typeToConvertExpressions, tsType, unmappedConversionExpression); + } + + @Override + protected MultiTypeEsField2 mutateInstance(MultiTypeEsField2 instance) throws IOException { + String name = instance.getName(); + DataType dataType = instance.getDataType(); + Map typeToConvertExpressions = instance.getTypeToConversionExpressions(); + EsField.TimeSeriesFieldType tsType = instance.getTimeSeriesFieldType(); + Expression unmappedConversionExpression = instance.getUnmappedConversionExpression(); + switch (between(0, 4)) { + case 0 -> name = randomAlphaOfLength(name.length() + 1); + case 1 -> dataType = randomValueOtherThan(dataType, () -> randomFrom(DataType.types())); + case 2 -> typeToConvertExpressions = mutateConvertExpressions(name, dataType, typeToConvertExpressions); + case 3 -> tsType = randomValueOtherThan(tsType, () -> randomFrom(EsField.TimeSeriesFieldType.values())); + case 4 -> unmappedConversionExpression = unmappedConversionExpression != null ? null : createToString(name, dataType); + default -> throw new IllegalArgumentException(); + } + return new MultiTypeEsField2(name, dataType, false, typeToConvertExpressions, tsType, unmappedConversionExpression); + } + + @Override + protected final NamedWriteableRegistry getNamedWriteableRegistry() { + List entries = new ArrayList<>(ExpressionWritables.allExpressions()); + entries.addAll(ExpressionWritables.unaryScalars()); + return new NamedWriteableRegistry(entries); + } + + /** + * Random map keyed by {@link DataType#typeName()}, so it can be used as the source-type map of + * {@link MultiTypeEsField2}. 
+ */ + private Map randomConvertExpressions(String name, boolean toString, DataType dataType) { + Map typeToConvertExpressions = new HashMap<>(); + if (toString) { + typeToConvertExpressions.put(DataType.KEYWORD.typeName(), createToString(name, DataType.KEYWORD)); + typeToConvertExpressions.put(dataType.typeName(), createToString(name, dataType)); + } else { + typeToConvertExpressions.put(DataType.KEYWORD.typeName(), testConvertExpression(name, DataType.KEYWORD, dataType)); + typeToConvertExpressions.put(dataType.typeName(), testConvertExpression(name, dataType, dataType)); + } + return typeToConvertExpressions; + } + + private Map mutateConvertExpressions( + String name, + DataType toType, + Map typeToConvertExpressions + ) { + return randomValueOtherThan( + typeToConvertExpressions, + () -> randomConvertExpressions(name, toType == DataType.KEYWORD, randomFrom(types())) + ); + } + + private static List types() { + return List.of( + DataType.BOOLEAN, + DataType.DATETIME, + DataType.DOUBLE, + DataType.FLOAT, + DataType.INTEGER, + DataType.IP, + DataType.KEYWORD, + DataType.LONG, + DataType.GEO_POINT, + DataType.GEO_SHAPE, + DataType.CARTESIAN_POINT, + DataType.CARTESIAN_SHAPE, + DataType.VERSION + ); + } + + private Expression testConvertExpression(String name, DataType fromType, DataType toType) { + FieldAttribute fromField = fieldAttribute(name, fromType); + return switch (toType) { + case BOOLEAN -> new ToBoolean(Source.EMPTY, fromField); + case DATETIME -> new ToDatetime(Source.EMPTY, fromField, config()); + case DOUBLE, FLOAT -> new ToDouble(Source.EMPTY, fromField); + case INTEGER -> new ToInteger(Source.EMPTY, fromField); + case LONG -> new ToLong(Source.EMPTY, fromField); + case IP -> new ToIpLeadingZerosRejected(Source.EMPTY, fromField); + case KEYWORD, TEXT -> new ToString(Source.EMPTY, fromField, config()); + case GEO_POINT -> new ToGeoPoint(Source.EMPTY, fromField); + case GEO_SHAPE -> new ToGeoShape(Source.EMPTY, fromField); + case CARTESIAN_POINT -> 
new ToCartesianPoint(Source.EMPTY, fromField); + case CARTESIAN_SHAPE -> new ToCartesianShape(Source.EMPTY, fromField); + case VERSION -> new ToVersion(Source.EMPTY, fromField); + default -> throw new UnsupportedOperationException("Conversion from " + fromType + " to " + toType + " is not supported"); + }; + } + + private static FieldAttribute fieldAttribute(String name, DataType dataType) { + return new FieldAttribute(Source.EMPTY, name, new EsField(name, dataType, Map.of(), true, EsField.TimeSeriesFieldType.NONE)); + } + + private ToString createToString(String name, DataType dataType) { + return new ToString(Source.EMPTY, fieldAttribute(name, dataType), config()); + } +} diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsFieldMemoryTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsFieldMemoryTests.java new file mode 100644 index 0000000000000..e1016fe35418c --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsFieldMemoryTests.java @@ -0,0 +1,129 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.esql.type; + +import org.apache.lucene.tests.util.RamUsageTester; +import org.elasticsearch.index.IndexMode; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.test.TransportVersionUtils; +import org.elasticsearch.xpack.esql.core.type.EsField; +import org.elasticsearch.xpack.esql.core.type.InvalidMappedField; +import org.elasticsearch.xpack.esql.core.type.InvalidMappedField2; +import org.elasticsearch.xpack.esql.core.type.MultiTypeEsField2; +import org.elasticsearch.xpack.esql.index.EsIndex; +import org.elasticsearch.xpack.esql.index.IndexResolution; +import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; + +import java.lang.reflect.Field; +import java.time.ZoneId; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Locale; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +import static org.elasticsearch.xpack.esql.EsqlTestUtils.analyzer; +import static org.hamcrest.Matchers.lessThan; + +/** + * End-to-end check that an analyzed plan over many union-typed fields, each conflicting across thousands of indices, retains substantially + * less memory under {@link MultiTypeEsField2} (paired with {@link InvalidMappedField2}'s truncated index lists) than under the legacy + * {@link org.elasticsearch.xpack.esql.core.type.MultiTypeEsField} (keyed per-index) paired with a full {@link InvalidMappedField}. + * + *

The cost we're targeting is {@code O(num_fields * num_indices)}: each conflicting field expands into its own per-index conversion + * map under legacy, and per-type under v2. With many fields the constant overhead from {@link EsIndex#indexNameWithModes} becomes a + * small fixed tax, and the savings show up cleanly at plan-total scope. + * + *

One subtlety in the fixture: each conflicting field gets its own {@code typesToIndices} map. Sharing one across all fields + * (even though it's logically the same content) makes {@link RamUsageTester}'s identity-based dedup collapse all 50 analyzer-derived + * conversion structures down to roughly one field's worth, which masks the savings. Production index resolutions don't share these maps + * across fields - they're built per-field from {@code FieldCapabilitiesResponse} - so the per-field copy here is the realistic case. + */ +public class MultiTypeEsFieldMemoryTests extends ESTestCase { + + private static final int NUM_INDICES = 5_000; + private static final int NUM_CONFLICTING_FIELDS = 50; + + /** + * {@link RamUsageTester} walks reflectively, which fails on JDK-internal classes (e.g. {@code sun.util.locale.BaseLocale}) that + * aren't opened to unnamed modules. The plan transitively references a {@link Locale} and a {@link ZoneId} via the analyzer's + * {@code Configuration}, so we treat those as opaque - they're irrelevant to the union-type memory we care about here. 
+ */ + private static final RamUsageTester.Accumulator ACCUMULATOR = new RamUsageTester.Accumulator() { + @Override + public long accumulateObject(Object o, long shallowSize, Map fieldValues, Collection queue) { + if (o instanceof Locale || o instanceof ZoneId) { + return shallowSize; + } + return super.accumulateObject(o, shallowSize, fieldValues, queue); + } + }; + + public void testV2AnalyzedPlanIsAtLeastTenTimesSmallerThanLegacy() { + String query = buildExplicitConversionQuery(NUM_CONFLICTING_FIELDS); + + LogicalPlan legacyPlan = analyzer().addIndex(unionTypedIndex(NUM_INDICES, NUM_CONFLICTING_FIELDS, false)) + .minimumTransportVersion(TransportVersionUtils.randomVersionNotSupporting(MultiTypeEsField2.ESQL_MULTI_TYPE_ES_FIELD_2)) + .query(query); + LogicalPlan v2Plan = analyzer().addIndex(unionTypedIndex(NUM_INDICES, NUM_CONFLICTING_FIELDS, true)) + .minimumTransportVersion(MultiTypeEsField2.ESQL_MULTI_TYPE_ES_FIELD_2) + .query(query); + + long legacyBytes = RamUsageTester.ramUsed(legacyPlan, ACCUMULATOR); + long v2Bytes = RamUsageTester.ramUsed(v2Plan, ACCUMULATOR); + logger.info("legacy plan bytes={}, v2 plan bytes={}", legacyBytes, v2Bytes); + assertThat(v2Bytes * 10L, lessThan(legacyBytes)); + } + + /** + * Build a query that forces the analyzer to materialize a {@code MultiTypeEsField}/{@link MultiTypeEsField2} for every + * {@code id_} field by explicitly casting each to keyword. 
+ */ + private static String buildExplicitConversionQuery(int numFields) { + String evalAssignments = IntStream.range(0, numFields) + .mapToObj(i -> "id_" + i + "_kw = id_" + i + "::keyword") + .collect(Collectors.joining(", ")); + String keepFields = IntStream.range(0, numFields).mapToObj(i -> "id_" + i + "_kw").collect(Collectors.joining(", ")); + return "FROM idx* | EVAL " + evalAssignments + " | KEEP " + keepFields + " | LIMIT 1"; + } + + /** + * Build a fake "idx*" pattern with {@code numIndices} concrete indices and {@code numConflictingFields} fields {@code id_0..id_}, + * each with type {@code keyword} in half of the indices and {@code integer} in the other half. When {@code compact} is true the + * conflicting fields are built from {@link InvalidMappedField2} (truncated index lists), matching what a v2-capable coordinator + * produces; otherwise the full {@link InvalidMappedField} is used. + */ + private static IndexResolution unionTypedIndex(int numIndices, int numConflictingFields, boolean compact) { + Map indexNamesWithModes = new HashMap<>(); + for (int i = 0; i < numIndices; i++) { + indexNamesWithModes.put("idx_" + i, IndexMode.STANDARD); + } + Map mapping = new HashMap<>(); + for (int i = 0; i < numConflictingFields; i++) { + String fieldName = "id_" + i; + Map> perFieldTypesToIndices = new HashMap<>(); + perFieldTypesToIndices.put("keyword", new HashSet<>()); + perFieldTypesToIndices.put("integer", new HashSet<>()); + for (int j = 0; j < numIndices; j++) { + String idxName = "idx_" + j; + perFieldTypesToIndices.get(j % 2 == 0 ? "keyword" : "integer").add(idxName); + } + mapping.put( + fieldName, + compact + ? 
new InvalidMappedField2(fieldName, perFieldTypesToIndices) + : new InvalidMappedField(fieldName, perFieldTypesToIndices) + ); + } + EsIndex esIndex = new EsIndex("idx*", mapping, indexNamesWithModes, Map.of(), Map.of()); + return IndexResolution.valid(esIndex); + } +} From e153204ab61b27e5a786cc57bda7c712e9b31467 Mon Sep 17 00:00:00 2001 From: Gal Lalouche Date: Tue, 28 Apr 2026 19:19:25 +0300 Subject: [PATCH 07/20] Some human refactors --- .../xpack/esql/analysis/Analyzer.java | 15 ++++++----- .../esql/core/type/InvalidMappedField2.java | 7 +++--- .../esql/type/InvalidMappedField2Tests.java | 5 ---- .../type/MultiTypeEsFieldMemoryTests.java | 25 +++++++------------ 4 files changed, 19 insertions(+), 33 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java index cda937e1fcaa7..0973c0b059317 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java @@ -2249,7 +2249,7 @@ private static class ResolveUnionTypes extends ParameterizedRule typesToConversionExpressions, @Nullable Expression unmappedConversionExpression, AnalyzerContext context ) { - if (context.minimumVersion().supports(MultiTypeEsField2.ESQL_MULTI_TYPE_ES_FIELD_2)) { - return MultiTypeEsField2.resolveFrom(imf, typesToConversionExpressions, unmappedConversionExpression); - } - return MultiTypeEsField.resolveFrom(imf, typesToConversionExpressions) - .withPotentiallyUnmappedExpression(unmappedConversionExpression); + return context.minimumVersion().supports(MultiTypeEsField2.ESQL_MULTI_TYPE_ES_FIELD_2) + ? 
MultiTypeEsField2.resolveFrom(imf, typesToConversionExpressions, unmappedConversionExpression) + : MultiTypeEsField.resolveFrom(imf, typesToConversionExpressions) + .withPotentiallyUnmappedExpression(unmappedConversionExpression); } @Override public LogicalPlan apply(LogicalPlan plan, AnalyzerContext context) { List unionFieldAttributes = new ArrayList<>(); - return plan.transformUp(LogicalPlan.class, p -> p.childrenResolved() == false ? p : doRule(p, unionFieldAttributes, context)); + return plan.transformUp(LogicalPlan.class, p -> p.childrenResolved() ? doRule(p, unionFieldAttributes, context) : p); } private LogicalPlan doRule(LogicalPlan plan, List unionFieldAttributes, AnalyzerContext context) { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/InvalidMappedField2.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/InvalidMappedField2.java index 84c906946a68d..3b749de61b896 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/InvalidMappedField2.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/InvalidMappedField2.java @@ -26,9 +26,8 @@ * anyway, so the truncation no longer matters. */ public class InvalidMappedField2 extends InvalidMappedField { - - public static final String ELLIPSIS = "..."; - public static final int MAX_INDICES_PER_TYPE = 3; + private static final String ELLIPSIS = "..."; + private static final int MAX_INDICES_PER_TYPE = 3; public InvalidMappedField2(String name, Map> typesToIndices) { super(name, makeErrorMessage(typesToIndices, false), new TreeMap<>(), truncate(typesToIndices), false, TimeSeriesFieldType.UNKNOWN); @@ -61,7 +60,7 @@ private InvalidMappedField2( * dropped. The retained 3 are picked by sorted order so that the (already truncated) error message and the stored set stay * consistent. 
*/ - static Map> truncate(Map> typesToIndices) { + private static Map> truncate(Map> typesToIndices) { Map> result = new TreeMap<>(); for (Map.Entry> entry : typesToIndices.entrySet()) { Set indices = entry.getValue(); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/InvalidMappedField2Tests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/InvalidMappedField2Tests.java index 817ba7ebf721e..8425e00be3ade 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/InvalidMappedField2Tests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/InvalidMappedField2Tests.java @@ -23,10 +23,8 @@ import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.hasEntry; -import static org.hamcrest.Matchers.not; public class InvalidMappedField2Tests extends ESTestCase { - public void testKeepsAllIndicesWhenAtOrBelowLimit() { Map> input = Map.of( DataType.KEYWORD.typeName(), @@ -41,9 +39,6 @@ public void testKeepsAllIndicesWhenAtOrBelowLimit() { assertThat(stored.keySet(), containsInAnyOrder(DataType.KEYWORD.typeName(), DataType.LONG.typeName())); assertThat(stored.get(DataType.KEYWORD.typeName()), containsInAnyOrder("idx_a", "idx_b")); assertThat(stored.get(DataType.LONG.typeName()), containsInAnyOrder("idx_c", "idx_d", "idx_e")); - for (Set indices : stored.values()) { - assertThat(indices, not(containsInAnyOrder(InvalidMappedField2.ELLIPSIS))); - } } public void testTruncatesAboveLimitAndAddsEllipsisSentinel() { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsFieldMemoryTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsFieldMemoryTests.java index e1016fe35418c..f667eb3e070aa 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsFieldMemoryTests.java +++ 
b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsFieldMemoryTests.java @@ -48,7 +48,6 @@ * across fields - they're built per-field from {@code FieldCapabilitiesResponse} - so the per-field copy here is the realistic case. */ public class MultiTypeEsFieldMemoryTests extends ESTestCase { - private static final int NUM_INDICES = 5_000; private static final int NUM_CONFLICTING_FIELDS = 50; @@ -60,26 +59,22 @@ public class MultiTypeEsFieldMemoryTests extends ESTestCase { private static final RamUsageTester.Accumulator ACCUMULATOR = new RamUsageTester.Accumulator() { @Override public long accumulateObject(Object o, long shallowSize, Map fieldValues, Collection queue) { - if (o instanceof Locale || o instanceof ZoneId) { - return shallowSize; - } - return super.accumulateObject(o, shallowSize, fieldValues, queue); + return o instanceof Locale || o instanceof ZoneId ? shallowSize : super.accumulateObject(o, shallowSize, fieldValues, queue); } }; public void testV2AnalyzedPlanIsAtLeastTenTimesSmallerThanLegacy() { String query = buildExplicitConversionQuery(NUM_CONFLICTING_FIELDS); - LogicalPlan legacyPlan = analyzer().addIndex(unionTypedIndex(NUM_INDICES, NUM_CONFLICTING_FIELDS, false)) + LogicalPlan legacyPlan = analyzer().addIndex(unionTypedIndex(false)) .minimumTransportVersion(TransportVersionUtils.randomVersionNotSupporting(MultiTypeEsField2.ESQL_MULTI_TYPE_ES_FIELD_2)) .query(query); - LogicalPlan v2Plan = analyzer().addIndex(unionTypedIndex(NUM_INDICES, NUM_CONFLICTING_FIELDS, true)) + LogicalPlan v2Plan = analyzer().addIndex(unionTypedIndex(true)) .minimumTransportVersion(MultiTypeEsField2.ESQL_MULTI_TYPE_ES_FIELD_2) .query(query); long legacyBytes = RamUsageTester.ramUsed(legacyPlan, ACCUMULATOR); long v2Bytes = RamUsageTester.ramUsed(v2Plan, ACCUMULATOR); - logger.info("legacy plan bytes={}, v2 plan bytes={}", legacyBytes, v2Bytes); assertThat(v2Bytes * 10L, lessThan(legacyBytes)); } @@ -101,20 +96,19 @@ private static String 
buildExplicitConversionQuery(int numFields) { * conflicting fields are built from {@link InvalidMappedField2} (truncated index lists), matching what a v2-capable coordinator * produces; otherwise the full {@link InvalidMappedField} is used. */ - private static IndexResolution unionTypedIndex(int numIndices, int numConflictingFields, boolean compact) { + private static IndexResolution unionTypedIndex(boolean compact) { Map indexNamesWithModes = new HashMap<>(); - for (int i = 0; i < numIndices; i++) { + for (int i = 0; i < MultiTypeEsFieldMemoryTests.NUM_INDICES; i++) { indexNamesWithModes.put("idx_" + i, IndexMode.STANDARD); } Map mapping = new HashMap<>(); - for (int i = 0; i < numConflictingFields; i++) { + for (int i = 0; i < MultiTypeEsFieldMemoryTests.NUM_CONFLICTING_FIELDS; i++) { String fieldName = "id_" + i; Map> perFieldTypesToIndices = new HashMap<>(); perFieldTypesToIndices.put("keyword", new HashSet<>()); perFieldTypesToIndices.put("integer", new HashSet<>()); - for (int j = 0; j < numIndices; j++) { - String idxName = "idx_" + j; - perFieldTypesToIndices.get(j % 2 == 0 ? "keyword" : "integer").add(idxName); + for (int j = 0; j < MultiTypeEsFieldMemoryTests.NUM_INDICES; j++) { + perFieldTypesToIndices.get(j % 2 == 0 ? 
"keyword" : "integer").add("idx_" + j); } mapping.put( fieldName, @@ -123,7 +117,6 @@ private static IndexResolution unionTypedIndex(int numIndices, int numConflictin : new InvalidMappedField(fieldName, perFieldTypesToIndices) ); } - EsIndex esIndex = new EsIndex("idx*", mapping, indexNamesWithModes, Map.of(), Map.of()); - return IndexResolution.valid(esIndex); + return IndexResolution.valid(new EsIndex("idx*", mapping, indexNamesWithModes, Map.of(), Map.of())); } } From 9bcb6c8ffc6e2651fd5dc8b4314bdd3b8494858f Mon Sep 17 00:00:00 2001 From: Gal Lalouche Date: Tue, 28 Apr 2026 19:26:55 +0300 Subject: [PATCH 08/20] matchesMap --- .../esql/type/InvalidMappedField2Tests.java | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/InvalidMappedField2Tests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/InvalidMappedField2Tests.java index 817ba7ebf721e..c9ad1ce0e3e19 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/InvalidMappedField2Tests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/InvalidMappedField2Tests.java @@ -19,11 +19,11 @@ import java.util.stream.Collectors; import java.util.stream.IntStream; +import static org.elasticsearch.test.MapMatcher.assertMap; +import static org.elasticsearch.test.MapMatcher.matchesMap; import static org.hamcrest.Matchers.containsInAnyOrder; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; -import static org.hamcrest.Matchers.hasEntry; -import static org.hamcrest.Matchers.not; public class InvalidMappedField2Tests extends ESTestCase { @@ -37,13 +37,11 @@ public void testKeepsAllIndicesWhenAtOrBelowLimit() { InvalidMappedField2 field = new InvalidMappedField2("f", input); - Map> stored = field.getTypesToIndices(); - assertThat(stored.keySet(), containsInAnyOrder(DataType.KEYWORD.typeName(), 
DataType.LONG.typeName())); - assertThat(stored.get(DataType.KEYWORD.typeName()), containsInAnyOrder("idx_a", "idx_b")); - assertThat(stored.get(DataType.LONG.typeName()), containsInAnyOrder("idx_c", "idx_d", "idx_e")); - for (Set indices : stored.values()) { - assertThat(indices, not(containsInAnyOrder(InvalidMappedField2.ELLIPSIS))); - } + assertMap( + field.getTypesToIndices(), + matchesMap().entry(DataType.KEYWORD.typeName(), Set.of("idx_a", "idx_b")) + .entry(DataType.LONG.typeName(), Set.of("idx_c", "idx_d", "idx_e")) + ); } public void testTruncatesAboveLimitAndAddsEllipsisSentinel() { @@ -54,8 +52,10 @@ public void testTruncatesAboveLimitAndAddsEllipsisSentinel() { InvalidMappedField2 field = new InvalidMappedField2("f", input); - Set stored = field.getTypesToIndices().get(DataType.KEYWORD.typeName()); - assertThat(stored, equalTo(Set.of("idx_00000", "idx_00001", "idx_00002", InvalidMappedField2.ELLIPSIS))); + assertMap( + field.getTypesToIndices(), + matchesMap().entry(DataType.KEYWORD.typeName(), Set.of("idx_00000", "idx_00001", "idx_00002", InvalidMappedField2.ELLIPSIS)) + ); } public void testErrorMessageReflectsFullInputCountEvenAfterTruncation() { @@ -91,7 +91,7 @@ public void testPotentiallyUnmappedFlagAndMessageInsistOnKeyword() { assertThat(field.isPotentiallyUnmapped(), equalTo(true)); assertThat(field.errorMessage(), containsString("[keyword] due to loading from _source")); - assertThat(field.getTypesToIndices(), hasEntry(DataType.LONG.typeName(), Set.of("idx_a"))); + assertMap(field.getTypesToIndices(), matchesMap().entry(DataType.LONG.typeName(), Set.of("idx_a"))); } public void testTypesReflectsKeysOfTruncatedMap() { From 6e4cd276da0533c863edbdf0d7640bbf291c5343 Mon Sep 17 00:00:00 2001 From: Gal Lalouche Date: Tue, 28 Apr 2026 19:46:07 +0300 Subject: [PATCH 09/20] Some more human refactors --- .../xpack/esql/analysis/Analyzer.java | 26 +++++++------ ...d2.java => CompactInvalidMappedField.java} | 14 ++++--- ...eld2.java => 
CompactMultiTypeEsField.java} | 37 ++++++++----------- .../xpack/esql/core/type/EsField.java | 2 +- .../esql/core/type/UnionTypeEsField.java | 2 +- .../planner/EsPhysicalOperationProviders.java | 4 +- ...va => CompactInvalidMappedFieldTests.java} | 19 +++++----- ...java => CompactMultiTypeEsFieldTests.java} | 16 ++++---- .../type/MultiTypeEsFieldMemoryTests.java | 16 ++++---- 9 files changed, 68 insertions(+), 68 deletions(-) rename x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/{InvalidMappedField2.java => CompactInvalidMappedField.java} (84%) rename x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/{MultiTypeEsField2.java => CompactMultiTypeEsField.java} (87%) rename x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/{InvalidMappedField2Tests.java => CompactInvalidMappedFieldTests.java} (80%) rename x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/{MultiTypeEsField2Tests.java => CompactMultiTypeEsFieldTests.java} (91%) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java index 0973c0b059317..90d08028c4d67 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java @@ -53,11 +53,11 @@ import org.elasticsearch.xpack.esql.core.expression.predicate.BinaryOperator; import org.elasticsearch.xpack.esql.core.expression.predicate.operator.comparison.BinaryComparison; import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.CompactMultiTypeEsField; import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.core.type.EsField; import org.elasticsearch.xpack.esql.core.type.InvalidMappedField; import org.elasticsearch.xpack.esql.core.type.MultiTypeEsField; -import 
org.elasticsearch.xpack.esql.core.type.MultiTypeEsField2; import org.elasticsearch.xpack.esql.core.type.PotentiallyUnmappedKeywordEsField; import org.elasticsearch.xpack.esql.core.type.UnionTypeEsField; import org.elasticsearch.xpack.esql.core.type.UnsupportedEsField; @@ -2250,11 +2250,11 @@ private static class ResolveUnionTypes extends ParameterizedRule p.childrenResolved() ? doRule(p, unionFieldAttributes, context) : p); } - private LogicalPlan doRule(LogicalPlan plan, List unionFieldAttributes, AnalyzerContext context) { + private static LogicalPlan doRule( + LogicalPlan plan, + List unionFieldAttributes, + AnalyzerContext context + ) { Holder alreadyAddedUnionFieldAttributes = new Holder<>(unionFieldAttributes.size()); // Collect field attributes from previous runs if (plan instanceof EsRelation rel) { @@ -2345,7 +2349,7 @@ private static LogicalPlan addGeneratedFieldsToEsRelations(LogicalPlan plan, Lis return res; } - private Expression resolveConvertFunction( + private static Expression resolveConvertFunction( ConvertFunction convert, List unionFieldAttributes, AnalyzerContext context @@ -2428,12 +2432,12 @@ private static EsField rewrapWithCast(FieldAttribute fa, Expression convertExpre null ); } else { - MultiTypeEsField2 mtf = (MultiTypeEsField2) fa.field(); + CompactMultiTypeEsField mtf = (CompactMultiTypeEsField) fa.field(); Map typeToConversionExpressions = new HashMap<>(); for (Map.Entry entry : mtf.getTypeToConversionExpressions().entrySet()) { typeToConversionExpressions.put(entry.getKey(), wrapWith(convertExpression, entry.getValue())); } - return new MultiTypeEsField2( + return new CompactMultiTypeEsField( fa.fieldName().string(), convertExpression.dataType(), false, @@ -2449,7 +2453,7 @@ private static Expression wrapWith(Expression convertExpression, Expression orig return convertExpression.replaceChildren(Collections.singletonList(inner.field())); } - private Expression createIfDoesNotAlreadyExist( + private static Expression 
createIfDoesNotAlreadyExist( FieldAttribute fa, EsField resolvedField, List unionFieldAttributes @@ -2499,7 +2503,7 @@ static EsField resolvedMultiTypeEsField( private static boolean canConvertOriginalTypes(EsField multiTypeEsField, Set supportedTypes) { Map conversionExpressions = multiTypeEsField instanceof MultiTypeEsField legacy ? legacy.getIndexToConversionExpressions() - : ((MultiTypeEsField2) multiTypeEsField).getTypeToConversionExpressions(); + : ((CompactMultiTypeEsField) multiTypeEsField).getTypeToConversionExpressions(); return conversionExpressions.values() .stream() .allMatch( diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/InvalidMappedField2.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/CompactInvalidMappedField.java similarity index 84% rename from x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/InvalidMappedField2.java rename to x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/CompactInvalidMappedField.java index 3b749de61b896..82a16b6ca6c5b 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/InvalidMappedField2.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/CompactInvalidMappedField.java @@ -12,11 +12,13 @@ import java.util.Set; import java.util.TreeMap; +// FIXME(gal, NOCOMMIT) Go over this javadocs +// FIXME(gal, NOCOMMIT) Reduce duplication with InvalidMappedField /** * Memory-frugal variant of {@link InvalidMappedField}: stores at most {@value #MAX_INDICES_PER_TYPE} concrete index names per source type * (plus the {@value #ELLIPSIS} sentinel when more existed) instead of the full per-type index list. Wide union-typed fields routinely span * thousands of indices but the only consumers that need the full list are the legacy index-keyed conversion structures, and they aren't - * used on transport versions that support {@link MultiTypeEsField2}. 
Truncating here lets the analyzed plan stay small while still + * used on transport versions that support {@link CompactMultiTypeEsField}. Truncating here lets the analyzed plan stay small while still * producing a good "[a, b, c, ...]" error message: the message itself is rendered from the full input map at construction time and then * stored as a string, so we lose only the post-construction ability to enumerate every index. * @@ -25,16 +27,16 @@ * plain {@link InvalidMappedField} on the receiving side — that's fine because {@code typesToIndices} is empty after deserialization * anyway, so the truncation no longer matters. */ -public class InvalidMappedField2 extends InvalidMappedField { +public class CompactInvalidMappedField extends InvalidMappedField { private static final String ELLIPSIS = "..."; private static final int MAX_INDICES_PER_TYPE = 3; - public InvalidMappedField2(String name, Map> typesToIndices) { + public CompactInvalidMappedField(String name, Map> typesToIndices) { super(name, makeErrorMessage(typesToIndices, false), new TreeMap<>(), truncate(typesToIndices), false, TimeSeriesFieldType.UNKNOWN); } - public static InvalidMappedField2 potentiallyUnmapped(String name, Map> typesToIndices) { - return new InvalidMappedField2( + public static CompactInvalidMappedField potentiallyUnmapped(String name, Map> typesToIndices) { + return new CompactInvalidMappedField( name, makeErrorMessage(typesToIndices, true), new TreeMap<>(), @@ -44,7 +46,7 @@ public static InvalidMappedField2 potentiallyUnmapped(String name, Map properties, diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/MultiTypeEsField2.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/CompactMultiTypeEsField.java similarity index 87% rename from x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/MultiTypeEsField2.java rename to 
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/CompactMultiTypeEsField.java index 58791a3ff0156..be631527c4356 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/MultiTypeEsField2.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/CompactMultiTypeEsField.java @@ -22,6 +22,8 @@ import java.util.Objects; import java.util.Set; +// FIXME(gal, NOCOMMIT) Go over this javadocs +// FIXME(gal, NOCOMMIT) Reduce duplication with MultiTypeEsField /** * Memory-efficient variant of {@link MultiTypeEsField} that stores the per-source-type conversion * expressions directly, rather than expanding them to one entry per index. Plus an optional @@ -33,16 +35,20 @@ * legacy {@link MultiTypeEsField} when the cluster minimum transport version does not yet support * {@code esql_multi_type_es_field_2}. */ -public class MultiTypeEsField2 extends EsField implements UnionTypeEsField { - +public class CompactMultiTypeEsField extends EsField implements UnionTypeEsField { + // FIXME(gal, NOCOMMIT) rename public static final TransportVersion ESQL_MULTI_TYPE_ES_FIELD_2 = TransportVersion.fromName("esql_multi_type_es_field_2"); private final Map typeToConversionExpressions; + /** + * If this is not {@code null}, then this expression should be used to convert the field value in case the field is not mapped in an + * index from {@link DataType#KEYWORD} to the target type. 
+ */ @Nullable private final Expression unmappedConversionExpression; - public MultiTypeEsField2( + public CompactMultiTypeEsField( String name, DataType dataType, boolean aggregatable, @@ -55,7 +61,7 @@ public MultiTypeEsField2( this.unmappedConversionExpression = unmappedConversionExpression; } - protected MultiTypeEsField2(StreamInput in) throws IOException { + protected CompactMultiTypeEsField(StreamInput in) throws IOException { this( ((PlanStreamInput) in).readCachedString(), DataType.readFrom(in), @@ -78,12 +84,12 @@ public void writeContent(StreamOutput out) throws IOException { @Override public String getWriteableName(TransportVersion transportVersion) { - return "MultiTypeEsField2"; + return getNodeStringName(); } @Override public String getNodeStringName() { - return "MultiTypeEsField2"; + return "CompactMultiTypeEsField"; } public Map getTypeToConversionExpressions() { @@ -104,23 +110,12 @@ public Map getTypeToConversionExpressions() { return unmappedConversionExpression; } - public MultiTypeEsField2 withUnmappedConversionExpression(@Nullable Expression unmappedConversionExpression) { - return new MultiTypeEsField2( - getName(), - getDataType(), - isAggregatable(), - typeToConversionExpressions, - getTimeSeriesFieldType(), - unmappedConversionExpression - ); - } - /** - * Build a {@link MultiTypeEsField2} from the per-type resolutions previously computed against an + * Build a {@link CompactMultiTypeEsField} from the per-type resolutions previously computed against an * {@link InvalidMappedField}. Only types present in {@code imf.getTypesToIndices()} for which a * conversion was supplied are included. 
*/ - public static MultiTypeEsField2 resolveFrom( + public static CompactMultiTypeEsField resolveFrom( InvalidMappedField imf, Map typesToConversionExpressions, @Nullable Expression unmappedConversionExpression @@ -143,7 +138,7 @@ public static MultiTypeEsField2 resolveFrom( if (resolvedDataType == DataType.UNSUPPORTED && unmappedConversionExpression != null) { resolvedDataType = unmappedConversionExpression.dataType(); } - return new MultiTypeEsField2( + return new CompactMultiTypeEsField( imf.getName(), resolvedDataType, false, @@ -158,7 +153,7 @@ public boolean equals(Object obj) { if (super.equals(obj) == false) { return false; } - if (obj instanceof MultiTypeEsField2 other) { + if (obj instanceof CompactMultiTypeEsField other) { return typeToConversionExpressions.equals(other.typeToConversionExpressions) && Objects.equals(unmappedConversionExpression, other.unmappedConversionExpression); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/EsField.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/EsField.java index bf789bb7e64b5..be9208ef4f73d 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/EsField.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/EsField.java @@ -107,7 +107,7 @@ public static TimeSeriesFieldType fromIndexFieldCapabilities(IndexFieldCapabilit Map.entry("KeywordEsField", KeywordEsField::new), Map.entry("MissingEsField", MissingEsField::new), Map.entry("MultiTypeEsField", MultiTypeEsField::new), - Map.entry("MultiTypeEsField2", MultiTypeEsField2::new), + Map.entry("CompactMultiTypeEsField", CompactMultiTypeEsField::new), Map.entry("PotentiallyUnmappedKeywordEsField", PotentiallyUnmappedKeywordEsField::new), Map.entry("TextEsField", TextEsField::new), Map.entry("UnsupportedEsField", UnsupportedEsField::new) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/UnionTypeEsField.java 
b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/UnionTypeEsField.java index e4d9008ad09fe..5eba83c5f9eb5 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/UnionTypeEsField.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/UnionTypeEsField.java @@ -12,7 +12,7 @@ /** * Common interface implemented by both {@link MultiTypeEsField} (legacy, keyed by index name) and - * {@link MultiTypeEsField2} (newer, keyed by source data type) so that callers that only care about + * {@link CompactMultiTypeEsField} (newer, keyed by source data type) so that callers that only care about * the existence of a per-(index|type) conversion or about the unmapped-side conversion can treat the * two implementations uniformly. */ diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java index da72a87016d22..239e498b8807d 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java @@ -80,11 +80,11 @@ import org.elasticsearch.xpack.esql.core.expression.FoldContext; import org.elasticsearch.xpack.esql.core.expression.TemporalityAttribute; import org.elasticsearch.xpack.esql.core.expression.TimeSeriesMetadataAttribute; +import org.elasticsearch.xpack.esql.core.type.CompactMultiTypeEsField; import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.core.type.FunctionEsField; import org.elasticsearch.xpack.esql.core.type.KeywordEsField; import org.elasticsearch.xpack.esql.core.type.MultiTypeEsField; -import org.elasticsearch.xpack.esql.core.type.MultiTypeEsField2; import org.elasticsearch.xpack.esql.core.type.PotentiallyUnmappedKeywordEsField; 
import org.elasticsearch.xpack.esql.core.type.UnionTypeEsField; import org.elasticsearch.xpack.esql.expression.function.BlockLoaderWarnings; @@ -276,7 +276,7 @@ private ValuesSourceReaderOperator.LoaderAndConverter blockLoaderAndConverter( MappedFieldType mft = shardContext.fieldType(fieldName); conversion = mft == null ? null - : ((MultiTypeEsField2) unionTypes).getConversionExpressionForType( + : ((CompactMultiTypeEsField) unionTypes).getConversionExpressionForType( EsqlDataTypeRegistry.INSTANCE.fromEs(mft.typeName(), mft.getMetricType()) ); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/InvalidMappedField2Tests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/CompactInvalidMappedFieldTests.java similarity index 80% rename from x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/InvalidMappedField2Tests.java rename to x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/CompactInvalidMappedFieldTests.java index 9144b0d07178b..9a174ee038cca 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/InvalidMappedField2Tests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/CompactInvalidMappedFieldTests.java @@ -8,9 +8,9 @@ package org.elasticsearch.xpack.esql.type; import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.esql.core.type.CompactInvalidMappedField; import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.core.type.InvalidMappedField; -import org.elasticsearch.xpack.esql.core.type.InvalidMappedField2; import java.util.LinkedHashSet; import java.util.Map; @@ -25,7 +25,7 @@ import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; -public class InvalidMappedField2Tests extends ESTestCase { +public class CompactInvalidMappedFieldTests extends ESTestCase { public void testKeepsAllIndicesWhenAtOrBelowLimit() { Map> input = Map.of( 
DataType.KEYWORD.typeName(), @@ -35,7 +35,7 @@ public void testKeepsAllIndicesWhenAtOrBelowLimit() { ); assertMap( - new InvalidMappedField2("f", input).getTypesToIndices(), + new CompactInvalidMappedField("f", input).getTypesToIndices(), matchesMap().entry(DataType.KEYWORD.typeName(), Set.of("idx_a", "idx_b")) .entry(DataType.LONG.typeName(), Set.of("idx_c", "idx_d", "idx_e")) ); @@ -46,7 +46,7 @@ public void testTruncatesAboveLimitAndAddsEllipsisSentinel() { .mapToObj(i -> String.format("idx_%05d", i)) .collect(Collectors.toCollection(LinkedHashSet::new)); - InvalidMappedField2 input = new InvalidMappedField2("f", Map.of(DataType.KEYWORD.typeName(), manyIndices)); + CompactInvalidMappedField input = new CompactInvalidMappedField("f", Map.of(DataType.KEYWORD.typeName(), manyIndices)); assertMap( input.getTypesToIndices(), @@ -60,8 +60,9 @@ public void testErrorMessageReflectsFullInputCountEvenAfterTruncation() { .collect(Collectors.toCollection(LinkedHashSet::new)); Map> input = new TreeMap<>(Map.of(DataType.KEYWORD.typeName(), manyIndices)); - String message = new InvalidMappedField2("f", input).errorMessage(); + String message = new CompactInvalidMappedField("f", input).errorMessage(); + // FIXME(gal, NOCOMMIT) Can we have a less testing of the string contents? Maybe using a TreeMap? 
assertThat(message, containsString("[1] incompatible types")); assertThat(message, containsString("[idx_00000, idx_00001, idx_00002]")); assertThat(message, containsString("[" + (5_000 - 3) + "] other indices")); @@ -77,13 +78,13 @@ public void testErrorMessageMatchesInvalidMappedFieldForSmallInputs() { ) ); - assertThat(new InvalidMappedField2("f", input).errorMessage(), equalTo(new InvalidMappedField("f", input).errorMessage())); + assertThat(new CompactInvalidMappedField("f", input).errorMessage(), equalTo(new InvalidMappedField("f", input).errorMessage())); } public void testPotentiallyUnmappedFlagAndMessageInsistOnKeyword() { Map> input = new TreeMap<>(Map.of(DataType.LONG.typeName(), new LinkedHashSet<>(Set.of("idx_a")))); - InvalidMappedField2 field = InvalidMappedField2.potentiallyUnmapped("f", input); + CompactInvalidMappedField field = CompactInvalidMappedField.potentiallyUnmapped("f", input); assertThat(field.isPotentiallyUnmapped(), equalTo(true)); assertThat(field.errorMessage(), containsString("[keyword] due to loading from _source")); @@ -100,9 +101,7 @@ public void testTypesReflectsKeysOfTruncatedMap() { ) ); - InvalidMappedField2 field = new InvalidMappedField2("f", input); - - assertThat(field.types(), containsInAnyOrder(DataType.KEYWORD, DataType.LONG)); + assertThat(new CompactInvalidMappedField("f", input).types(), containsInAnyOrder(DataType.KEYWORD, DataType.LONG)); } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsField2Tests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/CompactMultiTypeEsFieldTests.java similarity index 91% rename from x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsField2Tests.java rename to x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/CompactMultiTypeEsFieldTests.java index e6c238099ac4e..cfa5097435cb9 100644 --- 
a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsField2Tests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/CompactMultiTypeEsFieldTests.java @@ -11,9 +11,9 @@ import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.CompactMultiTypeEsField; import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.core.type.EsField; -import org.elasticsearch.xpack.esql.core.type.MultiTypeEsField2; import org.elasticsearch.xpack.esql.expression.ExpressionWritables; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToBoolean; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToCartesianPoint; @@ -39,9 +39,9 @@ import static org.elasticsearch.xpack.esql.ConfigurationTestUtils.randomConfiguration; /** - * Mirror of {@link MultiTypeEsFieldTests} for the type-keyed {@link MultiTypeEsField2}. + * Mirror of {@link MultiTypeEsFieldTests} for the type-keyed {@link CompactMultiTypeEsField}. 
*/ -public class MultiTypeEsField2Tests extends AbstractEsFieldTypeTests { +public class CompactMultiTypeEsFieldTests extends AbstractEsFieldTypeTests { private Configuration config; @@ -56,7 +56,7 @@ protected Configuration config() { } @Override - protected MultiTypeEsField2 createTestInstance() { + protected CompactMultiTypeEsField createTestInstance() { String name = randomAlphaOfLength(4); boolean toString = randomBoolean(); DataType dataType = randomFrom(types()); @@ -66,11 +66,11 @@ protected MultiTypeEsField2 createTestInstance() { EsField.TimeSeriesFieldType tsType = randomFrom(EsField.TimeSeriesFieldType.values()); - return new MultiTypeEsField2(name, toType, false, typeToConvertExpressions, tsType, unmappedConversionExpression); + return new CompactMultiTypeEsField(name, toType, false, typeToConvertExpressions, tsType, unmappedConversionExpression); } @Override - protected MultiTypeEsField2 mutateInstance(MultiTypeEsField2 instance) throws IOException { + protected CompactMultiTypeEsField mutateInstance(CompactMultiTypeEsField instance) throws IOException { String name = instance.getName(); DataType dataType = instance.getDataType(); Map typeToConvertExpressions = instance.getTypeToConversionExpressions(); @@ -84,7 +84,7 @@ protected MultiTypeEsField2 mutateInstance(MultiTypeEsField2 instance) throws IO case 4 -> unmappedConversionExpression = unmappedConversionExpression != null ? null : createToString(name, dataType); default -> throw new IllegalArgumentException(); } - return new MultiTypeEsField2(name, dataType, false, typeToConvertExpressions, tsType, unmappedConversionExpression); + return new CompactMultiTypeEsField(name, dataType, false, typeToConvertExpressions, tsType, unmappedConversionExpression); } @Override @@ -96,7 +96,7 @@ protected final NamedWriteableRegistry getNamedWriteableRegistry() { /** * Random map keyed by {@link DataType#typeName()}, so it can be used as the source-type map of - * {@link MultiTypeEsField2}. 
+ * {@link CompactMultiTypeEsField}. */ private Map randomConvertExpressions(String name, boolean toString, DataType dataType) { Map typeToConvertExpressions = new HashMap<>(); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsFieldMemoryTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsFieldMemoryTests.java index f667eb3e070aa..32d30a95adb1b 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsFieldMemoryTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsFieldMemoryTests.java @@ -11,10 +11,10 @@ import org.elasticsearch.index.IndexMode; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.TransportVersionUtils; +import org.elasticsearch.xpack.esql.core.type.CompactInvalidMappedField; +import org.elasticsearch.xpack.esql.core.type.CompactMultiTypeEsField; import org.elasticsearch.xpack.esql.core.type.EsField; import org.elasticsearch.xpack.esql.core.type.InvalidMappedField; -import org.elasticsearch.xpack.esql.core.type.InvalidMappedField2; -import org.elasticsearch.xpack.esql.core.type.MultiTypeEsField2; import org.elasticsearch.xpack.esql.index.EsIndex; import org.elasticsearch.xpack.esql.index.IndexResolution; import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; @@ -35,7 +35,7 @@ /** * End-to-end check that an analyzed plan over many union-typed fields, each conflicting across thousands of indices, retains substantially - * less memory under {@link MultiTypeEsField2} (paired with {@link InvalidMappedField2}'s truncated index lists) than under the legacy + * less memory under {@link CompactMultiTypeEsField} (paired with {@link CompactInvalidMappedField}'s truncated index lists) than under the legacy * {@link org.elasticsearch.xpack.esql.core.type.MultiTypeEsField} (keyed per-index) paired with a full {@link InvalidMappedField}. * *

The cost we're targeting is {@code O(num_fields * num_indices)}: each conflicting field expands into its own per-index conversion @@ -67,10 +67,10 @@ public void testV2AnalyzedPlanIsAtLeastTenTimesSmallerThanLegacy() { String query = buildExplicitConversionQuery(NUM_CONFLICTING_FIELDS); LogicalPlan legacyPlan = analyzer().addIndex(unionTypedIndex(false)) - .minimumTransportVersion(TransportVersionUtils.randomVersionNotSupporting(MultiTypeEsField2.ESQL_MULTI_TYPE_ES_FIELD_2)) + .minimumTransportVersion(TransportVersionUtils.randomVersionNotSupporting(CompactMultiTypeEsField.ESQL_MULTI_TYPE_ES_FIELD_2)) .query(query); LogicalPlan v2Plan = analyzer().addIndex(unionTypedIndex(true)) - .minimumTransportVersion(MultiTypeEsField2.ESQL_MULTI_TYPE_ES_FIELD_2) + .minimumTransportVersion(CompactMultiTypeEsField.ESQL_MULTI_TYPE_ES_FIELD_2) .query(query); long legacyBytes = RamUsageTester.ramUsed(legacyPlan, ACCUMULATOR); @@ -79,7 +79,7 @@ public void testV2AnalyzedPlanIsAtLeastTenTimesSmallerThanLegacy() { } /** - * Build a query that forces the analyzer to materialize a {@code MultiTypeEsField}/{@link MultiTypeEsField2} for every + * Build a query that forces the analyzer to materialize a {@code MultiTypeEsField}/{@link CompactMultiTypeEsField} for every * {@code id_} field by explicitly casting each to keyword. */ private static String buildExplicitConversionQuery(int numFields) { @@ -93,7 +93,7 @@ private static String buildExplicitConversionQuery(int numFields) { /** * Build a fake "idx*" pattern with {@code numIndices} concrete indices and {@code numConflictingFields} fields {@code id_0..id_}, * each with type {@code keyword} in half of the indices and {@code integer} in the other half. 
When {@code compact} is true the - * conflicting fields are built from {@link InvalidMappedField2} (truncated index lists), matching what a v2-capable coordinator + * conflicting fields are built from {@link CompactInvalidMappedField} (truncated index lists), matching what a v2-capable coordinator * produces; otherwise the full {@link InvalidMappedField} is used. */ private static IndexResolution unionTypedIndex(boolean compact) { @@ -113,7 +113,7 @@ private static IndexResolution unionTypedIndex(boolean compact) { mapping.put( fieldName, compact - ? new InvalidMappedField2(fieldName, perFieldTypesToIndices) + ? new CompactInvalidMappedField(fieldName, perFieldTypesToIndices) : new InvalidMappedField(fieldName, perFieldTypesToIndices) ); } From 108cc49dd5a0ea2783f72eb17698556b72d99351 Mon Sep 17 00:00:00 2001 From: Gal Lalouche Date: Tue, 28 Apr 2026 23:00:04 +0300 Subject: [PATCH 10/20] ESQL: Tighten compact union type internals CompactMultiTypeEsField now keys its conversion map by DataType instead of typeName(); the wire format is unchanged (DataType.writeTo already routes through PlanStreamOutput cached strings). CompactInvalidMappedField is gone, replaced by static factories InvalidMappedField.compact / compactPotentiallyUnmapped. The subclass added no state and overrode no behavior; it only forced widening InvalidMappedField's constructor to protected and makeErrorMessage to package-private. Both are private again. While here, switch the truncation tests to Strings.format so they stop breaking under tests.locale=as. 
Made-with: Cursor --- .../xpack/esql/analysis/Analyzer.java | 13 ++- .../core/type/CompactInvalidMappedField.java | 80 ------------------- .../core/type/CompactMultiTypeEsField.java | 16 ++-- .../esql/core/type/InvalidMappedField.java | 65 ++++++++++++++- .../type/CompactMultiTypeEsFieldTests.java | 22 ++--- ...va => InvalidMappedFieldCompactTests.java} | 28 ++++--- .../type/MultiTypeEsFieldMemoryTests.java | 9 +-- 7 files changed, 107 insertions(+), 126 deletions(-) delete mode 100644 x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/CompactInvalidMappedField.java rename x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/{CompactInvalidMappedFieldTests.java => InvalidMappedFieldCompactTests.java} (76%) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java index 90d08028c4d67..591102e8fd6eb 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java @@ -2433,8 +2433,8 @@ private static EsField rewrapWithCast(FieldAttribute fa, Expression convertExpre ); } else { CompactMultiTypeEsField mtf = (CompactMultiTypeEsField) fa.field(); - Map typeToConversionExpressions = new HashMap<>(); - for (Map.Entry entry : mtf.getTypeToConversionExpressions().entrySet()) { + Map typeToConversionExpressions = new HashMap<>(); + for (Map.Entry entry : mtf.getTypeToConversionExpressions().entrySet()) { typeToConversionExpressions.put(entry.getKey(), wrapWith(convertExpression, entry.getValue())); } return new CompactMultiTypeEsField( @@ -2501,11 +2501,10 @@ static EsField resolvedMultiTypeEsField( } private static boolean canConvertOriginalTypes(EsField multiTypeEsField, Set supportedTypes) { - Map conversionExpressions = multiTypeEsField instanceof MultiTypeEsField legacy - ? 
legacy.getIndexToConversionExpressions() - : ((CompactMultiTypeEsField) multiTypeEsField).getTypeToConversionExpressions(); - return conversionExpressions.values() - .stream() + Collection conversionExpressions = multiTypeEsField instanceof MultiTypeEsField legacy + ? legacy.getIndexToConversionExpressions().values() + : ((CompactMultiTypeEsField) multiTypeEsField).getTypeToConversionExpressions().values(); + return conversionExpressions.stream() .allMatch( e -> e instanceof AbstractConvertFunction convertFunction && supportedTypes.contains(convertFunction.field().dataType().widenSmallNumeric()) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/CompactInvalidMappedField.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/CompactInvalidMappedField.java deleted file mode 100644 index 82a16b6ca6c5b..0000000000000 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/CompactInvalidMappedField.java +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. - */ - -package org.elasticsearch.xpack.esql.core.type; - -import java.util.LinkedHashSet; -import java.util.Map; -import java.util.Set; -import java.util.TreeMap; - -// FIXME(gal, NOCOMMIT) Go over this javadocs -// FIXME(gal, NOCOMMIT) Reduce duplication with InvalidMappedField -/** - * Memory-frugal variant of {@link InvalidMappedField}: stores at most {@value #MAX_INDICES_PER_TYPE} concrete index names per source type - * (plus the {@value #ELLIPSIS} sentinel when more existed) instead of the full per-type index list. 
Wide union-typed fields routinely span - * thousands of indices but the only consumers that need the full list are the legacy index-keyed conversion structures, and they aren't - * used on transport versions that support {@link CompactMultiTypeEsField}. Truncating here lets the analyzed plan stay small while still - * producing a good "[a, b, c, ...]" error message: the message itself is rendered from the full input map at construction time and then - * stored as a string, so we lose only the post-construction ability to enumerate every index. - * - *

Just like its parent, the {@code typesToIndices} map is not sent over the wire (it only matters during analysis on the coordinator), - * so {@code InvalidMappedField2} serializes identically to {@link InvalidMappedField}. Round-tripping through the wire therefore yields a - * plain {@link InvalidMappedField} on the receiving side — that's fine because {@code typesToIndices} is empty after deserialization - * anyway, so the truncation no longer matters. - */ -public class CompactInvalidMappedField extends InvalidMappedField { - private static final String ELLIPSIS = "..."; - private static final int MAX_INDICES_PER_TYPE = 3; - - public CompactInvalidMappedField(String name, Map> typesToIndices) { - super(name, makeErrorMessage(typesToIndices, false), new TreeMap<>(), truncate(typesToIndices), false, TimeSeriesFieldType.UNKNOWN); - } - - public static CompactInvalidMappedField potentiallyUnmapped(String name, Map> typesToIndices) { - return new CompactInvalidMappedField( - name, - makeErrorMessage(typesToIndices, true), - new TreeMap<>(), - truncate(typesToIndices), - true, - TimeSeriesFieldType.UNKNOWN - ); - } - - private CompactInvalidMappedField( - String name, - String errorMessage, - Map properties, - Map> typesToIndices, - boolean isPotentiallyUnmapped, - TimeSeriesFieldType type - ) { - super(name, errorMessage, properties, typesToIndices, isPotentiallyUnmapped, type); - } - - /** - * Cap each per-type index set at {@value #MAX_INDICES_PER_TYPE} entries, appending the {@value #ELLIPSIS} sentinel iff anything was - * dropped. The retained 3 are picked by sorted order so that the (already truncated) error message and the stored set stay - * consistent. 
- */ - private static Map> truncate(Map> typesToIndices) { - Map> result = new TreeMap<>(); - for (Map.Entry> entry : typesToIndices.entrySet()) { - Set indices = entry.getValue(); - if (indices.size() <= MAX_INDICES_PER_TYPE) { - result.put(entry.getKey(), Set.copyOf(indices)); - } else { - Set truncated = new LinkedHashSet<>(MAX_INDICES_PER_TYPE + 1); - indices.stream().sorted().limit(MAX_INDICES_PER_TYPE).forEach(truncated::add); - truncated.add(ELLIPSIS); - result.put(entry.getKey(), Set.copyOf(truncated)); - } - } - return result; - } -} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/CompactMultiTypeEsField.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/CompactMultiTypeEsField.java index be631527c4356..3409ac70d1731 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/CompactMultiTypeEsField.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/CompactMultiTypeEsField.java @@ -39,7 +39,7 @@ public class CompactMultiTypeEsField extends EsField implements UnionTypeEsField // FIXME(gal, NOCOMMIT) rename public static final TransportVersion ESQL_MULTI_TYPE_ES_FIELD_2 = TransportVersion.fromName("esql_multi_type_es_field_2"); - private final Map typeToConversionExpressions; + private final Map typeToConversionExpressions; /** * If this is not {@code null}, then this expression should be used to convert the field value in case the field is not mapped in an @@ -52,7 +52,7 @@ public CompactMultiTypeEsField( String name, DataType dataType, boolean aggregatable, - Map typeToConversionExpressions, + Map typeToConversionExpressions, TimeSeriesFieldType timeSeriesFieldType, @Nullable Expression unmappedConversionExpression ) { @@ -66,7 +66,7 @@ protected CompactMultiTypeEsField(StreamInput in) throws IOException { ((PlanStreamInput) in).readCachedString(), DataType.readFrom(in), in.readBoolean(), - in.readImmutableMap(i -> 
i.readNamedWriteable(Expression.class)), + in.readImmutableMap(DataType::readFrom, i -> i.readNamedWriteable(Expression.class)), readTimeSeriesFieldType(in), in.readOptionalNamedWriteable(Expression.class) ); @@ -77,7 +77,7 @@ public void writeContent(StreamOutput out) throws IOException { ((PlanStreamOutput) out).writeCachedString(getName()); getDataType().writeTo(out); out.writeBoolean(isAggregatable()); - out.writeMap(typeToConversionExpressions, (o, v) -> out.writeNamedWriteable(v)); + out.writeMap(typeToConversionExpressions, (o, k) -> k.writeTo(o), (o, v) -> o.writeNamedWriteable(v)); writeTimeSeriesFieldType(out); out.writeOptionalNamedWriteable(unmappedConversionExpression); } @@ -92,7 +92,7 @@ public String getNodeStringName() { return "CompactMultiTypeEsField"; } - public Map getTypeToConversionExpressions() { + public Map getTypeToConversionExpressions() { return typeToConversionExpressions; } @@ -102,7 +102,7 @@ public Map getTypeToConversionExpressions() { * {@link #getUnmappedConversionExpression()} when the field is unmapped in the local index. 
*/ public @Nullable Expression getConversionExpressionForType(DataType type) { - return typeToConversionExpressions.get(type.typeName()); + return typeToConversionExpressions.get(type); } @Override @@ -122,7 +122,7 @@ public static CompactMultiTypeEsField resolveFrom( ) { Map> typesToIndices = imf.getTypesToIndices(); DataType resolvedDataType = DataType.UNSUPPORTED; - Map filtered = new HashMap<>(); + Map filtered = new HashMap<>(); for (String typeName : typesToIndices.keySet()) { Expression convertExpr = typesToConversionExpressions.get(typeName); if (convertExpr == null) { @@ -133,7 +133,7 @@ public static CompactMultiTypeEsField resolveFrom( } else if (resolvedDataType != convertExpr.dataType()) { throw new IllegalArgumentException("Resolved data type mismatch: " + resolvedDataType + " != " + convertExpr.dataType()); } - filtered.put(typeName, convertExpr); + filtered.put(DataType.fromTypeName(typeName), convertExpr); } if (resolvedDataType == DataType.UNSUPPORTED && unmappedConversionExpression != null) { resolvedDataType = unmappedConversionExpression.dataType(); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/InvalidMappedField.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/InvalidMappedField.java index 8fa6eeaab02b8..592a63744d088 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/InvalidMappedField.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/InvalidMappedField.java @@ -15,6 +15,7 @@ import org.elasticsearch.xpack.esql.io.stream.PlanStreamOutput; import java.io.IOException; +import java.util.LinkedHashSet; import java.util.Map; import java.util.Objects; import java.util.Set; @@ -31,6 +32,9 @@ */ public class InvalidMappedField extends EsField { + private static final String ELLIPSIS = "..."; + private static final int MAX_INDICES_PER_TYPE = 3; + private final String errorMessage; private final Map> typesToIndices; 
private final boolean isPotentiallyUnmapped; @@ -63,7 +67,43 @@ public static InvalidMappedField potentiallyUnmapped(String name, Map{@code typesToIndices} is not sent over the wire (it only matters during analysis on the coordinator), so the truncation is a + * coordinator-local memory optimization with no BWC implications. + */ + public static InvalidMappedField compact(String name, Map> typesToIndices) { + return new InvalidMappedField( + name, + makeErrorMessage(typesToIndices, false), + new TreeMap<>(), + truncate(typesToIndices), + false, + TimeSeriesFieldType.UNKNOWN + ); + } + + /** + * {@link #potentiallyUnmapped} counterpart of {@link #compact}. + */ + public static InvalidMappedField compactPotentiallyUnmapped(String name, Map> typesToIndices) { + return new InvalidMappedField( + name, + makeErrorMessage(typesToIndices, true), + new TreeMap<>(), + truncate(typesToIndices), + true, + TimeSeriesFieldType.UNKNOWN + ); + } + + private InvalidMappedField( String name, String errorMessage, Map properties, @@ -142,7 +182,7 @@ public boolean isPotentiallyUnmapped() { return isPotentiallyUnmapped; } - static String makeErrorMessage(Map> typesToIndices, boolean includeInsistKeyword) { + private static String makeErrorMessage(Map> typesToIndices, boolean includeInsistKeyword) { StringBuilder errorMessage = new StringBuilder(); var isInsistKeywordOnlyKeyword = includeInsistKeyword && typesToIndices.containsKey(DataType.KEYWORD.typeName()) == false; errorMessage.append("mapped as ["); @@ -177,4 +217,25 @@ static String makeErrorMessage(Map> typesToIndices, boolean } return errorMessage.toString(); } + + /** + * Cap each per-type index set at {@value #MAX_INDICES_PER_TYPE} entries, appending the {@value #ELLIPSIS} sentinel iff anything was + * dropped. The retained entries are picked by sorted order so that the (already truncated) error message and the stored set stay + * consistent. 
+ */ + private static Map> truncate(Map> typesToIndices) { + Map> result = new TreeMap<>(); + for (Map.Entry> entry : typesToIndices.entrySet()) { + Set indices = entry.getValue(); + if (indices.size() <= MAX_INDICES_PER_TYPE) { + result.put(entry.getKey(), Set.copyOf(indices)); + } else { + Set truncated = new LinkedHashSet<>(MAX_INDICES_PER_TYPE + 1); + indices.stream().sorted().limit(MAX_INDICES_PER_TYPE).forEach(truncated::add); + truncated.add(ELLIPSIS); + result.put(entry.getKey(), Set.copyOf(truncated)); + } + } + return result; + } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/CompactMultiTypeEsFieldTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/CompactMultiTypeEsFieldTests.java index cfa5097435cb9..93aa9c19decd3 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/CompactMultiTypeEsFieldTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/CompactMultiTypeEsFieldTests.java @@ -61,7 +61,7 @@ protected CompactMultiTypeEsField createTestInstance() { boolean toString = randomBoolean(); DataType dataType = randomFrom(types()); DataType toType = toString ? DataType.KEYWORD : dataType; - Map typeToConvertExpressions = randomConvertExpressions(name, toString, dataType); + Map typeToConvertExpressions = randomConvertExpressions(name, toString, dataType); Expression unmappedConversionExpression = randomBoolean() ? 
null : createToString(name, dataType); EsField.TimeSeriesFieldType tsType = randomFrom(EsField.TimeSeriesFieldType.values()); @@ -73,7 +73,7 @@ protected CompactMultiTypeEsField createTestInstance() { protected CompactMultiTypeEsField mutateInstance(CompactMultiTypeEsField instance) throws IOException { String name = instance.getName(); DataType dataType = instance.getDataType(); - Map typeToConvertExpressions = instance.getTypeToConversionExpressions(); + Map typeToConvertExpressions = instance.getTypeToConversionExpressions(); EsField.TimeSeriesFieldType tsType = instance.getTimeSeriesFieldType(); Expression unmappedConversionExpression = instance.getUnmappedConversionExpression(); switch (between(0, 4)) { @@ -95,25 +95,25 @@ protected final NamedWriteableRegistry getNamedWriteableRegistry() { } /** - * Random map keyed by {@link DataType#typeName()}, so it can be used as the source-type map of + * Random map keyed by source {@link DataType}, so it can be used as the source-type map of * {@link CompactMultiTypeEsField}. 
*/ - private Map randomConvertExpressions(String name, boolean toString, DataType dataType) { - Map typeToConvertExpressions = new HashMap<>(); + private Map randomConvertExpressions(String name, boolean toString, DataType dataType) { + Map typeToConvertExpressions = new HashMap<>(); if (toString) { - typeToConvertExpressions.put(DataType.KEYWORD.typeName(), createToString(name, DataType.KEYWORD)); - typeToConvertExpressions.put(dataType.typeName(), createToString(name, dataType)); + typeToConvertExpressions.put(DataType.KEYWORD, createToString(name, DataType.KEYWORD)); + typeToConvertExpressions.put(dataType, createToString(name, dataType)); } else { - typeToConvertExpressions.put(DataType.KEYWORD.typeName(), testConvertExpression(name, DataType.KEYWORD, dataType)); - typeToConvertExpressions.put(dataType.typeName(), testConvertExpression(name, dataType, dataType)); + typeToConvertExpressions.put(DataType.KEYWORD, testConvertExpression(name, DataType.KEYWORD, dataType)); + typeToConvertExpressions.put(dataType, testConvertExpression(name, dataType, dataType)); } return typeToConvertExpressions; } - private Map mutateConvertExpressions( + private Map mutateConvertExpressions( String name, DataType toType, - Map typeToConvertExpressions + Map typeToConvertExpressions ) { return randomValueOtherThan( typeToConvertExpressions, diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/CompactInvalidMappedFieldTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/InvalidMappedFieldCompactTests.java similarity index 76% rename from x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/CompactInvalidMappedFieldTests.java rename to x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/InvalidMappedFieldCompactTests.java index 9a174ee038cca..909f880380378 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/CompactInvalidMappedFieldTests.java +++ 
b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/InvalidMappedFieldCompactTests.java @@ -7,8 +7,8 @@ package org.elasticsearch.xpack.esql.type; +import org.elasticsearch.core.Strings; import org.elasticsearch.test.ESTestCase; -import org.elasticsearch.xpack.esql.core.type.CompactInvalidMappedField; import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.core.type.InvalidMappedField; @@ -25,7 +25,11 @@ import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; -public class CompactInvalidMappedFieldTests extends ESTestCase { +/** + * Tests for {@link InvalidMappedField#compact} and {@link InvalidMappedField#compactPotentiallyUnmapped}, which truncate per-type index + * lists for memory-frugal analysis on the coordinator. + */ +public class InvalidMappedFieldCompactTests extends ESTestCase { public void testKeepsAllIndicesWhenAtOrBelowLimit() { Map> input = Map.of( DataType.KEYWORD.typeName(), @@ -35,7 +39,7 @@ public void testKeepsAllIndicesWhenAtOrBelowLimit() { ); assertMap( - new CompactInvalidMappedField("f", input).getTypesToIndices(), + InvalidMappedField.compact("f", input).getTypesToIndices(), matchesMap().entry(DataType.KEYWORD.typeName(), Set.of("idx_a", "idx_b")) .entry(DataType.LONG.typeName(), Set.of("idx_c", "idx_d", "idx_e")) ); @@ -43,26 +47,25 @@ public void testKeepsAllIndicesWhenAtOrBelowLimit() { public void testTruncatesAboveLimitAndAddsEllipsisSentinel() { Set manyIndices = IntStream.range(0, 5_000) - .mapToObj(i -> String.format("idx_%05d", i)) + .mapToObj(i -> Strings.format("idx_%05d", i)) .collect(Collectors.toCollection(LinkedHashSet::new)); - CompactInvalidMappedField input = new CompactInvalidMappedField("f", Map.of(DataType.KEYWORD.typeName(), manyIndices)); + InvalidMappedField field = InvalidMappedField.compact("f", Map.of(DataType.KEYWORD.typeName(), manyIndices)); assertMap( - input.getTypesToIndices(), + field.getTypesToIndices(), 
matchesMap().entry(DataType.KEYWORD.typeName(), Set.of("idx_00000", "idx_00001", "idx_00002", "...")) ); } public void testErrorMessageReflectsFullInputCountEvenAfterTruncation() { Set manyIndices = IntStream.range(0, 5_000) - .mapToObj(i -> String.format("idx_%05d", i)) + .mapToObj(i -> Strings.format("idx_%05d", i)) .collect(Collectors.toCollection(LinkedHashSet::new)); Map> input = new TreeMap<>(Map.of(DataType.KEYWORD.typeName(), manyIndices)); - String message = new CompactInvalidMappedField("f", input).errorMessage(); + String message = InvalidMappedField.compact("f", input).errorMessage(); - // FIXME(gal, NOCOMMIT) Can we have a less testing of the string contents? Maybe using a TreeMap? assertThat(message, containsString("[1] incompatible types")); assertThat(message, containsString("[idx_00000, idx_00001, idx_00002]")); assertThat(message, containsString("[" + (5_000 - 3) + "] other indices")); @@ -78,13 +81,13 @@ public void testErrorMessageMatchesInvalidMappedFieldForSmallInputs() { ) ); - assertThat(new CompactInvalidMappedField("f", input).errorMessage(), equalTo(new InvalidMappedField("f", input).errorMessage())); + assertThat(InvalidMappedField.compact("f", input).errorMessage(), equalTo(new InvalidMappedField("f", input).errorMessage())); } public void testPotentiallyUnmappedFlagAndMessageInsistOnKeyword() { Map> input = new TreeMap<>(Map.of(DataType.LONG.typeName(), new LinkedHashSet<>(Set.of("idx_a")))); - CompactInvalidMappedField field = CompactInvalidMappedField.potentiallyUnmapped("f", input); + InvalidMappedField field = InvalidMappedField.compactPotentiallyUnmapped("f", input); assertThat(field.isPotentiallyUnmapped(), equalTo(true)); assertThat(field.errorMessage(), containsString("[keyword] due to loading from _source")); @@ -101,7 +104,6 @@ public void testTypesReflectsKeysOfTruncatedMap() { ) ); - assertThat(new CompactInvalidMappedField("f", input).types(), containsInAnyOrder(DataType.KEYWORD, DataType.LONG)); + 
assertThat(InvalidMappedField.compact("f", input).types(), containsInAnyOrder(DataType.KEYWORD, DataType.LONG)); } - } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsFieldMemoryTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsFieldMemoryTests.java index 32d30a95adb1b..674086f7fcd99 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsFieldMemoryTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsFieldMemoryTests.java @@ -11,7 +11,6 @@ import org.elasticsearch.index.IndexMode; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.TransportVersionUtils; -import org.elasticsearch.xpack.esql.core.type.CompactInvalidMappedField; import org.elasticsearch.xpack.esql.core.type.CompactMultiTypeEsField; import org.elasticsearch.xpack.esql.core.type.EsField; import org.elasticsearch.xpack.esql.core.type.InvalidMappedField; @@ -35,8 +34,8 @@ /** * End-to-end check that an analyzed plan over many union-typed fields, each conflicting across thousands of indices, retains substantially - * less memory under {@link CompactMultiTypeEsField} (paired with {@link CompactInvalidMappedField}'s truncated index lists) than under the legacy - * {@link org.elasticsearch.xpack.esql.core.type.MultiTypeEsField} (keyed per-index) paired with a full {@link InvalidMappedField}. + * less memory under {@link CompactMultiTypeEsField} (paired with {@link InvalidMappedField#compact}'s truncated index lists) than under + * the legacy {@link org.elasticsearch.xpack.esql.core.type.MultiTypeEsField} (keyed per-index) paired with a full {@link InvalidMappedField}. * *
<p>
The cost we're targeting is {@code O(num_fields * num_indices)}: each conflicting field expands into its own per-index conversion * map under legacy, and per-type under v2. With many fields the constant overhead from {@link EsIndex#indexNameWithModes} becomes a @@ -93,7 +92,7 @@ private static String buildExplicitConversionQuery(int numFields) { /** * Build a fake "idx*" pattern with {@code numIndices} concrete indices and {@code numConflictingFields} fields {@code id_0..id_}, * each with type {@code keyword} in half of the indices and {@code integer} in the other half. When {@code compact} is true the - * conflicting fields are built from {@link CompactInvalidMappedField} (truncated index lists), matching what a v2-capable coordinator + * conflicting fields are built from {@link InvalidMappedField#compact} (truncated index lists), matching what a v2-capable coordinator * produces; otherwise the full {@link InvalidMappedField} is used. */ private static IndexResolution unionTypedIndex(boolean compact) { @@ -113,7 +112,7 @@ private static IndexResolution unionTypedIndex(boolean compact) { mapping.put( fieldName, compact - ? new CompactInvalidMappedField(fieldName, perFieldTypesToIndices) + ? InvalidMappedField.compact(fieldName, perFieldTypesToIndices) : new InvalidMappedField(fieldName, perFieldTypesToIndices) ); } From c53408ef3c3fcc8f9dc66c35e9cb6fffb115cf14 Mon Sep 17 00:00:00 2001 From: Gal Lalouche Date: Thu, 30 Apr 2026 14:16:24 +0300 Subject: [PATCH 11/20] ESQL: Share IMF/CIMF behavior via TypeConflictField CompactInvalidMappedField is now a peer of InvalidMappedField rather than a subclass: both implement a new TypeConflictField interface that holds the shared IMF-specific surface plus a static makeErrorMessage helper. Production analyzer/verifier/type-resolution sites branch on the interface so they remain oblivious to which flavor backs a field. 
Made-with: Cursor --- .../xpack/esql/analysis/Analyzer.java | 31 ++-- .../xpack/esql/analysis/Verifier.java | 4 +- .../esql/core/expression/Expressions.java | 6 +- .../esql/core/expression/FieldAttribute.java | 6 +- .../esql/core/expression/TypeResolutions.java | 4 +- .../core/type/CompactInvalidMappedField.java | 144 ++++++++++++++++++ .../core/type/CompactMultiTypeEsField.java | 6 +- .../esql/core/type/InvalidMappedField.java | 116 ++------------ .../esql/core/type/TypeConflictField.java | 98 ++++++++++++ .../xpack/esql/session/IndexResolver.java | 3 +- ...va => CompactInvalidMappedFieldTests.java} | 19 +-- .../type/MultiTypeEsFieldMemoryTests.java | 7 +- 12 files changed, 297 insertions(+), 147 deletions(-) create mode 100644 x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/CompactInvalidMappedField.java create mode 100644 x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/TypeConflictField.java rename x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/{InvalidMappedFieldCompactTests.java => CompactInvalidMappedFieldTests.java} (81%) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java index 591102e8fd6eb..5e3ae179df455 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java @@ -59,6 +59,7 @@ import org.elasticsearch.xpack.esql.core.type.InvalidMappedField; import org.elasticsearch.xpack.esql.core.type.MultiTypeEsField; import org.elasticsearch.xpack.esql.core.type.PotentiallyUnmappedKeywordEsField; +import org.elasticsearch.xpack.esql.core.type.TypeConflictField; import org.elasticsearch.xpack.esql.core.type.UnionTypeEsField; import org.elasticsearch.xpack.esql.core.type.UnsupportedEsField; import 
org.elasticsearch.xpack.esql.core.util.CollectionUtils; @@ -2259,14 +2260,14 @@ private static boolean isMultiType(EsField field) { * deserializable on older nodes during a rolling upgrade. */ private static EsField buildMultiTypeEsField( - InvalidMappedField imf, + TypeConflictField imf, Map typesToConversionExpressions, @Nullable Expression unmappedConversionExpression, AnalyzerContext context ) { return context.minimumVersion().supports(CompactMultiTypeEsField.ESQL_MULTI_TYPE_ES_FIELD_2) ? CompactMultiTypeEsField.resolveFrom(imf, typesToConversionExpressions, unmappedConversionExpression) - : MultiTypeEsField.resolveFrom(imf, typesToConversionExpressions) + : MultiTypeEsField.resolveFrom((InvalidMappedField) imf, typesToConversionExpressions) .withPotentiallyUnmappedExpression(unmappedConversionExpression); } @@ -2355,7 +2356,7 @@ private static Expression resolveConvertFunction( AnalyzerContext context ) { Expression convertExpression = (Expression) convert; - if (convert.field() instanceof FieldAttribute fa && fa.field() instanceof InvalidMappedField imf) { + if (convert.field() instanceof FieldAttribute fa && fa.field() instanceof TypeConflictField imf) { HashMap typeResolutions = new HashMap<>(); Set supportedTypes = convert.supportedTypes(); if (convert instanceof FoldablesConvertFunction fcf) { @@ -2489,7 +2490,7 @@ static EsField resolvedMultiTypeEsField( AnalyzerContext context ) { Map typesToConversionExpressions = new HashMap<>(); - InvalidMappedField imf = (InvalidMappedField) fa.field(); + TypeConflictField imf = (TypeConflictField) fa.field(); imf.getTypesToIndices().forEach((typeName, indexNames) -> { DataType type = DataType.fromTypeName(typeName); TypeResolutionKey key = new TypeResolutionKey(fa.name(), type); @@ -2511,7 +2512,7 @@ private static boolean canConvertOriginalTypes(EsField multiTypeEsField, Set { - if (f.field() instanceof InvalidMappedField imf && allDates(context, imf)) { + if (f.field() instanceof TypeConflictField imf && 
allDates(context, imf)) { HashMap typeResolutions = new HashMap<>(); var convert = new ToDateNanos(f.source(), f, context.configuration()); imf.types().forEach(type -> typeResolutions(f, convert, type, imf, typeResolutions)); @@ -2642,7 +2643,7 @@ public LogicalPlan apply(LogicalPlan plan, AnalyzerContext context) { }); } - private static boolean allDates(AnalyzerContext context, InvalidMappedField imf) { + private static boolean allDates(AnalyzerContext context, TypeConflictField imf) { if (imf.types().stream().allMatch(DataType::isDate) == false) { return false; } @@ -2659,7 +2660,7 @@ private static void typeResolutions( FieldAttribute fieldAttribute, ConvertFunction convert, DataType type, - InvalidMappedField imf, + TypeConflictField imf, HashMap typeResolutions ) { ResolveUnionTypes.TypeResolutionKey key = new ResolveUnionTypes.TypeResolutionKey(fieldAttribute.name(), type); @@ -2722,7 +2723,7 @@ private Expression tryToTransformBinaryComparison( Map unionFields, AnalyzerContext context ) { - if (original instanceof FieldAttribute fa && fa.field() instanceof InvalidMappedField imf && canBeCasted(imf)) { + if (original instanceof FieldAttribute fa && fa.field() instanceof TypeConflictField imf && canBeCasted(imf)) { Map typeConverters = new HashMap<>(); for (DataType type : imf.types()) { ConvertFunction convert = type == AGGREGATE_METRIC_DOUBLE @@ -2750,7 +2751,7 @@ private Expression tryToTransformBinaryComparison( return original; } - private static boolean canBeCasted(InvalidMappedField imf) { + private static boolean canBeCasted(TypeConflictField imf) { return imf.types().contains(AGGREGATE_METRIC_DOUBLE) && imf.types().stream().allMatch(f -> f == AGGREGATE_METRIC_DOUBLE || f.isNumeric()); } @@ -2762,7 +2763,7 @@ private Expression tryToTransformFunction( Map unionFields, AnalyzerContext context ) { - if (field instanceof FieldAttribute fa && fa.field() instanceof InvalidMappedField imf) { + if (field instanceof FieldAttribute fa && fa.field() instanceof 
TypeConflictField imf) { if (canBeCasted(imf) == false) { aborted.set(Boolean.TRUE); return aggFunc; @@ -2812,7 +2813,7 @@ private Expression tryToTransformFunction( return aggFunc; } - private Map typeConverters(AggregateFunction aggFunc, FieldAttribute fa, InvalidMappedField mtf) { + private Map typeConverters(AggregateFunction aggFunc, FieldAttribute fa, TypeConflictField mtf) { var metric = getMetric(aggFunc, isTimeSeries); Map typeConverter = new HashMap<>(); for (DataType type : mtf.types()) { @@ -2833,7 +2834,7 @@ private Map typeConverters(AggregateFunction aggFunc, FieldA return typeConverter; } - private Expression countConvert(UnaryScalarFunction convert, Source source, DataType type, InvalidMappedField imf) { + private Expression countConvert(UnaryScalarFunction convert, Source source, DataType type, TypeConflictField imf) { EsField field = new EsField(imf.getName(), type, imf.getProperties(), imf.isAggregatable(), imf.getTimeSeriesFieldType()); FieldAttribute originalFieldAttr = (FieldAttribute) convert.field(); FieldAttribute resolvedAttr = new FieldAttribute( @@ -3356,7 +3357,7 @@ private static List collectIncompatibleTypes(int columnIndex, List dataTypes = new ArrayList<>(); for (List out : outputs) { Attribute attr = out.get(columnIndex); - if (attr instanceof FieldAttribute fa && fa.field() instanceof InvalidMappedField imf) { + if (attr instanceof FieldAttribute fa && fa.field() instanceof TypeConflictField imf) { dataTypes.addAll(imf.types().stream().map(DataType::typeName).toList()); } else { dataTypes.add(attr.dataType().typeName()); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java index 2cd7a7f60f4eb..bd058c121b6d6 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java @@ 
-33,8 +33,8 @@ import org.elasticsearch.xpack.esql.core.tree.Node; import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.core.type.EsField; -import org.elasticsearch.xpack.esql.core.type.InvalidMappedField; import org.elasticsearch.xpack.esql.core.type.PotentiallyUnmappedKeywordEsField; +import org.elasticsearch.xpack.esql.core.type.TypeConflictField; import org.elasticsearch.xpack.esql.core.type.UnionTypeEsField; import org.elasticsearch.xpack.esql.core.type.UnsupportedEsField; import org.elasticsearch.xpack.esql.core.util.Holder; @@ -644,7 +644,7 @@ private static void collectPotentiallyUnmappedNonKeywords( for (Map.Entry entry : mapping.entrySet()) { String name = prefix == null ? entry.getKey() : prefix + "." + entry.getKey(); EsField field = entry.getValue(); - if (field instanceof InvalidMappedField imf && imf.isPotentiallyUnmapped()) { + if (field instanceof TypeConflictField imf && imf.isPotentiallyUnmapped()) { aggregator.add(name); } if (field.getProperties() != null && field.getProperties().isEmpty() == false) { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/expression/Expressions.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/expression/Expressions.java index c0d274bb51ece..da59e6b659a36 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/expression/Expressions.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/expression/Expressions.java @@ -8,7 +8,7 @@ import org.elasticsearch.core.Tuple; import org.elasticsearch.xpack.esql.core.type.DataType; -import org.elasticsearch.xpack.esql.core.type.InvalidMappedField; +import org.elasticsearch.xpack.esql.core.type.TypeConflictField; import java.util.ArrayList; import java.util.Collection; @@ -40,7 +40,7 @@ public static List asAttributes(List named * Converts named expressions to {@link ReferenceAttribute}s, preserving {@link NameId}s for attributes whose name 
* matches one in {@code existingOutput}. Genuinely new attributes get fresh NameIds. *
<p>
- * Exception: a {@link FieldAttribute} backed by an {@link InvalidMappedField} (ambiguous type across indices) is instead + * Exception: a {@link FieldAttribute} backed by a {@link TypeConflictField} (ambiguous type across indices) is instead * converted to an {@link UnsupportedAttribute} via * {@link FieldAttribute#flagTypeConflicts()}, so the analyzer can surface a clear user-facing error. */ @@ -60,7 +60,7 @@ public static List toReferenceAttributesPreservingIds( Attribute existing = existingByName.get(exp.name()); NameId id = existing != null ? existing.id() : new NameId(); Attribute refAttr = switch (exp) { - case FieldAttribute fa when fa.field() instanceof InvalidMappedField -> fa.flagTypeConflicts(); + case FieldAttribute fa when fa.field() instanceof TypeConflictField -> fa.flagTypeConflicts(); case ReferenceAttribute ra -> ra.withId(id); default -> new ReferenceAttribute(exp.source(), null, exp.name(), exp.dataType(), exp.nullable(), id, exp.synthetic()); }; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/expression/FieldAttribute.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/expression/FieldAttribute.java index f027cda423cba..f9f53d8633886 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/expression/FieldAttribute.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/expression/FieldAttribute.java @@ -16,7 +16,7 @@ import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.core.type.EsField; -import org.elasticsearch.xpack.esql.core.type.InvalidMappedField; +import org.elasticsearch.xpack.esql.core.type.TypeConflictField; import org.elasticsearch.xpack.esql.core.type.UnsupportedEsField; import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; import org.elasticsearch.xpack.esql.io.stream.PlanStreamOutput; @@ -243,12 +243,12 @@ public FieldName 
fieldName() { } /** - * If the underlying field is an {@link InvalidMappedField} (ambiguous type across indices), + * If the underlying field is a {@link TypeConflictField} (ambiguous type across indices), * converts this attribute into an {@link UnsupportedAttribute} with a descriptive error message * so the analyzer can surface a clear user-facing error. */ public Attribute flagTypeConflicts() { - if (field instanceof InvalidMappedField imf) { + if (field instanceof TypeConflictField imf) { // Field has conflicting types across indices — build a user-facing error message. String unresolvedMessage = "Cannot use field [" + name() + "] due to ambiguities being " + imf.errorMessage(); List types = imf.getTypesToIndices().keySet().stream().toList(); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/expression/TypeResolutions.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/expression/TypeResolutions.java index ea0988c5e0f89..78d8758879f48 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/expression/TypeResolutions.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/expression/TypeResolutions.java @@ -9,7 +9,7 @@ import org.elasticsearch.xpack.esql.core.expression.Expression.TypeResolution; import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.core.type.EsField; -import org.elasticsearch.xpack.esql.core.type.InvalidMappedField; +import org.elasticsearch.xpack.esql.core.type.TypeConflictField; import java.util.Locale; import java.util.StringJoiner; @@ -250,7 +250,7 @@ public static TypeResolution isType( // TODO: Shouldn't we perform widening of small numerical types here? 
if (allowUnionTypes && e instanceof FieldAttribute fa - && fa.field() instanceof InvalidMappedField imf + && fa.field() instanceof TypeConflictField imf && imf.types().stream().allMatch(predicate)) { return TypeResolution.TYPE_RESOLVED; } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/CompactInvalidMappedField.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/CompactInvalidMappedField.java new file mode 100644 index 0000000000000..a5f0df058ac9b --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/CompactInvalidMappedField.java @@ -0,0 +1,144 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.core.type; + +import org.elasticsearch.TransportVersion; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.xpack.esql.core.QlIllegalArgumentException; +import org.elasticsearch.xpack.esql.io.stream.PlanStreamOutput; + +import java.io.IOException; +import java.util.LinkedHashSet; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.TreeMap; + +/** + * Memory-frugal counterpart to {@link InvalidMappedField}: stores at most {@value #MAX_INDICES_PER_TYPE} concrete index names per source + * type (plus the {@value #ELLIPSIS} sentinel when more existed) instead of the full per-type index list. Wide union-typed fields routinely + * span thousands of indices but the only consumers that need the full list are the legacy index-keyed conversion structures, and they + * aren't used on transport versions that support {@link CompactMultiTypeEsField}. 
Truncating here lets the analyzed plan stay small while + * still producing a good "[a, b, c, ...]" error message: the message itself is rendered from the full input map at construction time and + * then stored as a string, so we lose only the post-construction ability to enumerate every index. + * + *
<p>
The two classes share the {@link TypeConflictField} interface so consumers (the analyzer, the verifier, type resolution) can branch + * on it instead of either concrete class. {@link CompactInvalidMappedField} deliberately does not extend + * {@link InvalidMappedField}: their on-the-wire form is identical (the truncated/full {@code typesToIndices} map is never serialized) so + * sharing implementation via inheritance would only obscure the fact that they're peer flavors of the same field shape. + * + *
<p>
Wire format matches {@link InvalidMappedField} byte-for-byte and reuses its writeable name, so a {@code CompactInvalidMappedField} + * round-trips through the wire as a plain {@link InvalidMappedField} on the receiving side. That's fine because {@code typesToIndices} + * is empty after deserialization anyway, so the truncation no longer matters. + */ +public class CompactInvalidMappedField extends EsField implements TypeConflictField { + private static final String ELLIPSIS = "..."; + private static final int MAX_INDICES_PER_TYPE = 3; + + private final String errorMessage; + private final Map> typesToIndices; + private final boolean isPotentiallyUnmapped; + + public CompactInvalidMappedField(String name, Map> typesToIndices) { + this(name, TypeConflictField.makeErrorMessage(typesToIndices, false), truncate(typesToIndices), false); + } + + public static CompactInvalidMappedField potentiallyUnmapped(String name, Map> typesToIndices) { + return new CompactInvalidMappedField( + name, + TypeConflictField.makeErrorMessage(typesToIndices, true), + truncate(typesToIndices), + true + ); + } + + private CompactInvalidMappedField( + String name, + String errorMessage, + Map> typesToIndices, + boolean isPotentiallyUnmapped + ) { + super(name, DataType.UNSUPPORTED, new TreeMap<>(), false, TimeSeriesFieldType.UNKNOWN); + this.errorMessage = errorMessage; + this.typesToIndices = typesToIndices; + this.isPotentiallyUnmapped = isPotentiallyUnmapped; + } + + @Override + public void writeContent(StreamOutput out) throws IOException { + ((PlanStreamOutput) out).writeCachedString(getName()); + out.writeString(errorMessage); + out.writeMap(getProperties(), (o, x) -> x.writeTo(out)); + writeTimeSeriesFieldType(out); + } + + @Override + public String getWriteableName(TransportVersion transportVersion) { + return "InvalidMappedField"; + } + + @Override + public String errorMessage() { + return errorMessage; + } + + @Override + public Map> getTypesToIndices() { + return typesToIndices; + } 
+ + @Override + public boolean isPotentiallyUnmapped() { + return isPotentiallyUnmapped; + } + + @Override + public EsField getExactField() { + throw new QlIllegalArgumentException("Field [" + getName() + "] is invalid, cannot access it"); + } + + @Override + public Exact getExactInfo() { + return new Exact(false, "Field [" + getName() + "] is invalid, cannot access it"); + } + + @Override + public int hashCode() { + return Objects.hash(super.hashCode(), errorMessage); + } + + @Override + public boolean equals(Object obj) { + if (super.equals(obj) == false) { + return false; + } + CompactInvalidMappedField other = (CompactInvalidMappedField) obj; + return Objects.equals(errorMessage, other.errorMessage); + } + + /** + * Cap each per-type index set at {@value #MAX_INDICES_PER_TYPE} entries, appending the {@value #ELLIPSIS} sentinel iff anything was + * dropped. The retained 3 are picked by sorted order so that the (already truncated) error message and the stored set stay + * consistent. + */ + private static Map> truncate(Map> typesToIndices) { + Map> result = new TreeMap<>(); + for (Map.Entry> entry : typesToIndices.entrySet()) { + Set indices = entry.getValue(); + if (indices.size() <= MAX_INDICES_PER_TYPE) { + result.put(entry.getKey(), Set.copyOf(indices)); + } else { + Set truncated = new LinkedHashSet<>(MAX_INDICES_PER_TYPE + 1); + indices.stream().sorted().limit(MAX_INDICES_PER_TYPE).forEach(truncated::add); + truncated.add(ELLIPSIS); + result.put(entry.getKey(), Set.copyOf(truncated)); + } + } + return result; + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/CompactMultiTypeEsField.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/CompactMultiTypeEsField.java index 3409ac70d1731..f589f1ec8369b 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/CompactMultiTypeEsField.java +++ 
b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/CompactMultiTypeEsField.java @@ -111,12 +111,12 @@ public Map getTypeToConversionExpressions() { } /** - * Build a {@link CompactMultiTypeEsField} from the per-type resolutions previously computed against an - * {@link InvalidMappedField}. Only types present in {@code imf.getTypesToIndices()} for which a + * Build a {@link CompactMultiTypeEsField} from the per-type resolutions previously computed against a + * {@link TypeConflictField}. Only types present in {@code imf.getTypesToIndices()} for which a * conversion was supplied are included. */ public static CompactMultiTypeEsField resolveFrom( - InvalidMappedField imf, + TypeConflictField imf, Map typesToConversionExpressions, @Nullable Expression unmappedConversionExpression ) { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/InvalidMappedField.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/InvalidMappedField.java index 592a63744d088..56af477918349 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/InvalidMappedField.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/InvalidMappedField.java @@ -15,12 +15,10 @@ import org.elasticsearch.xpack.esql.io.stream.PlanStreamOutput; import java.io.IOException; -import java.util.LinkedHashSet; import java.util.Map; import java.util.Objects; import java.util.Set; import java.util.TreeMap; -import java.util.stream.Collectors; /** * Representation of field mapped differently across indices; or being potentially unmapped in some, in which case it is treated as @@ -30,10 +28,7 @@ * not required through the cluster, only surviving as long as the Analyser phase of query planning. * It is used specifically for the 'union types' and 'unmapped fields' feature in ES|QL. 
*/ -public class InvalidMappedField extends EsField { - - private static final String ELLIPSIS = "..."; - private static final int MAX_INDICES_PER_TYPE = 3; +public class InvalidMappedField extends EsField implements TypeConflictField { private final String errorMessage; private final Map> typesToIndices; @@ -48,56 +43,27 @@ public InvalidMappedField(String name, String errorMessage) { } public InvalidMappedField(String name, Map> typesToIndices) { - this(name, makeErrorMessage(typesToIndices, false), new TreeMap<>(), typesToIndices, false, TimeSeriesFieldType.UNKNOWN); - } - - /** - * An {@link InvalidMappedField} is potentially unmapped if at least one index does not contain a mapping for the field, and the user - * requested we load the values from {@code _source}. In that case, there is (possibly) an additional type conflict since we treat - * unmapped fields as {@link DataType#KEYWORD}. - */ - public static InvalidMappedField potentiallyUnmapped(String name, Map> typesToIndices) { - return new InvalidMappedField( + this( name, - makeErrorMessage(typesToIndices, true), + TypeConflictField.makeErrorMessage(typesToIndices, false), new TreeMap<>(), typesToIndices, - true, - TimeSeriesFieldType.UNKNOWN - ); - } - - /** - * Memory-frugal variant: stores at most {@value #MAX_INDICES_PER_TYPE} concrete index names per source type (plus the {@value #ELLIPSIS} - * sentinel when more existed) instead of the full per-type index list. Wide union-typed fields routinely span thousands of indices but - * the only consumers that need the full list are the legacy index-keyed conversion structures, and they aren't used on transport - * versions that support {@link CompactMultiTypeEsField}. 
Truncating here lets the analyzed plan stay small while still producing a good - * "[a, b, c, ...]" error message: the message itself is rendered from the full input map at construction time and then stored as a - * string, so we lose only the post-construction ability to enumerate every index. - * - *
<p>
{@code typesToIndices} is not sent over the wire (it only matters during analysis on the coordinator), so the truncation is a - * coordinator-local memory optimization with no BWC implications. - */ - public static InvalidMappedField compact(String name, Map> typesToIndices) { - return new InvalidMappedField( - name, - makeErrorMessage(typesToIndices, false), - new TreeMap<>(), - truncate(typesToIndices), false, TimeSeriesFieldType.UNKNOWN ); } /** - * {@link #potentiallyUnmapped} counterpart of {@link #compact}. + * An {@link InvalidMappedField} is potentially unmapped if at least one index does not contain a mapping for the field, and the user + * requested we load the values from {@code _source}. In that case, there is (possibly) an additional type conflict since we treat + * unmapped fields as {@link DataType#KEYWORD}. */ - public static InvalidMappedField compactPotentiallyUnmapped(String name, Map> typesToIndices) { + public static InvalidMappedField potentiallyUnmapped(String name, Map> typesToIndices) { return new InvalidMappedField( name, - makeErrorMessage(typesToIndices, true), + TypeConflictField.makeErrorMessage(typesToIndices, true), new TreeMap<>(), - truncate(typesToIndices), + typesToIndices, true, TimeSeriesFieldType.UNKNOWN ); @@ -128,10 +94,6 @@ protected InvalidMappedField(StreamInput in) throws IOException { ); } - public Set types() { - return typesToIndices.keySet().stream().map(DataType::fromTypeName).collect(Collectors.toSet()); - } - @Override public void writeContent(StreamOutput out) throws IOException { ((PlanStreamOutput) out).writeCachedString(getName()); @@ -144,6 +106,7 @@ public String getWriteableName(TransportVersion transportVersion) { return "InvalidMappedField"; } + @Override public String errorMessage() { return errorMessage; } @@ -174,68 +137,13 @@ public Exact getExactInfo() { return new Exact(false, "Field [" + getName() + "] is invalid, cannot access it"); } + @Override public Map> getTypesToIndices() { return 
typesToIndices; } + @Override public boolean isPotentiallyUnmapped() { return isPotentiallyUnmapped; } - - private static String makeErrorMessage(Map> typesToIndices, boolean includeInsistKeyword) { - StringBuilder errorMessage = new StringBuilder(); - var isInsistKeywordOnlyKeyword = includeInsistKeyword && typesToIndices.containsKey(DataType.KEYWORD.typeName()) == false; - errorMessage.append("mapped as ["); - errorMessage.append(typesToIndices.size() + (isInsistKeywordOnlyKeyword ? 1 : 0)); - errorMessage.append("] incompatible types: "); - boolean first = true; - if (isInsistKeywordOnlyKeyword) { - first = false; - errorMessage.append("[keyword] due to loading from _source"); - } - for (Map.Entry> e : typesToIndices.entrySet()) { - if (first) { - first = false; - } else { - errorMessage.append(", "); - } - errorMessage.append("["); - errorMessage.append(e.getKey()); - errorMessage.append("] "); - if (e.getKey().equals(DataType.KEYWORD.typeName()) && includeInsistKeyword) { - errorMessage.append("due to loading from _source and in "); - } else { - errorMessage.append("in "); - } - if (e.getValue().size() <= 3) { - errorMessage.append(e.getValue()); - } else { - errorMessage.append(e.getValue().stream().sorted().limit(3).collect(Collectors.toList())); - errorMessage.append(" and [" + (e.getValue().size() - 3) + "] other "); - errorMessage.append(e.getValue().size() == 4 ? "index" : "indices"); - } - } - return errorMessage.toString(); - } - - /** - * Cap each per-type index set at {@value #MAX_INDICES_PER_TYPE} entries, appending the {@value #ELLIPSIS} sentinel iff anything was - * dropped. The retained entries are picked by sorted order so that the (already truncated) error message and the stored set stay - * consistent. 
- */ - private static Map> truncate(Map> typesToIndices) { - Map> result = new TreeMap<>(); - for (Map.Entry> entry : typesToIndices.entrySet()) { - Set indices = entry.getValue(); - if (indices.size() <= MAX_INDICES_PER_TYPE) { - result.put(entry.getKey(), Set.copyOf(indices)); - } else { - Set truncated = new LinkedHashSet<>(MAX_INDICES_PER_TYPE + 1); - indices.stream().sorted().limit(MAX_INDICES_PER_TYPE).forEach(truncated::add); - truncated.add(ELLIPSIS); - result.put(entry.getKey(), Set.copyOf(truncated)); - } - } - return result; - } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/TypeConflictField.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/TypeConflictField.java new file mode 100644 index 0000000000000..4a309e2820614 --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/TypeConflictField.java @@ -0,0 +1,98 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.core.type; + +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +/** + * Common surface for the two ESQL field flavors that carry per-index type-conflict information: the legacy + * {@link InvalidMappedField} (full per-type index lists) and the memory-frugal {@link CompactInvalidMappedField} (truncated lists). + * Production code that consumes these fields (analyzer rules, the verifier, type resolution) should branch on this interface so it + * stays oblivious to which flavor a particular {@link EsField} happens to be. + * + *

The {@code getName} / {@code getProperties} / {@code isAggregatable} / {@code getTimeSeriesFieldType} accessors are all + * provided for free by {@link EsField}, which both implementations extend; they're declared here so consumers can pull everything + * they need off a single typed reference. + */ +public interface TypeConflictField { + + String getName(); + + Map getProperties(); + + boolean isAggregatable(); + + EsField.TimeSeriesFieldType getTimeSeriesFieldType(); + + /** + * Pre-rendered, user-facing error message describing the conflict. Built from the full input map at construction time so it + * survives the index-list truncation done by {@link CompactInvalidMappedField}. + */ + String errorMessage(); + + /** + * Per-source-type indices in which the field appears with that type. Note that {@link CompactInvalidMappedField} caps each set + * and may include the {@code "..."} sentinel; callers that need a complete index list should use {@link InvalidMappedField} + * instead. + */ + Map> getTypesToIndices(); + + /** + * Whether the field is unmapped in at least one index, in which case it is treated as {@link DataType#KEYWORD} for the unmapped + * indices. + */ + boolean isPotentiallyUnmapped(); + + /** + * Source data types observed for this field across all indices. + */ + default Set types() { + return getTypesToIndices().keySet().stream().map(DataType::fromTypeName).collect(Collectors.toSet()); + } + + /** + * Build the user-facing error message for a per-type-to-indices map. Shared between both implementations so they stay in sync. + */ + static String makeErrorMessage(Map> typesToIndices, boolean includeInsistKeyword) { + StringBuilder errorMessage = new StringBuilder(); + var isInsistKeywordOnlyKeyword = includeInsistKeyword && typesToIndices.containsKey(DataType.KEYWORD.typeName()) == false; + errorMessage.append("mapped as ["); + errorMessage.append(typesToIndices.size() + (isInsistKeywordOnlyKeyword ? 
1 : 0)); + errorMessage.append("] incompatible types: "); + boolean first = true; + if (isInsistKeywordOnlyKeyword) { + first = false; + errorMessage.append("[keyword] due to loading from _source"); + } + for (Map.Entry> e : typesToIndices.entrySet()) { + if (first) { + first = false; + } else { + errorMessage.append(", "); + } + errorMessage.append("["); + errorMessage.append(e.getKey()); + errorMessage.append("] "); + if (e.getKey().equals(DataType.KEYWORD.typeName()) && includeInsistKeyword) { + errorMessage.append("due to loading from _source and in "); + } else { + errorMessage.append("in "); + } + if (e.getValue().size() <= 3) { + errorMessage.append(e.getValue()); + } else { + errorMessage.append(e.getValue().stream().sorted().limit(3).collect(Collectors.toList())); + errorMessage.append(" and [" + (e.getValue().size() - 3) + "] other "); + errorMessage.append(e.getValue().size() == 4 ? "index" : "indices"); + } + } + return errorMessage.toString(); + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/IndexResolver.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/IndexResolver.java index 482b01a5f3e3d..8f2d87cb7c006 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/IndexResolver.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/IndexResolver.java @@ -40,6 +40,7 @@ import org.elasticsearch.xpack.esql.core.type.PotentiallyUnmappedKeywordEsField; import org.elasticsearch.xpack.esql.core.type.SupportedVersion; import org.elasticsearch.xpack.esql.core.type.TextEsField; +import org.elasticsearch.xpack.esql.core.type.TypeConflictField; import org.elasticsearch.xpack.esql.core.type.UnsupportedEsField; import org.elasticsearch.xpack.esql.index.EsIndex; import org.elasticsearch.xpack.esql.index.IndexResolution; @@ -538,7 +539,7 @@ public static EsField wrapPartiallyUnmappedField(EsField field, String name, Str case KEYWORD -> new 
PotentiallyUnmappedKeywordEsField(fullName); default -> InvalidMappedField.potentiallyUnmapped( name, - field instanceof InvalidMappedField imf + field instanceof TypeConflictField imf ? imf.getTypesToIndices() : Map.of(field.getDataType().widenSmallNumeric().typeName(), mappedIndices) ); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/InvalidMappedFieldCompactTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/CompactInvalidMappedFieldTests.java similarity index 81% rename from x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/InvalidMappedFieldCompactTests.java rename to x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/CompactInvalidMappedFieldTests.java index 909f880380378..34df3d1f441a0 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/InvalidMappedFieldCompactTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/CompactInvalidMappedFieldTests.java @@ -9,6 +9,7 @@ import org.elasticsearch.core.Strings; import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.esql.core.type.CompactInvalidMappedField; import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.core.type.InvalidMappedField; @@ -25,11 +26,7 @@ import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; -/** - * Tests for {@link InvalidMappedField#compact} and {@link InvalidMappedField#compactPotentiallyUnmapped}, which truncate per-type index - * lists for memory-frugal analysis on the coordinator. 
- */ -public class InvalidMappedFieldCompactTests extends ESTestCase { +public class CompactInvalidMappedFieldTests extends ESTestCase { public void testKeepsAllIndicesWhenAtOrBelowLimit() { Map> input = Map.of( DataType.KEYWORD.typeName(), @@ -39,7 +36,7 @@ public void testKeepsAllIndicesWhenAtOrBelowLimit() { ); assertMap( - InvalidMappedField.compact("f", input).getTypesToIndices(), + new CompactInvalidMappedField("f", input).getTypesToIndices(), matchesMap().entry(DataType.KEYWORD.typeName(), Set.of("idx_a", "idx_b")) .entry(DataType.LONG.typeName(), Set.of("idx_c", "idx_d", "idx_e")) ); @@ -50,7 +47,7 @@ public void testTruncatesAboveLimitAndAddsEllipsisSentinel() { .mapToObj(i -> Strings.format("idx_%05d", i)) .collect(Collectors.toCollection(LinkedHashSet::new)); - InvalidMappedField field = InvalidMappedField.compact("f", Map.of(DataType.KEYWORD.typeName(), manyIndices)); + CompactInvalidMappedField field = new CompactInvalidMappedField("f", Map.of(DataType.KEYWORD.typeName(), manyIndices)); assertMap( field.getTypesToIndices(), @@ -64,7 +61,7 @@ public void testErrorMessageReflectsFullInputCountEvenAfterTruncation() { .collect(Collectors.toCollection(LinkedHashSet::new)); Map> input = new TreeMap<>(Map.of(DataType.KEYWORD.typeName(), manyIndices)); - String message = InvalidMappedField.compact("f", input).errorMessage(); + String message = new CompactInvalidMappedField("f", input).errorMessage(); assertThat(message, containsString("[1] incompatible types")); assertThat(message, containsString("[idx_00000, idx_00001, idx_00002]")); @@ -81,13 +78,13 @@ public void testErrorMessageMatchesInvalidMappedFieldForSmallInputs() { ) ); - assertThat(InvalidMappedField.compact("f", input).errorMessage(), equalTo(new InvalidMappedField("f", input).errorMessage())); + assertThat(new CompactInvalidMappedField("f", input).errorMessage(), equalTo(new InvalidMappedField("f", input).errorMessage())); } public void testPotentiallyUnmappedFlagAndMessageInsistOnKeyword() { Map> 
input = new TreeMap<>(Map.of(DataType.LONG.typeName(), new LinkedHashSet<>(Set.of("idx_a")))); - InvalidMappedField field = InvalidMappedField.compactPotentiallyUnmapped("f", input); + CompactInvalidMappedField field = CompactInvalidMappedField.potentiallyUnmapped("f", input); assertThat(field.isPotentiallyUnmapped(), equalTo(true)); assertThat(field.errorMessage(), containsString("[keyword] due to loading from _source")); @@ -104,6 +101,6 @@ public void testTypesReflectsKeysOfTruncatedMap() { ) ); - assertThat(InvalidMappedField.compact("f", input).types(), containsInAnyOrder(DataType.KEYWORD, DataType.LONG)); + assertThat(new CompactInvalidMappedField("f", input).types(), containsInAnyOrder(DataType.KEYWORD, DataType.LONG)); } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsFieldMemoryTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsFieldMemoryTests.java index 674086f7fcd99..38cabe0599311 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsFieldMemoryTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsFieldMemoryTests.java @@ -11,6 +11,7 @@ import org.elasticsearch.index.IndexMode; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.TransportVersionUtils; +import org.elasticsearch.xpack.esql.core.type.CompactInvalidMappedField; import org.elasticsearch.xpack.esql.core.type.CompactMultiTypeEsField; import org.elasticsearch.xpack.esql.core.type.EsField; import org.elasticsearch.xpack.esql.core.type.InvalidMappedField; @@ -34,7 +35,7 @@ /** * End-to-end check that an analyzed plan over many union-typed fields, each conflicting across thousands of indices, retains substantially - * less memory under {@link CompactMultiTypeEsField} (paired with {@link InvalidMappedField#compact}'s truncated index lists) than under + * less memory under {@link CompactMultiTypeEsField} (paired with 
{@link CompactInvalidMappedField}'s truncated index lists) than under * the legacy {@link org.elasticsearch.xpack.esql.core.type.MultiTypeEsField} (keyed per-index) paired with a full {@link InvalidMappedField}. * *

The cost we're targeting is {@code O(num_fields * num_indices)}: each conflicting field expands into its own per-index conversion @@ -92,7 +93,7 @@ private static String buildExplicitConversionQuery(int numFields) { /** * Build a fake "idx*" pattern with {@code numIndices} concrete indices and {@code numConflictingFields} fields {@code id_0..id_}, * each with type {@code keyword} in half of the indices and {@code integer} in the other half. When {@code compact} is true the - * conflicting fields are built from {@link InvalidMappedField#compact} (truncated index lists), matching what a v2-capable coordinator + * conflicting fields are built from {@link CompactInvalidMappedField} (truncated index lists), matching what a v2-capable coordinator * produces; otherwise the full {@link InvalidMappedField} is used. */ private static IndexResolution unionTypedIndex(boolean compact) { @@ -112,7 +113,7 @@ private static IndexResolution unionTypedIndex(boolean compact) { mapping.put( fieldName, compact - ? InvalidMappedField.compact(fieldName, perFieldTypesToIndices) + ? 
new CompactInvalidMappedField(fieldName, perFieldTypesToIndices) : new InvalidMappedField(fieldName, perFieldTypesToIndices) ); } From fd03d2111dcd9a1c53dc8e1a8703cdb8290648c1 Mon Sep 17 00:00:00 2001 From: Gal Lalouche Date: Thu, 30 Apr 2026 15:34:51 +0300 Subject: [PATCH 12/20] More human refacotrs --- .../xpack/esql/analysis/Analyzer.java | 105 +++++++++--------- .../core/type/CompactInvalidMappedField.java | 44 ++++---- .../core/type/CompactMultiTypeEsField.java | 13 ++- .../esql/core/type/InvalidMappedField.java | 3 +- .../esql/core/type/MultiTypeEsField.java | 9 +- .../esql/core/type/TypeConflictField.java | 4 +- .../esql/core/type/UnionTypeEsField.java | 12 +- .../planner/EsPhysicalOperationProviders.java | 28 +++-- .../type/CompactMultiTypeEsFieldTests.java | 1 - .../type/MultiTypeEsFieldMemoryTests.java | 38 +++---- 10 files changed, 128 insertions(+), 129 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java index 5e3ae179df455..c5729b2c52d46 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java @@ -7,6 +7,7 @@ package org.elasticsearch.xpack.esql.analysis; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.TransportVersion; import org.elasticsearch.common.logging.HeaderWarning; import org.elasticsearch.common.logging.LoggerMessageFormat; @@ -176,7 +177,6 @@ import java.util.Arrays; import java.util.BitSet; import java.util.Collection; -import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; @@ -489,8 +489,8 @@ protected LogicalPlan rule(UnresolvedExternalRelation plan, AnalyzerContext cont private String extractTablePath(Expression tablePath) { if (tablePath instanceof Literal literal && literal.value() != null) 
{ Object value = literal.value(); - if (value instanceof org.apache.lucene.util.BytesRef) { - return BytesRefs.toString((org.apache.lucene.util.BytesRef) value); + if (value instanceof BytesRef) { + return BytesRefs.toString((BytesRef) value); } return value.toString(); } @@ -1274,7 +1274,7 @@ private LogicalPlan resolveFuse(Fuse fuse, List childrenOutput) { LogicalPlan scoreEval = new FuseScoreEval(source, fuse.child(), score, discriminator, fuse.fuseType(), fuse.options()); // create aggregations - Expression aggFilter = new Literal(source, true, DataType.BOOLEAN); + Expression aggFilter = new Literal(source, true, BOOLEAN); List aggregates = new ArrayList<>(); aggregates.add( @@ -1315,7 +1315,7 @@ private LogicalPlan resolvePromql(PromqlCommand promql, List children ); // Wrap in an explicit LIMIT 0 so that AddImplicitLimit skips the "No limit defined" warning, // which would otherwise fire because the LocalRelation contains no PromqlCommand marker. - return new Limit(source, new Literal(source, 0, DataType.INTEGER), localRelation); + return new Limit(source, new Literal(source, 0, INTEGER), localRelation); } LogicalPlan promqlPlan = promql.promqlPlan(); Function lambda = ua -> maybeResolveAttribute(ua, childrenOutput); @@ -1672,7 +1672,7 @@ private static List potentialCandidatesIfNoMatchesFound( UnresolvedAttribute ua, List matches, Collection attrList, - java.util.function.Function, String> messageProducer + Function, String> messageProducer ) { if (ua.customMessage()) { return List.of(); @@ -1867,7 +1867,7 @@ public LogicalPlan apply(LogicalPlan logicalPlan, AnalyzerContext context) { // entries to the max } var source = logicalPlan.source(); - return new Limit(source, new Literal(source, limit, DataType.INTEGER), logicalPlan); + return new Limit(source, new Literal(source, limit, INTEGER), logicalPlan); } } @@ -2250,10 +2250,6 @@ private static class ResolveUnionTypes extends ParameterizedRule { if (supportedTypes.contains(type.widenSmallNumeric())) { @@ 
-2384,7 +2380,7 @@ private static Expression resolveConvertFunction( } } else if (convert.field() instanceof FieldAttribute fa && fa.synthetic() == false // MultiTypeEsField in EsRelation created by DateMillisToNanosInEsRelation has synthetic = false - && isMultiType(fa.field())) { + && fa.field() instanceof UnionTypeEsField unionTypeEsField) { // This is an explicit casting of a union typed field that has been converted to MultiTypeEsField in EsRelation by // DateMillisToNanosInEsRelation, it is not necessary to cast it again to the same type, replace the implicit casting // with explicit casting. However, it is useful to differentiate implicit and explicit casting in some cases, for @@ -2398,60 +2394,63 @@ && isMultiType(fa.field())) { // Data type is different between implicit(date_nanos) and explicit casting, if the conversion is supported, create a // new MultiTypeEsField with explicit casting type, and add it to unionFieldAttributes. Set supportedTypes = convert.supportedTypes(); - if (supportedTypes.contains(fa.dataType()) && canConvertOriginalTypes(fa.field(), supportedTypes)) { + if (supportedTypes.contains(fa.dataType()) && canConvertOriginalTypes(unionTypeEsField, supportedTypes)) { // The only code that creates MultiTypeEsField with synthetic=false (reaching this branch) is // DateMillisToNanosInEsRelation, which runs in the "Initialize" batch before ResolveUnmapped. At that point, // unmapped fields haven't been detected yet, so potentiallyUnmappedExpression is always null. 
if (((UnionTypeEsField) fa.field()).getUnmappedConversionExpression() != null) { throw new IllegalStateException("Unexpected potentially unmapped expression for [" + fa.fieldName() + "]"); } - EsField multiTypeEsField = rewrapWithCast(fa, convertExpression, context); + EsField multiTypeEsField = rewrapWithCast(fa, convertExpression); return createIfDoesNotAlreadyExist(fa, multiTypeEsField, unionFieldAttributes); } } else if (convert.field() instanceof AbstractConvertFunction subConvert) { return convertExpression.replaceChildren( - Collections.singletonList(resolveConvertFunction(subConvert, unionFieldAttributes, context)) + singletonList(resolveConvertFunction(subConvert, unionFieldAttributes, context)) ); } return convertExpression; } - private static EsField rewrapWithCast(FieldAttribute fa, Expression convertExpression, AnalyzerContext context) { - // Wraps an existing union-type field's per-(index|type) conversions with another conversion expression on top, so the - // composite expression first does the original cast then the additional cast. Works for both legacy and v2 MultiTypeEsField. 
- if (fa.field() instanceof MultiTypeEsField mtf) { - Map indexToConversionExpressions = new HashMap<>(); - for (Map.Entry entry : mtf.getIndexToConversionExpressions().entrySet()) { - indexToConversionExpressions.put(entry.getKey(), wrapWith(convertExpression, entry.getValue())); - } - return new MultiTypeEsField( - fa.fieldName().string(), - convertExpression.dataType(), - false, - indexToConversionExpressions, - fa.field().getTimeSeriesFieldType(), - null - ); - } else { - CompactMultiTypeEsField mtf = (CompactMultiTypeEsField) fa.field(); - Map typeToConversionExpressions = new HashMap<>(); - for (Map.Entry entry : mtf.getTypeToConversionExpressions().entrySet()) { - typeToConversionExpressions.put(entry.getKey(), wrapWith(convertExpression, entry.getValue())); - } - return new CompactMultiTypeEsField( - fa.fieldName().string(), - convertExpression.dataType(), - false, - typeToConversionExpressions, - fa.field().getTimeSeriesFieldType(), - null - ); + // Wraps an existing union-type field's per-(index|type) conversions with another conversion expression on top, so the + // composite expression first does the original cast then the additional cast. 
+ private static EsField rewrapWithCast(FieldAttribute fa, Expression convertExpression) { + // FIXME(gal, NOCOMMIT) reduce duplication + switch ((UnionTypeEsField) fa.field()) { + case CompactMultiTypeEsField compact -> { + Map typeToConversionExpressions = new HashMap<>(); + for (Map.Entry entry : compact.getTypeToConversionExpressions().entrySet()) { + typeToConversionExpressions.put(entry.getKey(), wrapWith(convertExpression, entry.getValue())); + } + return new CompactMultiTypeEsField( + fa.fieldName().string(), + convertExpression.dataType(), + false, + typeToConversionExpressions, + fa.field().getTimeSeriesFieldType(), + null + ); + } + case MultiTypeEsField legacy -> { + Map indexToConversionExpressions = new HashMap<>(); + for (Map.Entry entry : legacy.getIndexToConversionExpressions().entrySet()) { + indexToConversionExpressions.put(entry.getKey(), wrapWith(convertExpression, entry.getValue())); + } + return new MultiTypeEsField( + fa.fieldName().string(), + convertExpression.dataType(), + false, + indexToConversionExpressions, + fa.field().getTimeSeriesFieldType(), + null + ); + } } } private static Expression wrapWith(Expression convertExpression, Expression originalConvertFunction) { AbstractConvertFunction inner = (AbstractConvertFunction) originalConvertFunction; - return convertExpression.replaceChildren(Collections.singletonList(inner.field())); + return convertExpression.replaceChildren(singletonList(inner.field())); } private static Expression createIfDoesNotAlreadyExist( @@ -2501,11 +2500,9 @@ static EsField resolvedMultiTypeEsField( return buildMultiTypeEsField(imf, typesToConversionExpressions, potentiallyUnmappedConversion, context); } - private static boolean canConvertOriginalTypes(EsField multiTypeEsField, Set supportedTypes) { - Collection conversionExpressions = multiTypeEsField instanceof MultiTypeEsField legacy - ? 
legacy.getIndexToConversionExpressions().values() - : ((CompactMultiTypeEsField) multiTypeEsField).getTypeToConversionExpressions().values(); - return conversionExpressions.stream() + private static boolean canConvertOriginalTypes(UnionTypeEsField unionTypeEsField, Set supportedTypes) { + return unionTypeEsField.getConversionExpressions() + .stream() .allMatch( e -> e instanceof AbstractConvertFunction convertFunction && supportedTypes.contains(convertFunction.field().dataType().widenSmallNumeric()) @@ -3028,7 +3025,7 @@ private static LogicalPlan maybePushDownConvertFunctions( Alias newAlias = new Alias( oldAttr.source(), newAliasName, // oldAttrName$$converted_to$$targetType - convert.replaceChildren(Collections.singletonList(oldAttr)), + convert.replaceChildren(singletonList(oldAttr)), null, // generate a new id true // this'll be used to Project the synthetic attributes out when finishing analysis ); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/CompactInvalidMappedField.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/CompactInvalidMappedField.java index a5f0df058ac9b..f064bf95d75bd 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/CompactInvalidMappedField.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/CompactInvalidMappedField.java @@ -7,25 +7,27 @@ package org.elasticsearch.xpack.esql.core.type; +import org.checkerframework.checker.nullness.qual.NonNull; import org.elasticsearch.TransportVersion; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.xpack.esql.core.QlIllegalArgumentException; -import org.elasticsearch.xpack.esql.io.stream.PlanStreamOutput; import java.io.IOException; +import java.util.Collections; import java.util.LinkedHashSet; import java.util.Map; import java.util.Objects; import java.util.Set; import java.util.TreeMap; +// FIXME(gal, NOCOMMIT) Go over these docs /** * 
Memory-frugal counterpart to {@link InvalidMappedField}: stores at most {@value #MAX_INDICES_PER_TYPE} concrete index names per source - * type (plus the {@value #ELLIPSIS} sentinel when more existed) instead of the full per-type index list. Wide union-typed fields routinely - * span thousands of indices but the only consumers that need the full list are the legacy index-keyed conversion structures, and they - * aren't used on transport versions that support {@link CompactMultiTypeEsField}. Truncating here lets the analyzed plan stay small while - * still producing a good "[a, b, c, ...]" error message: the message itself is rendered from the full input map at construction time and - * then stored as a string, so we lose only the post-construction ability to enumerate every index. + * type instead of the full per-type index list. Wide union-typed fields routinely span thousands of indices but the only consumers that + * need the full list are the legacy index-keyed conversion structures, and they aren't used on transport versions that support + * {@link CompactMultiTypeEsField}. Truncating here lets the analyzed plan stay small while still producing a good "[a, b, c, ...]" error + * message: the message itself is rendered from the full input map at construction time and then stored as a string, so we lose only the + * post-construction ability to enumerate every index. * *

The two classes share the {@link TypeConflictField} interface so consumers (the analyzer, the verifier, type resolution) can branch * on it instead of either concrete class. {@link CompactInvalidMappedField} deliberately does not extend @@ -36,8 +38,7 @@ * round-trips through the wire as a plain {@link InvalidMappedField} on the receiving side. That's fine because {@code typesToIndices} * is empty after deserialization anyway, so the truncation no longer matters. */ -public class CompactInvalidMappedField extends EsField implements TypeConflictField { - private static final String ELLIPSIS = "..."; +public final class CompactInvalidMappedField extends EsField implements TypeConflictField { private static final int MAX_INDICES_PER_TYPE = 3; private final String errorMessage; @@ -71,10 +72,7 @@ private CompactInvalidMappedField( @Override public void writeContent(StreamOutput out) throws IOException { - ((PlanStreamOutput) out).writeCachedString(getName()); - out.writeString(errorMessage); - out.writeMap(getProperties(), (o, x) -> x.writeTo(out)); - writeTimeSeriesFieldType(out); + throw new UnsupportedOperationException("CompactInvalidMappedField shouldn't be transported"); } @Override @@ -121,24 +119,20 @@ public boolean equals(Object obj) { return Objects.equals(errorMessage, other.errorMessage); } - /** - * Cap each per-type index set at {@value #MAX_INDICES_PER_TYPE} entries, appending the {@value #ELLIPSIS} sentinel iff anything was - * dropped. The retained 3 are picked by sorted order so that the (already truncated) error message and the stored set stay - * consistent. - */ + /** Cap each per-type index set at {@value #MAX_INDICES_PER_TYPE} entries. 
*/ private static Map> truncate(Map> typesToIndices) { Map> result = new TreeMap<>(); for (Map.Entry> entry : typesToIndices.entrySet()) { Set indices = entry.getValue(); - if (indices.size() <= MAX_INDICES_PER_TYPE) { - result.put(entry.getKey(), Set.copyOf(indices)); - } else { - Set truncated = new LinkedHashSet<>(MAX_INDICES_PER_TYPE + 1); - indices.stream().sorted().limit(MAX_INDICES_PER_TYPE).forEach(truncated::add); - truncated.add(ELLIPSIS); - result.put(entry.getKey(), Set.copyOf(truncated)); - } + result.put(entry.getKey(), indices.size() <= MAX_INDICES_PER_TYPE ? Set.copyOf(indices) : truncate(indices)); } return result; } + + private static @NonNull Set truncate(Set indices) { + Set truncated = new LinkedHashSet<>(MAX_INDICES_PER_TYPE + 1); + indices.stream().sorted().limit(MAX_INDICES_PER_TYPE).forEach(truncated::add); + truncated.add("..."); + return Collections.unmodifiableSet(truncated); + } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/CompactMultiTypeEsField.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/CompactMultiTypeEsField.java index f589f1ec8369b..c6588431032d4 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/CompactMultiTypeEsField.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/CompactMultiTypeEsField.java @@ -17,12 +17,13 @@ import org.elasticsearch.xpack.esql.io.stream.PlanStreamOutput; import java.io.IOException; +import java.util.Collection; import java.util.HashMap; import java.util.Map; import java.util.Objects; import java.util.Set; -// FIXME(gal, NOCOMMIT) Go over this javadocs +// FIXME(gal, NOCOMMIT) Go over these javadocs // FIXME(gal, NOCOMMIT) Reduce duplication with MultiTypeEsField /** * Memory-efficient variant of {@link MultiTypeEsField} that stores the per-source-type conversion @@ -35,7 +36,7 @@ * legacy {@link MultiTypeEsField} when the cluster minimum transport version does not 
yet support * {@code esql_multi_type_es_field_2}. */ -public class CompactMultiTypeEsField extends EsField implements UnionTypeEsField { +public final class CompactMultiTypeEsField extends EsField implements UnionTypeEsField { // FIXME(gal, NOCOMMIT) rename public static final TransportVersion ESQL_MULTI_TYPE_ES_FIELD_2 = TransportVersion.fromName("esql_multi_type_es_field_2"); @@ -96,10 +97,14 @@ public Map getTypeToConversionExpressions() { return typeToConversionExpressions; } + @Override + public Collection getConversionExpressions() { + return typeToConversionExpressions.values(); + } + /** * Returns the conversion expression to apply for the given source {@link DataType}, or {@code null} - * if no conversion is registered for that type. Callers should fall back to - * {@link #getUnmappedConversionExpression()} when the field is unmapped in the local index. + * if no conversion is registered for that type. */ public @Nullable Expression getConversionExpressionForType(DataType type) { return typeToConversionExpressions.get(type); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/InvalidMappedField.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/InvalidMappedField.java index 56af477918349..82917b8322ec3 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/InvalidMappedField.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/InvalidMappedField.java @@ -20,6 +20,7 @@ import java.util.Set; import java.util.TreeMap; +// FIXME(gal, NOCOMMIT) Redocument that this is for BWC /** * Representation of field mapped differently across indices; or being potentially unmapped in some, in which case it is treated as * {@link DataType#KEYWORD} in the indices where it is unmapped. @@ -28,7 +29,7 @@ * not required through the cluster, only surviving as long as the Analyser phase of query planning. 
* It is used specifically for the 'union types' and 'unmapped fields' feature in ES|QL. */ -public class InvalidMappedField extends EsField implements TypeConflictField { +public final class InvalidMappedField extends EsField implements TypeConflictField { private final String errorMessage; private final Map> typesToIndices; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/MultiTypeEsField.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/MultiTypeEsField.java index 42cac052be5aa..b658372df6fd5 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/MultiTypeEsField.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/MultiTypeEsField.java @@ -17,11 +17,13 @@ import org.elasticsearch.xpack.esql.io.stream.PlanStreamOutput; import java.io.IOException; +import java.util.Collection; import java.util.HashMap; import java.util.Map; import java.util.Objects; import java.util.Set; +// FIXME(gal, NOCOMMIT) Redocument that this is for BWC. /** * During IndexResolution it can occur that the same field is mapped to different types in different indices. * An {@link InvalidMappedField} holds that information and allows for later resolution of the field @@ -31,7 +33,7 @@ * this class instead of the {@link InvalidMappedField}. * This class is sent to the data nodes to inform them that they have to convert the type directly during field extraction. 
*/ -public class MultiTypeEsField extends EsField implements UnionTypeEsField { +public final class MultiTypeEsField extends EsField implements UnionTypeEsField { private static final TransportVersion POTENTIALLY_UNMAPPED_EXPRESSION = TransportVersion.fromName( "esql_potentially_unmapped_expression" ); @@ -103,6 +105,11 @@ public Map getIndexToConversionExpressions() { return indexToConversionExpressions; } + @Override + public Collection getConversionExpressions() { + return indexToConversionExpressions.values(); + } + public @Nullable Expression getConversionExpressionForIndex(String indexName) { return indexToConversionExpressions.get(indexName); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/TypeConflictField.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/TypeConflictField.java index 4a309e2820614..aeb0d3b1ad2ef 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/TypeConflictField.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/TypeConflictField.java @@ -11,6 +11,7 @@ import java.util.Set; import java.util.stream.Collectors; +// FIXME(gal, NOCOMMIT) Go over these docs /** * Common surface for the two ESQL field flavors that carry per-index type-conflict information: the legacy * {@link InvalidMappedField} (full per-type index lists) and the memory-frugal {@link CompactInvalidMappedField} (truncated lists). @@ -21,8 +22,7 @@ * provided for free by {@link EsField}, which both implementations extend; they're declared here so consumers can pull everything * they need off a single typed reference. 
*/ -public interface TypeConflictField { - +public sealed interface TypeConflictField permits InvalidMappedField, CompactInvalidMappedField { String getName(); Map getProperties(); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/UnionTypeEsField.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/UnionTypeEsField.java index 5eba83c5f9eb5..022dabe688f05 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/UnionTypeEsField.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/UnionTypeEsField.java @@ -10,19 +10,21 @@ import org.elasticsearch.core.Nullable; import org.elasticsearch.xpack.esql.core.expression.Expression; +import java.util.Collection; + /** * Common interface implemented by both {@link MultiTypeEsField} (legacy, keyed by index name) and * {@link CompactMultiTypeEsField} (newer, keyed by source data type) so that callers that only care about * the existence of a per-(index|type) conversion or about the unmapped-side conversion can treat the * two implementations uniformly. */ -public interface UnionTypeEsField { - +public sealed interface UnionTypeEsField permits MultiTypeEsField, CompactMultiTypeEsField { /** - * Conversion expression to apply when the field is unmapped in the index, treating it as - * {@link DataType#KEYWORD}, or {@code null} if there is no such conversion (i.e. unmapped indices - * should produce {@code null}). + * Conversion expression to apply when the field is unmapped in the index, treating it as {@link DataType#KEYWORD}, or {@code null} + * if there is no such conversion (i.e., unmapped indices should produce {@code null}). 
*/ @Nullable Expression getUnmappedConversionExpression(); + + Collection getConversionExpressions(); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java index 239e498b8807d..f67cee946e116 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java @@ -265,21 +265,19 @@ private ValuesSourceReaderOperator.LoaderAndConverter blockLoaderAndConverter( ); return ValuesSourceReaderOperator.load(blockLoader); } - Expression conversion; - if (unionTypes instanceof MultiTypeEsField legacy) { - // Use the fully qualified name `cluster:index-name` because multiple types are resolved on coordinator with the cluster prefix - String indexName = shardContext.ctx.getFullyQualifiedIndex().getName(); - conversion = legacy.getConversionExpressionForIndex(indexName); - } else { - // Type-keyed lookup: resolve the field's local data type from the shard context, falling back to "unmapped" when the field has - // no local mapping. The conversion map is keyed by DataType.typeName(). - MappedFieldType mft = shardContext.fieldType(fieldName); - conversion = mft == null - ? null - : ((CompactMultiTypeEsField) unionTypes).getConversionExpressionForType( - EsqlDataTypeRegistry.INSTANCE.fromEs(mft.typeName(), mft.getMetricType()) - ); - } + Expression conversion = switch (unionTypes) { + case CompactMultiTypeEsField compact -> { + MappedFieldType mft = shardContext.fieldType(fieldName); + yield mft == null + ? 
null + : compact.getConversionExpressionForType(EsqlDataTypeRegistry.INSTANCE.fromEs(mft.typeName(), mft.getMetricType())); + } + case MultiTypeEsField legacy -> { + // Use the fully qualified name `cluster:index-name` because multiple types are resolved on coordinator with cluster prefix + String indexName = shardContext.ctx.getFullyQualifiedIndex().getName(); + yield legacy.getConversionExpressionForIndex(indexName); + } + }; if (conversion == null) { Expression potentiallyUnmapped = unionTypes.getUnmappedConversionExpression(); if (!(potentiallyUnmapped instanceof AbstractConvertFunction convert)) { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/CompactMultiTypeEsFieldTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/CompactMultiTypeEsFieldTests.java index 93aa9c19decd3..317718b01a3f4 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/CompactMultiTypeEsFieldTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/CompactMultiTypeEsFieldTests.java @@ -42,7 +42,6 @@ * Mirror of {@link MultiTypeEsFieldTests} for the type-keyed {@link CompactMultiTypeEsField}. 
*/ public class CompactMultiTypeEsFieldTests extends AbstractEsFieldTypeTests { - private Configuration config; @Before diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsFieldMemoryTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsFieldMemoryTests.java index 38cabe0599311..a7ad559d0fb14 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsFieldMemoryTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsFieldMemoryTests.java @@ -8,6 +8,7 @@ package org.elasticsearch.xpack.esql.type; import org.apache.lucene.tests.util.RamUsageTester; +import org.elasticsearch.TransportVersion; import org.elasticsearch.index.IndexMode; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.TransportVersionUtils; @@ -31,8 +32,10 @@ import java.util.stream.IntStream; import static org.elasticsearch.xpack.esql.EsqlTestUtils.analyzer; +import static org.elasticsearch.xpack.esql.core.type.CompactMultiTypeEsField.ESQL_MULTI_TYPE_ES_FIELD_2; import static org.hamcrest.Matchers.lessThan; +// FIXME(gal, NOCOMMIT) Go over these docs /** * End-to-end check that an analyzed plan over many union-typed fields, each conflicting across thousands of indices, retains substantially * less memory under {@link CompactMultiTypeEsField} (paired with {@link CompactInvalidMappedField}'s truncated index lists) than under @@ -64,30 +67,23 @@ public long accumulateObject(Object o, long shallowSize, Map fiel }; public void testV2AnalyzedPlanIsAtLeastTenTimesSmallerThanLegacy() { - String query = buildExplicitConversionQuery(NUM_CONFLICTING_FIELDS); - - LogicalPlan legacyPlan = analyzer().addIndex(unionTypedIndex(false)) - .minimumTransportVersion(TransportVersionUtils.randomVersionNotSupporting(CompactMultiTypeEsField.ESQL_MULTI_TYPE_ES_FIELD_2)) - .query(query); - LogicalPlan v2Plan = analyzer().addIndex(unionTypedIndex(true)) - 
.minimumTransportVersion(CompactMultiTypeEsField.ESQL_MULTI_TYPE_ES_FIELD_2) - .query(query); + String evalAssignments = IntStream.range(0, MultiTypeEsFieldMemoryTests.NUM_CONFLICTING_FIELDS) + .mapToObj(i -> "id_" + i + "_kw = id_" + i + "::keyword") + .collect(Collectors.joining(", ")); + String keepFields = IntStream.range(0, MultiTypeEsFieldMemoryTests.NUM_CONFLICTING_FIELDS) + .mapToObj(i -> "id_" + i + "_kw") + .collect(Collectors.joining(", ")); + String query = "FROM idx* | EVAL " + evalAssignments + " | KEEP " + keepFields + " | LIMIT 1"; - long legacyBytes = RamUsageTester.ramUsed(legacyPlan, ACCUMULATOR); - long v2Bytes = RamUsageTester.ramUsed(v2Plan, ACCUMULATOR); - assertThat(v2Bytes * 10L, lessThan(legacyBytes)); + assertThat(getBytesUsed(true, query) * 10L, lessThan(getBytesUsed(false, query))); } - /** - * Build a query that forces the analyzer to materialize a {@code MultiTypeEsField}/{@link CompactMultiTypeEsField} for every - * {@code id_} field by explicitly casting each to keyword. - */ - private static String buildExplicitConversionQuery(int numFields) { - String evalAssignments = IntStream.range(0, numFields) - .mapToObj(i -> "id_" + i + "_kw = id_" + i + "::keyword") - .collect(Collectors.joining(", ")); - String keepFields = IntStream.range(0, numFields).mapToObj(i -> "id_" + i + "_kw").collect(Collectors.joining(", ")); - return "FROM idx* | EVAL " + evalAssignments + " | KEEP " + keepFields + " | LIMIT 1"; + private static long getBytesUsed(boolean compact, String query) { + TransportVersion transportVersion = compact + ? 
TransportVersionUtils.randomVersionSupporting(ESQL_MULTI_TYPE_ES_FIELD_2) + : TransportVersionUtils.randomVersionNotSupporting(ESQL_MULTI_TYPE_ES_FIELD_2); + LogicalPlan plan = analyzer().addIndex(unionTypedIndex(compact)).minimumTransportVersion(transportVersion).query(query); + return RamUsageTester.ramUsed(plan, ACCUMULATOR); } /** From 43c2bb8e60b371d049c65bfbafef2bd41d780706 Mon Sep 17 00:00:00 2001 From: Gal Lalouche Date: Thu, 30 Apr 2026 15:40:59 +0300 Subject: [PATCH 13/20] Undo style changes in Analyzer --- .../xpack/esql/analysis/Analyzer.java | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java index c5729b2c52d46..fed6a24708078 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java @@ -7,7 +7,6 @@ package org.elasticsearch.xpack.esql.analysis; -import org.apache.lucene.util.BytesRef; import org.elasticsearch.TransportVersion; import org.elasticsearch.common.logging.HeaderWarning; import org.elasticsearch.common.logging.LoggerMessageFormat; @@ -489,8 +488,8 @@ protected LogicalPlan rule(UnresolvedExternalRelation plan, AnalyzerContext cont private String extractTablePath(Expression tablePath) { if (tablePath instanceof Literal literal && literal.value() != null) { Object value = literal.value(); - if (value instanceof BytesRef) { - return BytesRefs.toString((BytesRef) value); + if (value instanceof org.apache.lucene.util.BytesRef) { + return BytesRefs.toString((org.apache.lucene.util.BytesRef) value); } return value.toString(); } @@ -1274,7 +1273,7 @@ private LogicalPlan resolveFuse(Fuse fuse, List childrenOutput) { LogicalPlan scoreEval = new FuseScoreEval(source, fuse.child(), score, discriminator, fuse.fuseType(), 
fuse.options()); // create aggregations - Expression aggFilter = new Literal(source, true, BOOLEAN); + Expression aggFilter = new Literal(source, true, DataType.BOOLEAN); List aggregates = new ArrayList<>(); aggregates.add( @@ -1315,7 +1314,7 @@ private LogicalPlan resolvePromql(PromqlCommand promql, List children ); // Wrap in an explicit LIMIT 0 so that AddImplicitLimit skips the "No limit defined" warning, // which would otherwise fire because the LocalRelation contains no PromqlCommand marker. - return new Limit(source, new Literal(source, 0, INTEGER), localRelation); + return new Limit(source, new Literal(source, 0, DataType.INTEGER), localRelation); } LogicalPlan promqlPlan = promql.promqlPlan(); Function lambda = ua -> maybeResolveAttribute(ua, childrenOutput); @@ -1672,7 +1671,7 @@ private static List potentialCandidatesIfNoMatchesFound( UnresolvedAttribute ua, List matches, Collection attrList, - Function, String> messageProducer + java.util.function.Function, String> messageProducer ) { if (ua.customMessage()) { return List.of(); @@ -1867,7 +1866,7 @@ public LogicalPlan apply(LogicalPlan logicalPlan, AnalyzerContext context) { // entries to the max } var source = logicalPlan.source(); - return new Limit(source, new Literal(source, limit, INTEGER), logicalPlan); + return new Limit(source, new Literal(source, limit, DataType.INTEGER), logicalPlan); } } @@ -2363,7 +2362,7 @@ private static Expression resolveConvertFunction( + Expressions.name(fa) + "]"; Expression ua = new UnresolvedAttribute(fa.source(), fa.name(), unresolvedMessage); - return fcf.replaceChildren(singletonList(ua)); + return fcf.replaceChildren(Collections.singletonList(ua)); } imf.types().forEach(type -> { if (supportedTypes.contains(type.widenSmallNumeric())) { @@ -3025,7 +3024,7 @@ private static LogicalPlan maybePushDownConvertFunctions( Alias newAlias = new Alias( oldAttr.source(), newAliasName, // oldAttrName$$converted_to$$targetType - convert.replaceChildren(singletonList(oldAttr)), 
+ convert.replaceChildren(Collections.singletonList(oldAttr)), null, // generate a new id true // this'll be used to Project the synthetic attributes out when finishing analysis ); From ad272b1e29f9ad34b6d84f24ce22d7a0d08cb4f7 Mon Sep 17 00:00:00 2001 From: Gal Lalouche Date: Thu, 30 Apr 2026 15:45:38 +0300 Subject: [PATCH 14/20] More human refactors --- .../xpack/esql/analysis/Analyzer.java | 35 +++++++++---------- .../esql/core/type/MultiTypeEsField.java | 8 ++--- 2 files changed, 21 insertions(+), 22 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java index fed6a24708078..233c7fb2fd00d 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java @@ -56,7 +56,6 @@ import org.elasticsearch.xpack.esql.core.type.CompactMultiTypeEsField; import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.core.type.EsField; -import org.elasticsearch.xpack.esql.core.type.InvalidMappedField; import org.elasticsearch.xpack.esql.core.type.MultiTypeEsField; import org.elasticsearch.xpack.esql.core.type.PotentiallyUnmappedKeywordEsField; import org.elasticsearch.xpack.esql.core.type.TypeConflictField; @@ -176,6 +175,7 @@ import java.util.Arrays; import java.util.BitSet; import java.util.Collection; +import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; @@ -2249,23 +2249,6 @@ private static class ResolveUnionTypes extends ParameterizedRule typesToConversionExpressions, - @Nullable Expression unmappedConversionExpression, - AnalyzerContext context - ) { - return context.minimumVersion().supports(CompactMultiTypeEsField.ESQL_MULTI_TYPE_ES_FIELD_2) - ? 
CompactMultiTypeEsField.resolveFrom(imf, typesToConversionExpressions, unmappedConversionExpression) - : MultiTypeEsField.resolveFrom((InvalidMappedField) imf, typesToConversionExpressions) - .withPotentiallyUnmappedExpression(unmappedConversionExpression); - } - @Override public LogicalPlan apply(LogicalPlan plan, AnalyzerContext context) { List unionFieldAttributes = new ArrayList<>(); @@ -2499,6 +2482,22 @@ static EsField resolvedMultiTypeEsField( return buildMultiTypeEsField(imf, typesToConversionExpressions, potentiallyUnmappedConversion, context); } + /** + * Picks between the legacy {@link MultiTypeEsField} and the new {@link CompactMultiTypeEsField} based on the cluster minimum + * transport version, so that newly-built plans remain deserializable on older nodes. + */ + private static EsField buildMultiTypeEsField( + TypeConflictField imf, + Map typesToConversionExpressions, + @Nullable Expression unmappedConversionExpression, + AnalyzerContext context + ) { + return context.minimumVersion().supports(CompactMultiTypeEsField.ESQL_MULTI_TYPE_ES_FIELD_2) + ? 
CompactMultiTypeEsField.resolveFrom(imf, typesToConversionExpressions, unmappedConversionExpression) + : MultiTypeEsField.resolveFrom(imf, typesToConversionExpressions) + .withPotentiallyUnmappedExpression(unmappedConversionExpression); + } + private static boolean canConvertOriginalTypes(UnionTypeEsField unionTypeEsField, Set supportedTypes) { return unionTypeEsField.getConversionExpressions() .stream() diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/MultiTypeEsField.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/MultiTypeEsField.java index b658372df6fd5..a4b81ffbc9cbc 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/MultiTypeEsField.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/MultiTypeEsField.java @@ -126,10 +126,10 @@ public MultiTypeEsField withPotentiallyUnmappedExpression(@Nullable Expression p } public static MultiTypeEsField resolveFrom( - InvalidMappedField invalidMappedField, + TypeConflictField typeConflictedField, Map typesToConversionExpressions ) { - Map> typesToIndices = invalidMappedField.getTypesToIndices(); + Map> typesToIndices = typeConflictedField.getTypesToIndices(); DataType resolvedDataType = DataType.UNSUPPORTED; Map indexToConversionExpressions = new HashMap<>(); for (String typeName : typesToIndices.keySet()) { @@ -145,11 +145,11 @@ public static MultiTypeEsField resolveFrom( } } return new MultiTypeEsField( - invalidMappedField.getName(), + typeConflictedField.getName(), resolvedDataType, false, indexToConversionExpressions, - invalidMappedField.getTimeSeriesFieldType(), + typeConflictedField.getTimeSeriesFieldType(), null ); } From 178bb6cead9aed0a5eb0ac411badbf24399614b5 Mon Sep 17 00:00:00 2001 From: Gal Lalouche Date: Thu, 30 Apr 2026 15:52:11 +0300 Subject: [PATCH 15/20] ESQL: Key CompactInvalidMappedField by DataType internally The truncated typesToIndices map now uses DataType keys 
(mirroring CompactMultiTypeEsField), so the type-safe key flows down to construction. The TypeConflictField#getTypesToIndices contract is preserved by converting to string keys on access; that's cheap because the map is post-truncation and the accessor sits on a cold path. Made-with: Cursor --- .../core/type/CompactInvalidMappedField.java | 44 +++++++++++-------- .../type/CompactInvalidMappedFieldTests.java | 35 ++++++++------- .../type/MultiTypeEsFieldMemoryTests.java | 15 ++++--- 3 files changed, 53 insertions(+), 41 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/CompactInvalidMappedField.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/CompactInvalidMappedField.java index f064bf95d75bd..6212547a7536c 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/CompactInvalidMappedField.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/CompactInvalidMappedField.java @@ -34,34 +34,30 @@ * {@link InvalidMappedField}: their on-the-wire form is identical (the truncated/full {@code typesToIndices} map is never serialized) so * sharing implementation via inheritance would only obscure the fact that they're peer flavors of the same field shape. * - *

Wire format matches {@link InvalidMappedField} byte-for-byte and reuses its writeable name, so a {@code CompactInvalidMappedField} - * round-trips through the wire as a plain {@link InvalidMappedField} on the receiving side. That's fine because {@code typesToIndices} - * is empty after deserialization anyway, so the truncation no longer matters. + *

The internal {@code typesToIndices} map is keyed by {@link DataType} (rather than {@link String}) so that the type-safe + * {@link DataType} flows all the way down to construction; the {@link TypeConflictField#getTypesToIndices()} contract is honored by + * converting back to string keys on access. Since the map is post-truncation (a few entries per field), the conversion is cheap and the + * accessor is on a cold path anyway. */ public final class CompactInvalidMappedField extends EsField implements TypeConflictField { private static final int MAX_INDICES_PER_TYPE = 3; private final String errorMessage; - private final Map> typesToIndices; + private final Map> typesToIndices; private final boolean isPotentiallyUnmapped; - public CompactInvalidMappedField(String name, Map> typesToIndices) { - this(name, TypeConflictField.makeErrorMessage(typesToIndices, false), truncate(typesToIndices), false); + public CompactInvalidMappedField(String name, Map> typesToIndices) { + this(name, makeErrorMessage(typesToIndices, false), truncate(typesToIndices), false); } - public static CompactInvalidMappedField potentiallyUnmapped(String name, Map> typesToIndices) { - return new CompactInvalidMappedField( - name, - TypeConflictField.makeErrorMessage(typesToIndices, true), - truncate(typesToIndices), - true - ); + public static CompactInvalidMappedField potentiallyUnmapped(String name, Map> typesToIndices) { + return new CompactInvalidMappedField(name, makeErrorMessage(typesToIndices, true), truncate(typesToIndices), true); } private CompactInvalidMappedField( String name, String errorMessage, - Map> typesToIndices, + Map> typesToIndices, boolean isPotentiallyUnmapped ) { super(name, DataType.UNSUPPORTED, new TreeMap<>(), false, TimeSeriesFieldType.UNKNOWN); @@ -87,7 +83,9 @@ public String errorMessage() { @Override public Map> getTypesToIndices() { - return typesToIndices; + Map> result = new TreeMap<>(); + typesToIndices.forEach((k, v) -> result.put(k.typeName(), v)); + return 
result; } @Override @@ -120,9 +118,9 @@ public boolean equals(Object obj) { } /** Cap each per-type index set at {@value #MAX_INDICES_PER_TYPE} entries. */ - private static Map> truncate(Map> typesToIndices) { - Map> result = new TreeMap<>(); - for (Map.Entry> entry : typesToIndices.entrySet()) { + private static Map> truncate(Map> typesToIndices) { + Map> result = new TreeMap<>(); + for (Map.Entry> entry : typesToIndices.entrySet()) { Set indices = entry.getValue(); result.put(entry.getKey(), indices.size() <= MAX_INDICES_PER_TYPE ? Set.copyOf(indices) : truncate(indices)); } @@ -135,4 +133,14 @@ private static Map> truncate(Map> typesT truncated.add("..."); return Collections.unmodifiableSet(truncated); } + + /** + * Adapter onto {@link TypeConflictField#makeErrorMessage(Map, boolean)} since that one is shared with {@link InvalidMappedField} and + * therefore takes string keys. The string-keyed view is built ad-hoc here, used to render the message, and discarded. + */ + private static String makeErrorMessage(Map> typesToIndices, boolean includeInsistKeyword) { + Map> stringKeyed = new TreeMap<>(); + typesToIndices.forEach((k, v) -> stringKeyed.put(k.typeName(), v)); + return TypeConflictField.makeErrorMessage(stringKeyed, includeInsistKeyword); + } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/CompactInvalidMappedFieldTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/CompactInvalidMappedFieldTests.java index 34df3d1f441a0..086f4c349ef97 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/CompactInvalidMappedFieldTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/CompactInvalidMappedFieldTests.java @@ -28,10 +28,10 @@ public class CompactInvalidMappedFieldTests extends ESTestCase { public void testKeepsAllIndicesWhenAtOrBelowLimit() { - Map> input = Map.of( - DataType.KEYWORD.typeName(), + Map> input = Map.of( + DataType.KEYWORD, new 
LinkedHashSet<>(Set.of("idx_a", "idx_b")), - DataType.LONG.typeName(), + DataType.LONG, new LinkedHashSet<>(Set.of("idx_c", "idx_d", "idx_e")) ); @@ -47,7 +47,7 @@ public void testTruncatesAboveLimitAndAddsEllipsisSentinel() { .mapToObj(i -> Strings.format("idx_%05d", i)) .collect(Collectors.toCollection(LinkedHashSet::new)); - CompactInvalidMappedField field = new CompactInvalidMappedField("f", Map.of(DataType.KEYWORD.typeName(), manyIndices)); + CompactInvalidMappedField field = new CompactInvalidMappedField("f", Map.of(DataType.KEYWORD, manyIndices)); assertMap( field.getTypesToIndices(), @@ -59,7 +59,7 @@ public void testErrorMessageReflectsFullInputCountEvenAfterTruncation() { Set manyIndices = IntStream.range(0, 5_000) .mapToObj(i -> Strings.format("idx_%05d", i)) .collect(Collectors.toCollection(LinkedHashSet::new)); - Map> input = new TreeMap<>(Map.of(DataType.KEYWORD.typeName(), manyIndices)); + Map> input = new TreeMap<>(Map.of(DataType.KEYWORD, manyIndices)); String message = new CompactInvalidMappedField("f", input).errorMessage(); @@ -69,20 +69,21 @@ public void testErrorMessageReflectsFullInputCountEvenAfterTruncation() { } public void testErrorMessageMatchesInvalidMappedFieldForSmallInputs() { - Map> input = new TreeMap<>( - Map.of( - DataType.KEYWORD.typeName(), - new LinkedHashSet<>(Set.of("idx_a", "idx_b")), - DataType.LONG.typeName(), - new LinkedHashSet<>(Set.of("idx_c")) - ) + Set kwIndices = new LinkedHashSet<>(Set.of("idx_a", "idx_b")); + Set longIndices = new LinkedHashSet<>(Set.of("idx_c")); + Map> compactInput = new TreeMap<>(Map.of(DataType.KEYWORD, kwIndices, DataType.LONG, longIndices)); + Map> legacyInput = new TreeMap<>( + Map.of(DataType.KEYWORD.typeName(), kwIndices, DataType.LONG.typeName(), longIndices) ); - assertThat(new CompactInvalidMappedField("f", input).errorMessage(), equalTo(new InvalidMappedField("f", input).errorMessage())); + assertThat( + new CompactInvalidMappedField("f", compactInput).errorMessage(), + equalTo(new 
InvalidMappedField("f", legacyInput).errorMessage()) + ); } public void testPotentiallyUnmappedFlagAndMessageInsistOnKeyword() { - Map> input = new TreeMap<>(Map.of(DataType.LONG.typeName(), new LinkedHashSet<>(Set.of("idx_a")))); + Map> input = new TreeMap<>(Map.of(DataType.LONG, new LinkedHashSet<>(Set.of("idx_a")))); CompactInvalidMappedField field = CompactInvalidMappedField.potentiallyUnmapped("f", input); @@ -92,11 +93,11 @@ public void testPotentiallyUnmappedFlagAndMessageInsistOnKeyword() { } public void testTypesReflectsKeysOfTruncatedMap() { - Map> input = new TreeMap<>( + Map> input = new TreeMap<>( Map.of( - DataType.KEYWORD.typeName(), + DataType.KEYWORD, IntStream.range(0, 100).mapToObj(i -> "k" + i).collect(Collectors.toSet()), - DataType.LONG.typeName(), + DataType.LONG, Set.of("only") ) ); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsFieldMemoryTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsFieldMemoryTests.java index a7ad559d0fb14..b1d4f034b6fb4 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsFieldMemoryTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsFieldMemoryTests.java @@ -14,6 +14,7 @@ import org.elasticsearch.test.TransportVersionUtils; import org.elasticsearch.xpack.esql.core.type.CompactInvalidMappedField; import org.elasticsearch.xpack.esql.core.type.CompactMultiTypeEsField; +import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.core.type.EsField; import org.elasticsearch.xpack.esql.core.type.InvalidMappedField; import org.elasticsearch.xpack.esql.index.EsIndex; @@ -100,17 +101,19 @@ private static IndexResolution unionTypedIndex(boolean compact) { Map mapping = new HashMap<>(); for (int i = 0; i < MultiTypeEsFieldMemoryTests.NUM_CONFLICTING_FIELDS; i++) { String fieldName = "id_" + i; - Map> perFieldTypesToIndices = new 
HashMap<>(); - perFieldTypesToIndices.put("keyword", new HashSet<>()); - perFieldTypesToIndices.put("integer", new HashSet<>()); + Set kwIndices = new HashSet<>(); + Set intIndices = new HashSet<>(); for (int j = 0; j < MultiTypeEsFieldMemoryTests.NUM_INDICES; j++) { - perFieldTypesToIndices.get(j % 2 == 0 ? "keyword" : "integer").add("idx_" + j); + (j % 2 == 0 ? kwIndices : intIndices).add("idx_" + j); } mapping.put( fieldName, compact - ? new CompactInvalidMappedField(fieldName, perFieldTypesToIndices) - : new InvalidMappedField(fieldName, perFieldTypesToIndices) + ? new CompactInvalidMappedField(fieldName, Map.of(DataType.KEYWORD, kwIndices, DataType.INTEGER, intIndices)) + : new InvalidMappedField( + fieldName, + Map.of(DataType.KEYWORD.typeName(), kwIndices, DataType.INTEGER.typeName(), intIndices) + ) ); } return IndexResolution.valid(new EsIndex("idx*", mapping, indexNamesWithModes, Map.of(), Map.of())); From d34413b159c2fddb66b07ccacd9efa307d749bd6 Mon Sep 17 00:00:00 2001 From: Gal Lalouche Date: Thu, 30 Apr 2026 16:24:05 +0300 Subject: [PATCH 16/20] Get rid of AI javadocs --- .../core/type/CompactInvalidMappedField.java | 17 +------------- .../core/type/CompactMultiTypeEsField.java | 10 +------- .../esql/core/type/TypeConflictField.java | 23 +++---------------- .../type/MultiTypeEsFieldMemoryTests.java | 19 ++------------- 4 files changed, 7 insertions(+), 62 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/CompactInvalidMappedField.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/CompactInvalidMappedField.java index 6212547a7536c..abaa99b2ef882 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/CompactInvalidMappedField.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/CompactInvalidMappedField.java @@ -20,24 +20,9 @@ import java.util.Set; import java.util.TreeMap; -// FIXME(gal, NOCOMMIT) Go over these docs 
/** * Memory-frugal counterpart to {@link InvalidMappedField}: stores at most {@value #MAX_INDICES_PER_TYPE} concrete index names per source - * type instead of the full per-type index list. Wide union-typed fields routinely span thousands of indices but the only consumers that - * need the full list are the legacy index-keyed conversion structures, and they aren't used on transport versions that support - * {@link CompactMultiTypeEsField}. Truncating here lets the analyzed plan stay small while still producing a good "[a, b, c, ...]" error - * message: the message itself is rendered from the full input map at construction time and then stored as a string, so we lose only the - * post-construction ability to enumerate every index. - * - *

The two classes share the {@link TypeConflictField} interface so consumers (the analyzer, the verifier, type resolution) can branch - * on it instead of either concrete class. {@link CompactInvalidMappedField} deliberately does not extend - * {@link InvalidMappedField}: their on-the-wire form is identical (the truncated/full {@code typesToIndices} map is never serialized) so - * sharing implementation via inheritance would only obscure the fact that they're peer flavors of the same field shape. - * - *

The internal {@code typesToIndices} map is keyed by {@link DataType} (rather than {@link String}) so that the type-safe - * {@link DataType} flows all the way down to construction; the {@link TypeConflictField#getTypesToIndices()} contract is honored by - * converting back to string keys on access. Since the map is post-truncation (a few entries per field), the conversion is cheap and the - * accessor is on a cold path anyway. + * type instead of the full per-type index list. */ public final class CompactInvalidMappedField extends EsField implements TypeConflictField { private static final int MAX_INDICES_PER_TYPE = 3; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/CompactMultiTypeEsField.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/CompactMultiTypeEsField.java index c6588431032d4..5c8b869332ebe 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/CompactMultiTypeEsField.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/CompactMultiTypeEsField.java @@ -23,18 +23,10 @@ import java.util.Objects; import java.util.Set; -// FIXME(gal, NOCOMMIT) Go over these javadocs // FIXME(gal, NOCOMMIT) Reduce duplication with MultiTypeEsField /** * Memory-efficient variant of {@link MultiTypeEsField} that stores the per-source-type conversion - * expressions directly, rather than expanding them to one entry per index. Plus an optional - * {@code unmappedConversionExpression} for indices in which the field is unmapped (treated as - * {@link DataType#KEYWORD}). On a data node, the conversion expression is looked up by the field's - * locally-resolved data type. - * - *

This is the on-the-wire successor to {@link MultiTypeEsField}; the analyzer falls back to the - * legacy {@link MultiTypeEsField} when the cluster minimum transport version does not yet support - * {@code esql_multi_type_es_field_2}. + * expressions directly, rather than expanding them to one entry per index. */ public final class CompactMultiTypeEsField extends EsField implements UnionTypeEsField { // FIXME(gal, NOCOMMIT) rename diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/TypeConflictField.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/TypeConflictField.java index aeb0d3b1ad2ef..b1128ceac663b 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/TypeConflictField.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/TypeConflictField.java @@ -12,16 +12,6 @@ import java.util.stream.Collectors; // FIXME(gal, NOCOMMIT) Go over these docs -/** - * Common surface for the two ESQL field flavors that carry per-index type-conflict information: the legacy - * {@link InvalidMappedField} (full per-type index lists) and the memory-frugal {@link CompactInvalidMappedField} (truncated lists). - * Production code that consumes these fields (analyzer rules, the verifier, type resolution) should branch on this interface so it - * stays oblivious to which flavor a particular {@link EsField} happens to be. - * - *

The {@code getName} / {@code getProperties} / {@code isAggregatable} / {@code getTimeSeriesFieldType} accessors are all - * provided for free by {@link EsField}, which both implementations extend; they're declared here so consumers can pull everything - * they need off a single typed reference. - */ public sealed interface TypeConflictField permits InvalidMappedField, CompactInvalidMappedField { String getName(); @@ -44,18 +34,11 @@ public sealed interface TypeConflictField permits InvalidMappedField, CompactInv */ Map> getTypesToIndices(); - /** - * Whether the field is unmapped in at least one index, in which case it is treated as {@link DataType#KEYWORD} for the unmapped - * indices. - */ + /** Whether the field is unmapped in at least one index, in which case it's treated as {@link DataType#KEYWORD} where it is unmapped. */ boolean isPotentiallyUnmapped(); - /** - * Source data types observed for this field across all indices. - */ - default Set types() { - return getTypesToIndices().keySet().stream().map(DataType::fromTypeName).collect(Collectors.toSet()); - } + /** Source data types observed for this field across all indices. */ + Set types(); /** * Build the user-facing error message for a per-type-to-indices map. Shared between both implementations so they stay in sync. 
diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsFieldMemoryTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsFieldMemoryTests.java index b1d4f034b6fb4..cb4c688ec5e94 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsFieldMemoryTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsFieldMemoryTests.java @@ -40,16 +40,7 @@ /** * End-to-end check that an analyzed plan over many union-typed fields, each conflicting across thousands of indices, retains substantially * less memory under {@link CompactMultiTypeEsField} (paired with {@link CompactInvalidMappedField}'s truncated index lists) than under - * the legacy {@link org.elasticsearch.xpack.esql.core.type.MultiTypeEsField} (keyed per-index) paired with a full {@link InvalidMappedField}. - * - *

The cost we're targeting is {@code O(num_fields * num_indices)}: each conflicting field expands into its own per-index conversion - * map under legacy, and per-type under v2. With many fields the constant overhead from {@link EsIndex#indexNameWithModes} becomes a - * small fixed tax, and the savings show up cleanly at plan-total scope. - * - *

One subtlety in the fixture: each conflicting field gets its own {@code typesToIndices} map. Sharing one across all fields - * (even though it's logically the same content) makes {@link RamUsageTester}'s identity-based dedup collapse all 50 analyzer-derived - * conversion structures down to roughly one field's worth, which masks the savings. Production index resolutions don't share these maps - * across fields - they're built per-field from {@code FieldCapabilitiesResponse} - so the per-field copy here is the realistic case. + * the legacy {@link InvalidMappedField} (keyed per-index). */ public class MultiTypeEsFieldMemoryTests extends ESTestCase { private static final int NUM_INDICES = 5_000; @@ -58,7 +49,7 @@ public class MultiTypeEsFieldMemoryTests extends ESTestCase { /** * {@link RamUsageTester} walks reflectively, which fails on JDK-internal classes (e.g. {@code sun.util.locale.BaseLocale}) that * aren't opened to unnamed modules. The plan transitively references a {@link Locale} and a {@link ZoneId} via the analyzer's - * {@code Configuration}, so we treat those as opaque - they're irrelevant to the union-type memory we care about here. + * {@code Configuration}, so we treat those as opaque as they're irrelevant to the union-type memory we care about here. */ private static final RamUsageTester.Accumulator ACCUMULATOR = new RamUsageTester.Accumulator() { @Override @@ -87,12 +78,6 @@ private static long getBytesUsed(boolean compact, String query) { return RamUsageTester.ramUsed(plan, ACCUMULATOR); } - /** - * Build a fake "idx*" pattern with {@code numIndices} concrete indices and {@code numConflictingFields} fields {@code id_0..id_}, - * each with type {@code keyword} in half of the indices and {@code integer} in the other half. 
When {@code compact} is true the - * conflicting fields are built from {@link CompactInvalidMappedField} (truncated index lists), matching what a v2-capable coordinator - * produces; otherwise the full {@link InvalidMappedField} is used. - */ private static IndexResolution unionTypedIndex(boolean compact) { Map indexNamesWithModes = new HashMap<>(); for (int i = 0; i < MultiTypeEsFieldMemoryTests.NUM_INDICES; i++) { From be949ac063a07d21a08bef6c856e04c298ff4b5c Mon Sep 17 00:00:00 2001 From: Gal Lalouche Date: Thu, 30 Apr 2026 16:25:16 +0300 Subject: [PATCH 17/20] More human refacotrs --- .../esql/core/type/CompactInvalidMappedField.java | 5 +++++ .../xpack/esql/core/type/InvalidMappedField.java | 11 ++++++++++- .../xpack/esql/core/type/MultiTypeEsField.java | 4 +++- .../xpack/esql/type/MultiTypeEsFieldMemoryTests.java | 1 - 4 files changed, 18 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/CompactInvalidMappedField.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/CompactInvalidMappedField.java index abaa99b2ef882..3b3253ad3559f 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/CompactInvalidMappedField.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/CompactInvalidMappedField.java @@ -78,6 +78,11 @@ public boolean isPotentiallyUnmapped() { return isPotentiallyUnmapped; } + @Override + public Set types() { + return typesToIndices.keySet(); + } + @Override public EsField getExactField() { throw new QlIllegalArgumentException("Field [" + getName() + "] is invalid, cannot access it"); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/InvalidMappedField.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/InvalidMappedField.java index 82917b8322ec3..3ae33030a7f5d 100644 --- 
a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/InvalidMappedField.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/InvalidMappedField.java @@ -19,9 +19,13 @@ import java.util.Objects; import java.util.Set; import java.util.TreeMap; +import java.util.stream.Collectors; // FIXME(gal, NOCOMMIT) Redocument that this is for BWC /** + *

+ * N.B.: This class exists only as a backward-compatible version of {@link CompactInvalidMappedField}. + *

* Representation of field mapped differently across indices; or being potentially unmapped in some, in which case it is treated as * {@link DataType#KEYWORD} in the indices where it is unmapped. * Used during mapping discovery only. @@ -84,7 +88,7 @@ private InvalidMappedField( this.isPotentiallyUnmapped = isPotentiallyUnmapped; } - protected InvalidMappedField(StreamInput in) throws IOException { + InvalidMappedField(StreamInput in) throws IOException { this( ((PlanStreamInput) in).readCachedString(), in.readString(), @@ -147,4 +151,9 @@ public Map> getTypesToIndices() { public boolean isPotentiallyUnmapped() { return isPotentiallyUnmapped; } + + @Override + public Set types() { + return getTypesToIndices().keySet().stream().map(DataType::fromTypeName).collect(Collectors.toSet()); + } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/MultiTypeEsField.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/MultiTypeEsField.java index a4b81ffbc9cbc..2830e358cfad9 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/MultiTypeEsField.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/MultiTypeEsField.java @@ -23,8 +23,10 @@ import java.util.Objects; import java.util.Set; -// FIXME(gal, NOCOMMIT) Redocument that this is for BWC. /** + *

+ * N.B.: This class exists only as a backward-compatible version of {@link CompactMultiTypeEsField}. + *

* During IndexResolution it can occur that the same field is mapped to different types in different indices. * An {@link InvalidMappedField} holds that information and allows for later resolution of the field * to a single type in {@code ResolveUnionTypes}. diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsFieldMemoryTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsFieldMemoryTests.java index cb4c688ec5e94..98091e19a03f0 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsFieldMemoryTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsFieldMemoryTests.java @@ -36,7 +36,6 @@ import static org.elasticsearch.xpack.esql.core.type.CompactMultiTypeEsField.ESQL_MULTI_TYPE_ES_FIELD_2; import static org.hamcrest.Matchers.lessThan; -// FIXME(gal, NOCOMMIT) Go over these docs /** * End-to-end check that an analyzed plan over many union-typed fields, each conflicting across thousands of indices, retains substantially * less memory under {@link CompactMultiTypeEsField} (paired with {@link CompactInvalidMappedField}'s truncated index lists) than under From cac19da6f11cf7124edb342bdffbbf6810035eaa Mon Sep 17 00:00:00 2001 From: Gal Lalouche Date: Thu, 30 Apr 2026 18:18:50 +0300 Subject: [PATCH 18/20] ESQL: Dedup MultiTypeEsField/CompactMultiTypeEsField resolveFrom Pull the shared per-type validation loop into UnionTypeEsField.resolve, returning a (resolvedDataType, typeToExpr) Resolution. Both resolveFrom methods now consume that and only differ in their post-processing: the compact form keeps the per-type map and applies the unmapped fallback for the resolved type, while the legacy form expands it to per-index entries. 
Made-with: Cursor --- .../core/type/CompactMultiTypeEsField.java | 31 +++---------------- .../esql/core/type/MultiTypeEsField.java | 18 +++-------- .../esql/core/type/UnionTypeEsField.java | 19 ++++++++++++ 3 files changed, 29 insertions(+), 39 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/CompactMultiTypeEsField.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/CompactMultiTypeEsField.java index 5c8b869332ebe..0f720ed9413e6 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/CompactMultiTypeEsField.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/CompactMultiTypeEsField.java @@ -18,10 +18,8 @@ import java.io.IOException; import java.util.Collection; -import java.util.HashMap; import java.util.Map; import java.util.Objects; -import java.util.Set; // FIXME(gal, NOCOMMIT) Reduce duplication with MultiTypeEsField /** @@ -107,39 +105,20 @@ public Collection getConversionExpressions() { return unmappedConversionExpression; } - /** - * Build a {@link CompactMultiTypeEsField} from the per-type resolutions previously computed against a - * {@link TypeConflictField}. Only types present in {@code imf.getTypesToIndices()} for which a - * conversion was supplied are included. 
- */ public static CompactMultiTypeEsField resolveFrom( TypeConflictField imf, Map typesToConversionExpressions, @Nullable Expression unmappedConversionExpression ) { - Map> typesToIndices = imf.getTypesToIndices(); - DataType resolvedDataType = DataType.UNSUPPORTED; - Map filtered = new HashMap<>(); - for (String typeName : typesToIndices.keySet()) { - Expression convertExpr = typesToConversionExpressions.get(typeName); - if (convertExpr == null) { - continue; - } - if (resolvedDataType == DataType.UNSUPPORTED) { - resolvedDataType = convertExpr.dataType(); - } else if (resolvedDataType != convertExpr.dataType()) { - throw new IllegalArgumentException("Resolved data type mismatch: " + resolvedDataType + " != " + convertExpr.dataType()); - } - filtered.put(DataType.fromTypeName(typeName), convertExpr); - } - if (resolvedDataType == DataType.UNSUPPORTED && unmappedConversionExpression != null) { - resolvedDataType = unmappedConversionExpression.dataType(); - } + UnionTypeEsField.Resolution resolution = UnionTypeEsField.resolve(imf, typesToConversionExpressions); + DataType resolvedDataType = resolution.resolvedDataType() == DataType.UNSUPPORTED && unmappedConversionExpression != null + ? 
unmappedConversionExpression.dataType() + : resolution.resolvedDataType(); return new CompactMultiTypeEsField( imf.getName(), resolvedDataType, false, - filtered, + resolution.typeToExpr(), imf.getTimeSeriesFieldType(), unmappedConversionExpression ); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/MultiTypeEsField.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/MultiTypeEsField.java index 2830e358cfad9..f1de9c35e52d4 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/MultiTypeEsField.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/MultiTypeEsField.java @@ -21,7 +21,6 @@ import java.util.HashMap; import java.util.Map; import java.util.Objects; -import java.util.Set; /** *

@@ -131,24 +130,17 @@ public static MultiTypeEsField resolveFrom( TypeConflictField typeConflictedField, Map typesToConversionExpressions ) { - Map> typesToIndices = typeConflictedField.getTypesToIndices(); - DataType resolvedDataType = DataType.UNSUPPORTED; + UnionTypeEsField.Resolution resolution = UnionTypeEsField.resolve(typeConflictedField, typesToConversionExpressions); Map indexToConversionExpressions = new HashMap<>(); - for (String typeName : typesToIndices.keySet()) { - Set indices = typesToIndices.get(typeName); - Expression convertExpr = typesToConversionExpressions.get(typeName); - if (resolvedDataType == DataType.UNSUPPORTED) { - resolvedDataType = convertExpr.dataType(); - } else if (resolvedDataType != convertExpr.dataType()) { - throw new IllegalArgumentException("Resolved data type mismatch: " + resolvedDataType + " != " + convertExpr.dataType()); - } + typeConflictedField.getTypesToIndices().forEach((typeName, indices) -> { + Expression convertExpr = resolution.typeToExpr().get(DataType.fromTypeName(typeName)); for (String indexName : indices) { indexToConversionExpressions.put(indexName, convertExpr); } - } + }); return new MultiTypeEsField( typeConflictedField.getName(), - resolvedDataType, + resolution.resolvedDataType(), false, indexToConversionExpressions, typeConflictedField.getTimeSeriesFieldType(), diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/UnionTypeEsField.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/UnionTypeEsField.java index 022dabe688f05..a17a4205aab4e 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/UnionTypeEsField.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/UnionTypeEsField.java @@ -11,6 +11,8 @@ import org.elasticsearch.xpack.esql.core.expression.Expression; import java.util.Collection; +import java.util.HashMap; +import java.util.Map; /** * Common interface implemented by both 
{@link MultiTypeEsField} (legacy, keyed by index name) and @@ -27,4 +29,21 @@ public sealed interface UnionTypeEsField permits MultiTypeEsField, CompactMultiT Expression getUnmappedConversionExpression(); Collection getConversionExpressions(); + + static Resolution resolve(TypeConflictField field, Map typesToConversionExpressions) { + DataType resolvedDataType = DataType.UNSUPPORTED; + Map typeToExpr = new HashMap<>(); + for (String typeName : field.getTypesToIndices().keySet()) { + Expression convertExpr = typesToConversionExpressions.get(typeName); + if (resolvedDataType == DataType.UNSUPPORTED) { + resolvedDataType = convertExpr.dataType(); + } else if (resolvedDataType != convertExpr.dataType()) { + throw new IllegalArgumentException("Resolved data type mismatch: " + resolvedDataType + " != " + convertExpr.dataType()); + } + typeToExpr.put(DataType.fromTypeName(typeName), convertExpr); + } + return new Resolution(resolvedDataType, typeToExpr); + } + + record Resolution(DataType resolvedDataType, Map typeToExpr) {} } From 09967b4a7dd17f5084f3d4ff6494c9af782edab6 Mon Sep 17 00:00:00 2001 From: Gal Lalouche Date: Thu, 30 Apr 2026 19:55:25 +0300 Subject: [PATCH 19/20] More human refacotrs --- ..._2.csv => compact_multi_type_es_field.csv} | 0 .../resources/transport/upper_bounds/9.5.csv | 2 +- .../xpack/esql/analysis/Analyzer.java | 46 +---------------- .../core/type/CompactMultiTypeEsField.java | 21 ++++++-- .../esql/core/type/InvalidMappedField.java | 1 - .../esql/core/type/MultiTypeEsField.java | 12 +++++ .../esql/core/type/TypeConflictField.java | 1 - .../esql/core/type/UnionTypeEsField.java | 13 +++++ .../xpack/esql/core/util/CollectionUtils.java | 11 ++++ .../type/MultiTypeEsFieldMemoryTests.java | 51 +++++++++---------- 10 files changed, 78 insertions(+), 80 deletions(-) rename server/src/main/resources/transport/definitions/referable/{esql_multi_type_es_field_2.csv => compact_multi_type_es_field.csv} (100%) diff --git 
a/server/src/main/resources/transport/definitions/referable/esql_multi_type_es_field_2.csv b/server/src/main/resources/transport/definitions/referable/compact_multi_type_es_field.csv similarity index 100% rename from server/src/main/resources/transport/definitions/referable/esql_multi_type_es_field_2.csv rename to server/src/main/resources/transport/definitions/referable/compact_multi_type_es_field.csv diff --git a/server/src/main/resources/transport/upper_bounds/9.5.csv b/server/src/main/resources/transport/upper_bounds/9.5.csv index f04765e78d0b6..320d7d7ab2161 100644 --- a/server/src/main/resources/transport/upper_bounds/9.5.csv +++ b/server/src/main/resources/transport/upper_bounds/9.5.csv @@ -1 +1 @@ -esql_multi_type_es_field_2,9366000 +compact_multi_type_es_field,9366000 diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java index 233c7fb2fd00d..4fa6c4aa95824 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java @@ -2383,8 +2383,7 @@ private static Expression resolveConvertFunction( if (((UnionTypeEsField) fa.field()).getUnmappedConversionExpression() != null) { throw new IllegalStateException("Unexpected potentially unmapped expression for [" + fa.fieldName() + "]"); } - EsField multiTypeEsField = rewrapWithCast(fa, convertExpression); - return createIfDoesNotAlreadyExist(fa, multiTypeEsField, unionFieldAttributes); + return createIfDoesNotAlreadyExist(fa, unionTypeEsField.rewrapWithCast(convertExpression), unionFieldAttributes); } } else if (convert.field() instanceof AbstractConvertFunction subConvert) { return convertExpression.replaceChildren( @@ -2394,47 +2393,6 @@ private static Expression resolveConvertFunction( return convertExpression; } - // Wraps an existing union-type field's 
per-(index|type) conversions with another conversion expression on top, so the - // composite expression first does the original cast then the additional cast. - private static EsField rewrapWithCast(FieldAttribute fa, Expression convertExpression) { - // FIXME(gal, NOCOMMIT) reduce duplication - switch ((UnionTypeEsField) fa.field()) { - case CompactMultiTypeEsField compact -> { - Map typeToConversionExpressions = new HashMap<>(); - for (Map.Entry entry : compact.getTypeToConversionExpressions().entrySet()) { - typeToConversionExpressions.put(entry.getKey(), wrapWith(convertExpression, entry.getValue())); - } - return new CompactMultiTypeEsField( - fa.fieldName().string(), - convertExpression.dataType(), - false, - typeToConversionExpressions, - fa.field().getTimeSeriesFieldType(), - null - ); - } - case MultiTypeEsField legacy -> { - Map indexToConversionExpressions = new HashMap<>(); - for (Map.Entry entry : legacy.getIndexToConversionExpressions().entrySet()) { - indexToConversionExpressions.put(entry.getKey(), wrapWith(convertExpression, entry.getValue())); - } - return new MultiTypeEsField( - fa.fieldName().string(), - convertExpression.dataType(), - false, - indexToConversionExpressions, - fa.field().getTimeSeriesFieldType(), - null - ); - } - } - } - - private static Expression wrapWith(Expression convertExpression, Expression originalConvertFunction) { - AbstractConvertFunction inner = (AbstractConvertFunction) originalConvertFunction; - return convertExpression.replaceChildren(singletonList(inner.field())); - } - private static Expression createIfDoesNotAlreadyExist( FieldAttribute fa, EsField resolvedField, @@ -2492,7 +2450,7 @@ private static EsField buildMultiTypeEsField( @Nullable Expression unmappedConversionExpression, AnalyzerContext context ) { - return context.minimumVersion().supports(CompactMultiTypeEsField.ESQL_MULTI_TYPE_ES_FIELD_2) + return context.minimumVersion().supports(CompactMultiTypeEsField.CompactMultiTypeEsField) ? 
CompactMultiTypeEsField.resolveFrom(imf, typesToConversionExpressions, unmappedConversionExpression) : MultiTypeEsField.resolveFrom(imf, typesToConversionExpressions) .withPotentiallyUnmappedExpression(unmappedConversionExpression); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/CompactMultiTypeEsField.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/CompactMultiTypeEsField.java index 0f720ed9413e6..49466727a2002 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/CompactMultiTypeEsField.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/CompactMultiTypeEsField.java @@ -21,15 +21,14 @@ import java.util.Map; import java.util.Objects; -// FIXME(gal, NOCOMMIT) Reduce duplication with MultiTypeEsField /** * Memory-efficient variant of {@link MultiTypeEsField} that stores the per-source-type conversion * expressions directly, rather than expanding them to one entry per index. */ public final class CompactMultiTypeEsField extends EsField implements UnionTypeEsField { - // FIXME(gal, NOCOMMIT) rename - public static final TransportVersion ESQL_MULTI_TYPE_ES_FIELD_2 = TransportVersion.fromName("esql_multi_type_es_field_2"); + public static final TransportVersion CompactMultiTypeEsField = TransportVersion.fromName("compact_multi_type_es_field"); + // TODO these Expressions should be an AbstractConvertFunction. 
private final Map typeToConversionExpressions; /** @@ -52,7 +51,7 @@ public CompactMultiTypeEsField( this.unmappedConversionExpression = unmappedConversionExpression; } - protected CompactMultiTypeEsField(StreamInput in) throws IOException { + CompactMultiTypeEsField(StreamInput in) throws IOException { this( ((PlanStreamInput) in).readCachedString(), DataType.readFrom(in), @@ -68,7 +67,7 @@ public void writeContent(StreamOutput out) throws IOException { ((PlanStreamOutput) out).writeCachedString(getName()); getDataType().writeTo(out); out.writeBoolean(isAggregatable()); - out.writeMap(typeToConversionExpressions, (o, k) -> k.writeTo(o), (o, v) -> o.writeNamedWriteable(v)); + out.writeMap(typeToConversionExpressions, (o, k) -> k.writeTo(o), StreamOutput::writeNamedWriteable); writeTimeSeriesFieldType(out); out.writeOptionalNamedWriteable(unmappedConversionExpression); } @@ -92,6 +91,18 @@ public Collection getConversionExpressions() { return typeToConversionExpressions.values(); } + @Override + public EsField rewrapWithCast(Expression convertExpression) { + return new CompactMultiTypeEsField( + getName(), + convertExpression.dataType(), + isAggregatable(), + UnionTypeEsField.replaceChildrenWithExpressionField(typeToConversionExpressions, convertExpression), + getTimeSeriesFieldType(), + unmappedConversionExpression + ); + } + /** * Returns the conversion expression to apply for the given source {@link DataType}, or {@code null} * if no conversion is registered for that type. 
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/InvalidMappedField.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/InvalidMappedField.java index 3ae33030a7f5d..74355913beb88 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/InvalidMappedField.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/InvalidMappedField.java @@ -21,7 +21,6 @@ import java.util.TreeMap; import java.util.stream.Collectors; -// FIXME(gal, NOCOMMIT) Redocument that this is for BWC /** *

* N.B.: This class exists only as a backward-compatible version of {@link CompactInvalidMappedField}. diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/MultiTypeEsField.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/MultiTypeEsField.java index f1de9c35e52d4..07e8870d5e113 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/MultiTypeEsField.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/MultiTypeEsField.java @@ -111,6 +111,18 @@ public Collection getConversionExpressions() { return indexToConversionExpressions.values(); } + @Override + public EsField rewrapWithCast(Expression convertExpression) { + return new MultiTypeEsField( + getName(), + convertExpression.dataType(), + isAggregatable(), + UnionTypeEsField.replaceChildrenWithExpressionField(indexToConversionExpressions, convertExpression), + getTimeSeriesFieldType(), + potentiallyUnmappedExpression + ); + } + public @Nullable Expression getConversionExpressionForIndex(String indexName) { return indexToConversionExpressions.get(indexName); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/TypeConflictField.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/TypeConflictField.java index b1128ceac663b..3df36cfd98116 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/TypeConflictField.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/TypeConflictField.java @@ -11,7 +11,6 @@ import java.util.Set; import java.util.stream.Collectors; -// FIXME(gal, NOCOMMIT) Go over these docs public sealed interface TypeConflictField permits InvalidMappedField, CompactInvalidMappedField { String getName(); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/UnionTypeEsField.java 
b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/UnionTypeEsField.java index a17a4205aab4e..25f7616f6a307 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/UnionTypeEsField.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/type/UnionTypeEsField.java @@ -9,9 +9,12 @@ import org.elasticsearch.core.Nullable; import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.util.CollectionUtils; +import org.elasticsearch.xpack.esql.expression.function.scalar.convert.AbstractConvertFunction; import java.util.Collection; import java.util.HashMap; +import java.util.List; import java.util.Map; /** @@ -30,6 +33,16 @@ public sealed interface UnionTypeEsField permits MultiTypeEsField, CompactMultiT Collection getConversionExpressions(); + /** + * Wraps an existing union-type field's per-(index|type) conversions with another conversion expression on top, so the + * composite expression first does the original cast then the additional cast. 
+ */ + EsField rewrapWithCast(Expression convertExpression); + + static Map replaceChildrenWithExpressionField(Map map, Expression expression) { + return CollectionUtils.mapValues(map, e -> expression.replaceChildren(List.of(((AbstractConvertFunction) e).field()))); + } + static Resolution resolve(TypeConflictField field, Map typesToConversionExpressions) { DataType resolvedDataType = DataType.UNSUPPORTED; Map typeToExpr = new HashMap<>(); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/util/CollectionUtils.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/util/CollectionUtils.java index e8dfa260fd803..03d9d5524546b 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/util/CollectionUtils.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/util/CollectionUtils.java @@ -9,8 +9,11 @@ import java.util.ArrayList; import java.util.Collection; import java.util.Collections; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.Set; +import java.util.function.Function; import static java.util.Collections.emptyList; @@ -90,4 +93,12 @@ public static List prependToCopy(T element, Collection collection) { } return List.of(result); } + + public static Map mapValues(Map map, Function f) { + var res = new HashMap(); + for (var entry : map.entrySet()) { + res.put(entry.getKey(), f.apply(entry.getValue())); + } + return res; + } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsFieldMemoryTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsFieldMemoryTests.java index 98091e19a03f0..9b66f779e765f 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsFieldMemoryTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsFieldMemoryTests.java @@ -24,7 +24,6 @@ import java.lang.reflect.Field; 
import java.time.ZoneId; import java.util.Collection; -import java.util.HashMap; import java.util.HashSet; import java.util.Locale; import java.util.Map; @@ -33,7 +32,7 @@ import java.util.stream.IntStream; import static org.elasticsearch.xpack.esql.EsqlTestUtils.analyzer; -import static org.elasticsearch.xpack.esql.core.type.CompactMultiTypeEsField.ESQL_MULTI_TYPE_ES_FIELD_2; +import static org.elasticsearch.xpack.esql.core.type.CompactMultiTypeEsField.CompactMultiTypeEsField; import static org.hamcrest.Matchers.lessThan; /** @@ -58,12 +57,10 @@ public long accumulateObject(Object o, long shallowSize, Map fiel }; public void testV2AnalyzedPlanIsAtLeastTenTimesSmallerThanLegacy() { - String evalAssignments = IntStream.range(0, MultiTypeEsFieldMemoryTests.NUM_CONFLICTING_FIELDS) + String evalAssignments = IntStream.range(0, NUM_CONFLICTING_FIELDS) .mapToObj(i -> "id_" + i + "_kw = id_" + i + "::keyword") .collect(Collectors.joining(", ")); - String keepFields = IntStream.range(0, MultiTypeEsFieldMemoryTests.NUM_CONFLICTING_FIELDS) - .mapToObj(i -> "id_" + i + "_kw") - .collect(Collectors.joining(", ")); + String keepFields = IntStream.range(0, NUM_CONFLICTING_FIELDS).mapToObj(i -> "id_" + i + "_kw").collect(Collectors.joining(", ")); String query = "FROM idx* | EVAL " + evalAssignments + " | KEEP " + keepFields + " | LIMIT 1"; assertThat(getBytesUsed(true, query) * 10L, lessThan(getBytesUsed(false, query))); @@ -71,35 +68,33 @@ public void testV2AnalyzedPlanIsAtLeastTenTimesSmallerThanLegacy() { private static long getBytesUsed(boolean compact, String query) { TransportVersion transportVersion = compact - ? TransportVersionUtils.randomVersionSupporting(ESQL_MULTI_TYPE_ES_FIELD_2) - : TransportVersionUtils.randomVersionNotSupporting(ESQL_MULTI_TYPE_ES_FIELD_2); + ? 
TransportVersionUtils.randomVersionSupporting(CompactMultiTypeEsField) + : TransportVersionUtils.randomVersionNotSupporting(CompactMultiTypeEsField); LogicalPlan plan = analyzer().addIndex(unionTypedIndex(compact)).minimumTransportVersion(transportVersion).query(query); return RamUsageTester.ramUsed(plan, ACCUMULATOR); } private static IndexResolution unionTypedIndex(boolean compact) { - Map indexNamesWithModes = new HashMap<>(); - for (int i = 0; i < MultiTypeEsFieldMemoryTests.NUM_INDICES; i++) { - indexNamesWithModes.put("idx_" + i, IndexMode.STANDARD); + Map indexNamesWithModes = IntStream.range(0, NUM_INDICES) + .boxed() + .collect(Collectors.toMap(i -> "idx_" + i, i -> IndexMode.STANDARD)); + Map mapping = IntStream.range(0, NUM_CONFLICTING_FIELDS) + .boxed() + .collect(Collectors.toMap(i -> "id_" + i, i -> getEsField(compact, "id_" + i))); + return IndexResolution.valid(new EsIndex("idx*", mapping, indexNamesWithModes, Map.of(), Map.of())); + } + + private static EsField getEsField(boolean compact, String fieldName) { + Set keywordIndices = new HashSet<>(); + Set intIndices = new HashSet<>(); + for (int j = 0; j < NUM_INDICES; j++) { + (j % 2 == 0 ? keywordIndices : intIndices).add("idx_" + j); } - Map mapping = new HashMap<>(); - for (int i = 0; i < MultiTypeEsFieldMemoryTests.NUM_CONFLICTING_FIELDS; i++) { - String fieldName = "id_" + i; - Set kwIndices = new HashSet<>(); - Set intIndices = new HashSet<>(); - for (int j = 0; j < MultiTypeEsFieldMemoryTests.NUM_INDICES; j++) { - (j % 2 == 0 ? kwIndices : intIndices).add("idx_" + j); - } - mapping.put( + return compact + ? new CompactInvalidMappedField(fieldName, Map.of(DataType.KEYWORD, keywordIndices, DataType.INTEGER, intIndices)) + : new InvalidMappedField( fieldName, - compact - ? 
new CompactInvalidMappedField(fieldName, Map.of(DataType.KEYWORD, kwIndices, DataType.INTEGER, intIndices)) - : new InvalidMappedField( - fieldName, - Map.of(DataType.KEYWORD.typeName(), kwIndices, DataType.INTEGER.typeName(), intIndices) - ) + Map.of(DataType.KEYWORD.typeName(), keywordIndices, DataType.INTEGER.typeName(), intIndices) ); - } - return IndexResolution.valid(new EsIndex("idx*", mapping, indexNamesWithModes, Map.of(), Map.of())); } } From 2836d0fe6c8dd8eb3c6c13f2e07baed2731a1c4e Mon Sep 17 00:00:00 2001 From: Gal Lalouche Date: Thu, 30 Apr 2026 23:56:22 +0300 Subject: [PATCH 20/20] ESQL: Use family type name for compact union-type lookup CompactMultiTypeEsField's per-source-type conversion map is keyed by what field-caps reports on the coordinator, which is the family type (e.g. "keyword" for a constant_keyword field). The data-node lookup in EsPhysicalOperationProviders was using mft.typeName() instead, so constant_keyword (and any other family-collapsed type) missed the lookup, fell through to the unmapped path, and produced all-nulls. Fixes the constant_keyword yaml rest test on this branch. 
Made-with: Cursor --- .../xpack/esql/planner/EsPhysicalOperationProviders.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java index f67cee946e116..58906817d83d6 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java @@ -268,9 +268,13 @@ private ValuesSourceReaderOperator.LoaderAndConverter blockLoaderAndConverter( Expression conversion = switch (unionTypes) { case CompactMultiTypeEsField compact -> { MappedFieldType mft = shardContext.fieldType(fieldName); + // Match what field_caps reports on the coordinator: family type (e.g., constant_keyword -> keyword) rather than the + // concrete mapper type, so the lookup key here aligns with how typeToConversionExpressions was keyed upstream. yield mft == null ? null - : compact.getConversionExpressionForType(EsqlDataTypeRegistry.INSTANCE.fromEs(mft.typeName(), mft.getMetricType())); + : compact.getConversionExpressionForType( + EsqlDataTypeRegistry.INSTANCE.fromEs(mft.familyTypeName(), mft.getMetricType()) + ); } case MultiTypeEsField legacy -> { // Use the fully qualified name `cluster:index-name` because multiple types are resolved on coordinator with cluster prefix