From d19df98cf7dd5e769f183974134d12baa31770bd Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Fri, 19 Jun 2026 01:57:25 +0000
Subject: [PATCH 1/6] Fix mixed-version distributed reads in get_traces
 filtered count (SNUBA-9W6)

## Description

Fixes EndpointGetTraces failing on mixed-version ClickHouse clusters with
`Code: 10 ... Not found column ... While executing Remote.` (SNUBA-9W6).

### The Problem

`KEY_FILTERED_ITEM_COUNT` is emitted as a `countIf(<trace item filter>)` in the
SELECT clause. When the user's filter contains an `IN` over a constant array
(e.g. `project_id IN [...]`), ClickHouse builds an internal prepared set whose
server-generated identifier (`__set_<Type>_<hash>_<hash>`) is baked into the
result-block column name. Because the aggregate lives in the SELECT clause, that
column name is matched by string across `Remote` nodes. On a mixed-version
cluster the two sides hash the set differently, so the names disagree and the
distributed read fails. This is the same class of bug as SNUBA-B82 (#8073).

### The Solution

Rewrite `in(x, array(...))` as `has(array(...), x)` within the filter that feeds
the `filtered_item_count` countIf. `has` over a constant array keeps the array
inline in the column name, which is byte-stable across ClickHouse versions, and
is equivalent to `x IN (array)` for scalar membership. The surrounding
NULL-handling wrapper is preserved untouched.

The rewrite is scoped to the SELECT-clause filter only; WHERE-clause `IN` sets
keep their prepared-set form so partition/granule pruning and lookup performance
are unaffected. Both the materialized-IDs path and the subquery path are covered.

### Test Plan

Adds `test_filtered_item_count_inlines_in_sets`, a regression guard that builds
the real `project_id IN [...]` filter and asserts the inlining removes every
`in()` over a constant array and emits `has(array(...), x)` instead. Existing
tests continue to pass.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01U8g3uHFHpntdvcwLkg8fym
---
 snuba/web/rpc/v1/endpoint_get_traces.py      | 43 ++++++++++-
 tests/web/rpc/v1/test_endpoint_get_traces.py | 76 +++++++++++++++++++-
 2 files changed, 116 insertions(+), 3 deletions(-)
diff --git a/snuba/web/rpc/v1/endpoint_get_traces.py b/snuba/web/rpc/v1/endpoint_get_traces.py
index a7a7a9a18f1..68c041f7efd 100644
--- a/snuba/web/rpc/v1/endpoint_get_traces.py
+++ b/snuba/web/rpc/v1/endpoint_get_traces.py
@@ -35,7 +35,7 @@
     literals_array,
     or_cond,
 )
-from snuba.query.expressions import DangerousRawSQL, Expression
+from snuba.query.expressions import DangerousRawSQL, Expression, FunctionCall
 from snuba.query.logical import Query
 from snuba.query.query_settings import HTTPQuerySettings, QuerySettings
 from snuba.request import Request as SnubaRequest
@@ -368,6 +368,35 @@ def _get_earliest_frontend_span_attribute(
     raise BadSnubaRPCRequestException(f"{key} had an unknown or unset type: {trace_attribute.type}")
 
 
+def _inline_in_sets(expression: Expression) -> Expression:
+    """Rewrite ``in(x, array(...))`` as ``has(array(...), x)``.
+
+    A constant ``IN`` set makes ClickHouse build an internal prepared set whose
+    server-generated identifier (``__set_<Type>_<hash>_<hash>``) is baked into the
+    result-block column name. The ``filtered_item_count`` aggregate embeds the trace
+    item filter inside a ``countIf`` in the SELECT clause, so that name is matched by
+    string across ``Remote`` nodes. On a mixed-version ClickHouse cluster the two
+    sides hash the set differently, so the names disagree and the distributed read
+    fails with ``Code: 10 ... Not found column ... While executing Remote.``
+    (SNUBA-9W6).
+
+    ``has`` over a constant array keeps the array inline in the column name, which is
+    byte-stable across versions. ``has(array, x)`` is equivalent to ``x IN (array)``
+    for scalar membership, and the surrounding NULL-handling wrapper built by
+    ``trace_item_filters_to_expression`` is preserved untouched.
+    """
+
+    def rewrite(exp: Expression) -> Expression:
+        if isinstance(exp, FunctionCall) and exp.function_name == "in" and len(exp.parameters) == 2:
+            lhs, rhs = exp.parameters
+            # Only constant arrays (literals_array -> array(...)) build a prepared set.
+            if isinstance(rhs, FunctionCall) and rhs.function_name == "array":
+                return f.has(rhs, lhs, alias=exp.alias)
+        return exp
+
+    return expression.transform(rewrite)
+
+
 def _build_snuba_request(
     request: GetTracesRequest,
     query: Query,
@@ -683,6 +712,12 @@ def _get_metadata_for_traces(
         else:
             item_type = TraceItemType.TRACE_ITEM_TYPE_SPAN
 
+        # filtered_item_count embeds this filter inside a SELECT-clause countIf, so any
+        # constant IN-set must be inlined to keep the result-block column name stable
+        # across mixed-version ClickHouse nodes on distributed reads (see _inline_in_sets).
+        if trace_item_filters_expression is not None:
+            trace_item_filters_expression = _inline_in_sets(trace_item_filters_expression)
+
         selected_columns: list[SelectedExpression] = []
         start_timestamp_requested = False
         for trace_attribute in request.attributes:
@@ -799,6 +834,12 @@ def _get_metadata_for_traces_with_subquery(
         else:
             item_type = TraceItemType.TRACE_ITEM_TYPE_SPAN
 
+        # filtered_item_count embeds this filter inside a SELECT-clause countIf, so any
+        # constant IN-set must be inlined to keep the result-block column name stable
+        # across mixed-version ClickHouse nodes on distributed reads (see _inline_in_sets).
+        if trace_item_filters_expression is not None:
+            trace_item_filters_expression = _inline_in_sets(trace_item_filters_expression)
+
         selected_columns: list[SelectedExpression] = []
         start_timestamp_requested = False
         for trace_attribute in request.attributes:
diff --git a/tests/web/rpc/v1/test_endpoint_get_traces.py b/tests/web/rpc/v1/test_endpoint_get_traces.py
index 3d1b9d14272..8e7d979ed77 100644
--- a/tests/web/rpc/v1/test_endpoint_get_traces.py
+++ b/tests/web/rpc/v1/test_endpoint_get_traces.py
@@ -21,7 +21,11 @@
     ResponseMeta,
     TraceItemType,
 )
-from sentry_protos.snuba.v1.trace_item_attribute_pb2 import AttributeKey, AttributeValue
+from sentry_protos.snuba.v1.trace_item_attribute_pb2 import (
+    AttributeKey,
+    AttributeValue,
+    IntArray,
+)
 from sentry_protos.snuba.v1.trace_item_filter_pb2 import (
     ComparisonFilter,
     TraceItemFilter,
@@ -31,8 +35,13 @@
 from snuba import state
 from snuba.datasets.storages.factory import get_storage
 from snuba.datasets.storages.storage_key import StorageKey
+from snuba.query.expressions import FunctionCall, Literal
+from snuba.web.rpc.common.common import (
+    attribute_key_to_expression,
+    trace_item_filters_to_expression,
+)
 from snuba.web.rpc.common.exceptions import BadSnubaRPCRequestException
-from snuba.web.rpc.v1.endpoint_get_traces import EndpointGetTraces
+from snuba.web.rpc.v1.endpoint_get_traces import EndpointGetTraces, _inline_in_sets
 from tests.base import BaseApiTest
 from tests.conftest import SnubaSetConfig
 from tests.helpers import write_raw_unprocessed_events
@@ -911,6 +920,69 @@ def trace_filter(
     )
 
 
+def _in_calls_over_arrays(expression: Any) -> list[FunctionCall]:
+    """All ``in(x, array(...))`` calls in ``expression`` — the form that builds a
+    constant prepared set (``__set_*``)."""
+    return [
+        exp
+        for exp in expression
+        if isinstance(exp, FunctionCall)
+        and exp.function_name == "in"
+        and len(exp.parameters) == 2
+        and isinstance(exp.parameters[1], FunctionCall)
+        and exp.parameters[1].function_name == "array"
+    ]
+
+
+def test_filtered_item_count_inlines_in_sets() -> None:
+    """Regression guard for SNUBA-9W6 (mixed-version distributed reads).
+
+    ``KEY_FILTERED_ITEM_COUNT`` becomes a ``countIf`` in the SELECT clause whose
+    condition is the trace item filter. A constant ``IN`` set there makes ClickHouse
+    bake a server-generated ``__set_<Type>_<hash>_<hash>`` identifier into the
+    result-block column name. On a mixed-version cluster the two sides hash the set
+    differently, so the column names disagree across ``Remote`` and the distributed
+    read fails with ``Code: 10 ... Not found column ... While executing Remote.``.
+    The set must therefore be inlined as ``has(array(...), x)``, which keeps the
+    array in the column name and is byte-stable across versions.
+    """
+    project_ids = [11, 22, 33, 44]
+    filter_expr = trace_item_filters_to_expression(
+        TraceItemFilter(
+            comparison_filter=ComparisonFilter(
+                key=AttributeKey(name="sentry.project_id", type=AttributeKey.TYPE_INT),
+                op=ComparisonFilter.OP_IN,
+                value=AttributeValue(val_int_array=IntArray(values=project_ids)),
+            ),
+        ),
+        attribute_key_to_expression,
+    )
+
+    # The raw filter builds an IN over the constant project-id array (the source of
+    # the unstable __set_* identifier).
+    before = _in_calls_over_arrays(filter_expr)
+    assert before, "expected the project_id filter to build an in() over a constant array"
+
+    rewritten = _inline_in_sets(filter_expr)
+
+    # After inlining, no IN over a constant array survives.
+    assert not _in_calls_over_arrays(rewritten), (
+        "in() over a constant array must be rewritten to has() to avoid __set_* identifiers"
+    )
+
+    # ...and a has(array(<project_ids>), x) is emitted instead.
+    has_over_project_ids = [
+        exp
+        for exp in rewritten
+        if isinstance(exp, FunctionCall)
+        and exp.function_name == "has"
+        and isinstance(exp.parameters[0], FunctionCall)
+        and exp.parameters[0].function_name == "array"
+        and [p.value for p in exp.parameters[0].parameters if isinstance(p, Literal)] == project_ids
+    ]
+    assert has_over_project_ids, "expected has(array(<project_ids>), x) after inlining"
+
+
 @pytest.mark.clickhouse_db
 @pytest.mark.redis_db
 class TestEndpointGetTracesCrossItem(TestEndpointGetTraces):

From 6fba426043e5c3903b091aee8427ba448a01d70c Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Fri, 19 Jun 2026 02:06:39 +0000
Subject: [PATCH 2/6] Fix mypy alias arg-type in _inline_in_sets

The DSL's f.has(...) types its kwargs as str, so passing the Optional[str]
in_cond alias failed strict mypy. Build the has() FunctionCall directly (as
map_key_exists does) to preserve the optional alias without a type error.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01U8g3uHFHpntdvcwLkg8fym
---
 snuba/web/rpc/v1/endpoint_get_traces.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/snuba/web/rpc/v1/endpoint_get_traces.py b/snuba/web/rpc/v1/endpoint_get_traces.py
index 68c041f7efd..238d15e45a5 100644
--- a/snuba/web/rpc/v1/endpoint_get_traces.py
+++ b/snuba/web/rpc/v1/endpoint_get_traces.py
@@ -391,7 +391,9 @@ def rewrite(exp: Expression) -> Expression:
             lhs, rhs = exp.parameters
             # Only constant arrays (literals_array -> array(...)) build a prepared set.
             if isinstance(rhs, FunctionCall) and rhs.function_name == "array":
-                return f.has(rhs, lhs, alias=exp.alias)
+                # Build the FunctionCall directly (rather than f.has(..., alias=...)) so the
+                # original Optional[str] alias is preserved without a type error.
+                return FunctionCall(exp.alias, "has", (rhs, lhs))
         return exp
 
     return expression.transform(rewrite)

From fbc981b6281731735e11b7630877103df6fd5798 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Fri, 19 Jun 2026 02:40:55 +0000
Subject: [PATCH 3/6] Inline constant IN-sets in all SELECT-clause filter
 embeddings

Generalize the SNUBA-9W6 fix so the "Not found column ... While executing
Remote." failure on mixed-version ClickHouse clusters can't recur in other
SELECT-clause spots that embed a user filter.

A constant IN-set leaks an unstable server-generated __set_<hash> identifier
into the result-block column name only when the expression is in a SELECT
clause / projection / aggregate condition / HAVING. The same user filter in a
WHERE clause is fine (and the IN-set is needed there for partition/primary-key
pruning), so the rewrite stays scoped to SELECT-clause embeddings.

- Move the in()->has() rewrite into a shared common.inline_in_to_has() helper,
  documenting that it must not be applied to WHERE conditions.
- Apply it in resolvers/common/aggregation._get_condition_in_aggregation, which
  feeds every conditional aggregate (countIf/sumIf/...) used by the time-series
  and trace-item-table endpoints.
- Apply it to the HAVING countIf in cross_item_queries (the WHERE or_cond keeps
  its prepared IN-sets).
- endpoint_get_traces now uses the shared helper.
- Add a regression test for the conditional-aggregation path.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01U8g3uHFHpntdvcwLkg8fym
---
 snuba/web/rpc/common/common.py                | 38 +++++++++++++
 snuba/web/rpc/v1/endpoint_get_traces.py       | 42 ++------------
 .../rpc/v1/resolvers/common/aggregation.py    |  8 ++-
 .../v1/resolvers/common/cross_item_queries.py | 10 +++-
 tests/web/rpc/test_aggregation.py             | 57 +++++++++++++++++++
 tests/web/rpc/v1/test_endpoint_get_traces.py  |  5 +-
 6 files changed, 119 insertions(+), 41 deletions(-)

diff --git a/snuba/web/rpc/common/common.py b/snuba/web/rpc/common/common.py
index 3d7c27328c6..ba0f431acbb 100644
--- a/snuba/web/rpc/common/common.py
+++ b/snuba/web/rpc/common/common.py
@@ -50,6 +50,44 @@
 from snuba.web.rpc.common.exceptions import BadSnubaRPCRequestException
 
 
+def inline_in_to_has(expression: Expression) -> Expression:
+    """Rewrite ``in(x, array(...))`` as ``has(array(...), x)`` for SELECT-clause use.
+
+    A constant ``IN`` set makes ClickHouse build an internal prepared set whose
+    server-generated identifier (``__set_<Type>_<hash>_<hash>``) is baked into the
+    result-block column name. When such an expression lands in a SELECT-clause
+    expression — a conditional aggregate (``countIf``/``sumIf``/...), a ``HAVING``
+    aggregate, or an ``arrayJoin`` projection — that name is matched by string across
+    ``Remote`` nodes. On a mixed-version ClickHouse cluster the two sides hash the set
+    differently, so the names disagree and the distributed read fails with
+    ``Code: 10 ... Not found column ... While executing Remote.`` (SNUBA-9W6,
+    SNUBA-B82).
+
+    ``has`` over a constant array keeps the array inline in the column name, which is
+    byte-stable across versions, and ``has(array, x)`` is equivalent to ``x IN (array)``
+    for scalar membership. The surrounding NULL-handling wrapper built by
+    ``trace_item_filters_to_expression`` is preserved untouched.
+
+    Only apply this to expressions destined for a SELECT clause / projection / aggregate
+    condition / ``HAVING`` / ``GROUP BY`` / ``ORDER BY`` computed column. Do NOT apply it
+    to WHERE-clause conditions: there the prepared ``IN`` set is what lets ClickHouse
+    prune partitions and primary-key ranges, so replacing it with ``has`` would force
+    full scans.
+    """
+
+    def rewrite(exp: Expression) -> Expression:
+        if isinstance(exp, FunctionCall) and exp.function_name == "in" and len(exp.parameters) == 2:
+            lhs, rhs = exp.parameters
+            # Only constant arrays (literals_array -> array(...)) build a prepared set.
+            if isinstance(rhs, FunctionCall) and rhs.function_name == "array":
+                # Build the FunctionCall directly (rather than f.has(..., alias=...)) so the
+                # original Optional[str] alias is preserved without a type error.
+                return FunctionCall(exp.alias, "has", (rhs, lhs))
+        return exp
+
+    return expression.transform(rewrite)
+
+
 def attribute_key_to_expression(attr_key: AttributeKey) -> Expression:
     """Convert an AttributeKey proto to a Snuba Expression.
 
diff --git a/snuba/web/rpc/v1/endpoint_get_traces.py b/snuba/web/rpc/v1/endpoint_get_traces.py
index 238d15e45a5..4cf595cc355 100644
--- a/snuba/web/rpc/v1/endpoint_get_traces.py
+++ b/snuba/web/rpc/v1/endpoint_get_traces.py
@@ -35,7 +35,7 @@
     literals_array,
     or_cond,
 )
-from snuba.query.expressions import DangerousRawSQL, Expression, FunctionCall
+from snuba.query.expressions import DangerousRawSQL, Expression
 from snuba.query.logical import Query
 from snuba.query.query_settings import HTTPQuerySettings, QuerySettings
 from snuba.request import Request as SnubaRequest
@@ -44,6 +44,7 @@
 from snuba.web.rpc.common.common import (
     attribute_key_to_expression,
     base_conditions_and,
+    inline_in_to_has,
     trace_item_filters_to_expression,
     treeify_or_and_conditions,
 )
@@ -368,37 +369,6 @@ def _get_earliest_frontend_span_attribute(
     raise BadSnubaRPCRequestException(f"{key} had an unknown or unset type: {trace_attribute.type}")
 
 
-def _inline_in_sets(expression: Expression) -> Expression:
-    """Rewrite ``in(x, array(...))`` as ``has(array(...), x)``.
-
-    A constant ``IN`` set makes ClickHouse build an internal prepared set whose
-    server-generated identifier (``__set_<Type>_<hash>_<hash>``) is baked into the
-    result-block column name. The ``filtered_item_count`` aggregate embeds the trace
-    item filter inside a ``countIf`` in the SELECT clause, so that name is matched by
-    string across ``Remote`` nodes. On a mixed-version ClickHouse cluster the two
-    sides hash the set differently, so the names disagree and the distributed read
-    fails with ``Code: 10 ... Not found column ... While executing Remote.``
-    (SNUBA-9W6).
-
-    ``has`` over a constant array keeps the array inline in the column name, which is
-    byte-stable across versions. ``has(array, x)`` is equivalent to ``x IN (array)``
-    for scalar membership, and the surrounding NULL-handling wrapper built by
-    ``trace_item_filters_to_expression`` is preserved untouched.
-    """
-
-    def rewrite(exp: Expression) -> Expression:
-        if isinstance(exp, FunctionCall) and exp.function_name == "in" and len(exp.parameters) == 2:
-            lhs, rhs = exp.parameters
-            # Only constant arrays (literals_array -> array(...)) build a prepared set.
-            if isinstance(rhs, FunctionCall) and rhs.function_name == "array":
-                # Build the FunctionCall directly (rather than f.has(..., alias=...)) so the
-                # original Optional[str] alias is preserved without a type error.
-                return FunctionCall(exp.alias, "has", (rhs, lhs))
-        return exp
-
-    return expression.transform(rewrite)
-
-
 def _build_snuba_request(
     request: GetTracesRequest,
     query: Query,
@@ -716,9 +686,9 @@ def _get_metadata_for_traces(
 
         # filtered_item_count embeds this filter inside a SELECT-clause countIf, so any
         # constant IN-set must be inlined to keep the result-block column name stable
-        # across mixed-version ClickHouse nodes on distributed reads (see _inline_in_sets).
+        # across mixed-version ClickHouse nodes on distributed reads (see inline_in_to_has).
         if trace_item_filters_expression is not None:
-            trace_item_filters_expression = _inline_in_sets(trace_item_filters_expression)
+            trace_item_filters_expression = inline_in_to_has(trace_item_filters_expression)
 
         selected_columns: list[SelectedExpression] = []
         start_timestamp_requested = False
@@ -838,9 +808,9 @@ def _get_metadata_for_traces_with_subquery(
 
         # filtered_item_count embeds this filter inside a SELECT-clause countIf, so any
         # constant IN-set must be inlined to keep the result-block column name stable
-        # across mixed-version ClickHouse nodes on distributed reads (see _inline_in_sets).
+        # across mixed-version ClickHouse nodes on distributed reads (see inline_in_to_has).
         if trace_item_filters_expression is not None:
-            trace_item_filters_expression = _inline_in_sets(trace_item_filters_expression)
+            trace_item_filters_expression = inline_in_to_has(trace_item_filters_expression)
 
         selected_columns: list[SelectedExpression] = []
         start_timestamp_requested = False
diff --git a/snuba/web/rpc/v1/resolvers/common/aggregation.py b/snuba/web/rpc/v1/resolvers/common/aggregation.py
index 5cc7a0135cb..cc313344d37 100644
--- a/snuba/web/rpc/v1/resolvers/common/aggregation.py
+++ b/snuba/web/rpc/v1/resolvers/common/aggregation.py
@@ -30,6 +30,7 @@
 )
 from snuba.web.rpc.common.common import (
     get_field_existence_expression,
+    inline_in_to_has,
     trace_item_filters_to_expression,
 )
 from snuba.web.rpc.common.exceptions import BadSnubaRPCRequestException
@@ -66,8 +67,11 @@ def _get_condition_in_aggregation(
 ) -> Expression:
     condition_in_aggregation: Expression = literal(True)
     if isinstance(aggregation, AttributeConditionalAggregation):
-        condition_in_aggregation = trace_item_filters_to_expression(
-            aggregation.filter, attribute_key_to_expression
+        # This condition is embedded in SELECT-clause conditional aggregates (countIf,
+        # sumIf, ...), so inline any constant IN-set to keep the result-block column name
+        # stable across mixed-version ClickHouse nodes on distributed reads.
+        condition_in_aggregation = inline_in_to_has(
+            trace_item_filters_to_expression(aggregation.filter, attribute_key_to_expression)
         )
     return condition_in_aggregation
 
diff --git a/snuba/web/rpc/v1/resolvers/common/cross_item_queries.py b/snuba/web/rpc/v1/resolvers/common/cross_item_queries.py
index fc6fb1389d7..2f754f1a17c 100644
--- a/snuba/web/rpc/v1/resolvers/common/cross_item_queries.py
+++ b/snuba/web/rpc/v1/resolvers/common/cross_item_queries.py
@@ -28,6 +28,7 @@
 from snuba.web.rpc.common.common import (
     attribute_key_to_expression,
     base_conditions_and,
+    inline_in_to_has,
     trace_item_filters_to_expression,
     treeify_or_and_conditions,
 )
@@ -88,8 +89,15 @@ def get_trace_ids_sql_for_cross_item_query(
             )
 
     if len(filter_expressions) > 1:
+        # The countIf goes in the HAVING clause (a SELECT-clause aggregate), so inline any
+        # constant IN-set to keep its result-block column name stable across mixed-version
+        # ClickHouse nodes. The WHERE-clause or_cond below keeps its prepared IN-sets for
+        # partition/primary-key pruning.
         trace_item_filters_and_expression = and_cond(
-            *[f.greater(f.countIf(expression), 0) for expression in filter_expressions]
+            *[
+                f.greater(f.countIf(inline_in_to_has(expression)), 0)
+                for expression in filter_expressions
+            ]
         )
         trace_item_filters_or_expression = or_cond(*filter_expressions)
     elif len(filter_expressions) == 1:
diff --git a/tests/web/rpc/test_aggregation.py b/tests/web/rpc/test_aggregation.py
index 8c590b7c05f..3fd6a6fe5c5 100644
--- a/tests/web/rpc/test_aggregation.py
+++ b/tests/web/rpc/test_aggregation.py
@@ -6,10 +6,13 @@
 )
 from sentry_protos.snuba.v1.trace_item_attribute_pb2 import (
     AttributeKey,
+    AttributeValue,
     ExtrapolationMode,
     Function,
+    IntArray,
     Reliability,
 )
+from sentry_protos.snuba.v1.trace_item_filter_pb2 import ComparisonFilter, TraceItemFilter
 
 from snuba.query.expressions import Column, FunctionCall, JsonPath, Literal, SubscriptableReference
 from snuba.web.rpc.common.common import (
@@ -328,3 +331,57 @@ def test_aggregation_to_expression_sum_type_array_raises() -> None:
     )
     with pytest.raises(BadSnubaRPCRequestException, match="not supported for array attribute"):
         aggregation_to_expression(agg, attribute_key_to_expression)
+
+
+def test_conditional_aggregation_inlines_in_sets() -> None:
+    """Regression guard for SNUBA-9W6 (mixed-version distributed reads).
+
+    A conditional aggregation's filter is embedded in a SELECT-clause ``countIf``/
+    ``sumIf``. A constant ``IN`` set there bakes a server-generated
+    ``__set_<Type>_<hash>_<hash>`` identifier into the result-block column name; on a
+    mixed-version cluster the two sides hash it differently and the distributed read
+    fails with ``Code: 10 ... Not found column ... While executing Remote.``. The set
+    must be inlined as ``has(array(...), x)`` instead.
+    """
+    project_ids = [11, 22, 33]
+    agg = AttributeConditionalAggregation(
+        aggregate=Function.FUNCTION_SUM,
+        key=AttributeKey(type=AttributeKey.TYPE_DOUBLE, name="my.field"),
+        label="sum(my.field)",
+        extrapolation_mode=ExtrapolationMode.EXTRAPOLATION_MODE_NONE,
+        filter=TraceItemFilter(
+            comparison_filter=ComparisonFilter(
+                key=AttributeKey(name="sentry.project_id", type=AttributeKey.TYPE_INT),
+                op=ComparisonFilter.OP_IN,
+                value=AttributeValue(val_int_array=IntArray(values=project_ids)),
+            )
+        ),
+    )
+
+    expr = aggregation_to_expression(agg, attribute_key_to_expression)
+
+    # No in() over a constant array may survive (it would reintroduce the __set_* id).
+    in_over_arrays = [
+        e
+        for e in expr
+        if isinstance(e, FunctionCall)
+        and e.function_name == "in"
+        and len(e.parameters) == 2
+        and isinstance(e.parameters[1], FunctionCall)
+        and e.parameters[1].function_name == "array"
+    ]
+    assert not in_over_arrays, (
+        "conditional aggregation must not embed an in() over a constant array"
+    )
+
+    # ...and the membership is emitted as has(array(<project_ids>), x).
+    has_over_pids = [
+        e
+        for e in expr
+        if isinstance(e, FunctionCall)
+        and e.function_name == "has"
+        and isinstance(e.parameters[0], FunctionCall)
+        and e.parameters[0].function_name == "array"
+        and [p.value for p in e.parameters[0].parameters if isinstance(p, Literal)] == project_ids
+    ]
+    assert has_over_pids, "expected has(array(<project_ids>), x) in the conditional aggregate"
diff --git a/tests/web/rpc/v1/test_endpoint_get_traces.py b/tests/web/rpc/v1/test_endpoint_get_traces.py
index 8e7d979ed77..af095f7ad2c 100644
--- a/tests/web/rpc/v1/test_endpoint_get_traces.py
+++ b/tests/web/rpc/v1/test_endpoint_get_traces.py
@@ -38,10 +38,11 @@
 from snuba.query.expressions import FunctionCall, Literal
 from snuba.web.rpc.common.common import (
     attribute_key_to_expression,
+    inline_in_to_has,
     trace_item_filters_to_expression,
 )
 from snuba.web.rpc.common.exceptions import BadSnubaRPCRequestException
-from snuba.web.rpc.v1.endpoint_get_traces import EndpointGetTraces, _inline_in_sets
+from snuba.web.rpc.v1.endpoint_get_traces import EndpointGetTraces
 from tests.base import BaseApiTest
 from tests.conftest import SnubaSetConfig
 from tests.helpers import write_raw_unprocessed_events
@@ -963,7 +964,7 @@ def test_filtered_item_count_inlines_in_sets() -> None:
     before = _in_calls_over_arrays(filter_expr)
     assert before, "expected the project_id filter to build an in() over a constant array"
 
-    rewritten = _inline_in_sets(filter_expr)
+    rewritten = inline_in_to_has(filter_expr)
 
     # After inlining, no IN over a constant array survives.
     assert not _in_calls_over_arrays(rewritten), (

From 913bca6759d4aca486c38685ceb1be56e8cc73e6 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Fri, 19 Jun 2026 03:05:21 +0000
Subject: [PATCH 4/6] Build SELECT-clause membership as has() at construction
 instead of inlining

Replace the post-hoc inline_in_to_has() traversal with a membership_as_has flag
threaded through trace_item_filters_to_expression. The filter builder now emits
has(array, x) directly at the construction call site when the result is destined
for a SELECT clause / projection / aggregate condition / HAVING, and keeps
x IN (array) (the default) for WHERE clauses so prepared-set partition/primary-key
pruning is preserved.

Call sites that build SELECT-clause expressions pass membership_as_has=True:
- aggregation._get_condition_in_aggregation (all conditional aggregates: time
  series + trace item table; also covers SNUBA-A1W's failure_rate NOT IN filter)
- cross_item_queries builds a separate has()-form list for the HAVING countIf,
  keeping the WHERE or_cond on the prepared IN-set
- endpoint_get_traces._get_trace_item_filter_expressions (filtered_item_count)

The OP_IN / OP_NOT_IN handlers and _analyzer_safe_in_expression (map-backed) all
route through a shared _in_or_has() helper. Tests updated to assert the builder
flag behavior (default keeps in(), flag emits has()).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01U8g3uHFHpntdvcwLkg8fym
---
 snuba/web/rpc/common/common.py                | 99 ++++++++++---------
 snuba/web/rpc/v1/endpoint_get_traces.py       | 19 ++--
 .../rpc/v1/resolvers/common/aggregation.py    | 10 +-
 .../v1/resolvers/common/cross_item_queries.py | 29 ++++--
 tests/web/rpc/test_aggregation.py             |  8 +-
 tests/web/rpc/v1/test_endpoint_get_traces.py  | 61 ++++++------
 6 files changed, 118 insertions(+), 108 deletions(-)

diff --git a/snuba/web/rpc/common/common.py b/snuba/web/rpc/common/common.py
index ba0f431acbb..0f42e712196 100644
--- a/snuba/web/rpc/common/common.py
+++ b/snuba/web/rpc/common/common.py
@@ -50,42 +50,23 @@
 from snuba.web.rpc.common.exceptions import BadSnubaRPCRequestException
 
 
-def inline_in_to_has(expression: Expression) -> Expression:
-    """Rewrite ``in(x, array(...))`` as ``has(array(...), x)`` for SELECT-clause use.
-
-    A constant ``IN`` set makes ClickHouse build an internal prepared set whose
-    server-generated identifier (``__set_<Type>_<hash>_<hash>``) is baked into the
-    result-block column name. When such an expression lands in a SELECT-clause
-    expression — a conditional aggregate (``countIf``/``sumIf``/...), a ``HAVING``
-    aggregate, or an ``arrayJoin`` projection — that name is matched by string across
-    ``Remote`` nodes. On a mixed-version ClickHouse cluster the two sides hash the set
-    differently, so the names disagree and the distributed read fails with
-    ``Code: 10 ... Not found column ... While executing Remote.`` (SNUBA-9W6,
-    SNUBA-B82).
-
-    ``has`` over a constant array keeps the array inline in the column name, which is
-    byte-stable across versions, and ``has(array, x)`` is equivalent to ``x IN (array)``
-    for scalar membership. The surrounding NULL-handling wrapper built by
-    ``trace_item_filters_to_expression`` is preserved untouched.
-
-    Only apply this to expressions destined for a SELECT clause / projection / aggregate
-    condition / ``HAVING`` / ``GROUP BY`` / ``ORDER BY`` computed column. Do NOT apply it
-    to WHERE-clause conditions: there the prepared ``IN`` set is what lets ClickHouse
-    prune partitions and primary-key ranges, so replacing it with ``has`` would force
-    full scans.
+def _in_or_has(value: Expression, array: Expression, *, as_has: bool) -> Expression:
+    """Build the membership test ``value IN array``.
+
+    Returns ``in(value, array)`` by default, so ClickHouse keeps a prepared set for
+    partition/primary-key pruning — correct for WHERE clauses. When ``as_has`` is set,
+    returns ``has(array, value)`` instead, for expressions that land in a SELECT clause
+    / projection / aggregate condition / ``HAVING``. There the prepared set's
+    server-generated ``__set_<Type>_<hash>_<hash>`` identifier leaks into the
+    result-block column name and is not byte-stable across mixed-version distributed
+    ClickHouse nodes, which fails the read with
+    ``Code: 10 ... Not found column ... While executing Remote.`` (SNUBA-9W6, SNUBA-B82).
+    ``has`` over a constant array keeps the array inline in the column name and is
+    equivalent to ``value IN (array)`` for scalar membership.
     """
-
-    def rewrite(exp: Expression) -> Expression:
-        if isinstance(exp, FunctionCall) and exp.function_name == "in" and len(exp.parameters) == 2:
-            lhs, rhs = exp.parameters
-            # Only constant arrays (literals_array -> array(...)) build a prepared set.
-            if isinstance(rhs, FunctionCall) and rhs.function_name == "array":
-                # Build the FunctionCall directly (rather than f.has(..., alias=...)) so the
-                # original Optional[str] alias is preserved without a type error.
-                return FunctionCall(exp.alias, "has", (rhs, lhs))
-        return exp
-
-    return expression.transform(rewrite)
+    if as_has:
+        return f.has(array, value)
+    return in_cond(value, array)
 
 
 def attribute_key_to_expression(attr_key: AttributeKey) -> Expression:
@@ -450,16 +431,18 @@ def _analyzer_safe_in_expression(
     negated: bool,
     ignore_case: bool = False,
     guard: bool = True,
+    membership_as_has: bool = False,
 ) -> Expression:
     """``IN`` / ``NOT IN`` as ``[not] in(value, set)``, wrapped in
     ``and(exists, ...)`` only when ``guard`` is set (see ``_map_backed_operands``
     and ``_comparison_can_match_column_default``). The value lists carry only
     scalars, so the set never contains NULL — the legacy ``has(set, NULL)`` branch
-    was always ``false``."""
+    was always ``false``. ``membership_as_has`` emits ``has(set, value)`` instead of
+    ``in(value, set)`` for SELECT-clause use (see ``_in_or_has``)."""
     value, exists = _map_backed_operands(k)
     if ignore_case:
         value = f.lower(value)
-    membership = in_cond(value, v_expression)
+    membership = _in_or_has(value, v_expression, as_has=membership_as_has)
     present = and_cond(exists, membership) if guard else membership
     return not_cond(present) if negated else present
 
@@ -745,11 +728,18 @@ def _any_attribute_filter_to_expression(
 def trace_item_filters_to_expression(
     item_filter: TraceItemFilter,
     attribute_key_to_expression: Callable[[AttributeKey], Expression],
+    membership_as_has: bool = False,
 ) -> Expression:
     """
     Trace Item Filters are things like (span.id=12345 AND start_timestamp >= "june 4th, 2024")
     This maps those filters into an expression which can be used in a WHERE clause
     :param item_filter:
+    :param membership_as_has: build ``IN``/``NOT IN`` membership as ``has(array, x)``
+        rather than ``x IN (array)``. Pass ``True`` only when the result lands in a
+        SELECT clause / projection / aggregate condition / ``HAVING`` — there a constant
+        ``IN`` set leaks an unstable ``__set_*`` identifier into the result-block column
+        name and breaks mixed-version distributed reads (see ``_in_or_has``). Leave the
+        default for WHERE clauses, where the prepared ``IN`` set drives pruning.
     :return:
     """
     if item_filter.HasField("and_filter"):
@@ -757,9 +747,14 @@ def trace_item_filters_to_expression(
         if len(filters) == 0:
             return literal(True)
         elif len(filters) == 1:
-            return trace_item_filters_to_expression(filters[0], attribute_key_to_expression)
+            return trace_item_filters_to_expression(
+                filters[0], attribute_key_to_expression, membership_as_has
+            )
         return and_cond(
-            *(trace_item_filters_to_expression(x, attribute_key_to_expression) for x in filters)
+            *(
+                trace_item_filters_to_expression(x, attribute_key_to_expression, membership_as_has)
+                for x in filters
+            )
         )
 
     if item_filter.HasField("or_filter"):
@@ -767,9 +762,14 @@ def trace_item_filters_to_expression(
         if len(filters) == 0:
             raise BadSnubaRPCRequestException("Invalid trace item filter, empty 'or' clause")
         elif len(filters) == 1:
-            return trace_item_filters_to_expression(filters[0], attribute_key_to_expression)
+            return trace_item_filters_to_expression(
+                filters[0], attribute_key_to_expression, membership_as_has
+            )
         return or_cond(
-            *(trace_item_filters_to_expression(x, attribute_key_to_expression) for x in filters)
+            *(
+                trace_item_filters_to_expression(x, attribute_key_to_expression, membership_as_has)
+                for x in filters
+            )
         )
 
     if item_filter.HasField("not_filter"):
@@ -778,11 +778,18 @@ def trace_item_filters_to_expression(
             raise BadSnubaRPCRequestException("Invalid trace item filter, empty 'not' clause")
         elif len(filters) == 1:
             return not_cond(
-                trace_item_filters_to_expression(filters[0], attribute_key_to_expression)
+                trace_item_filters_to_expression(
+                    filters[0], attribute_key_to_expression, membership_as_has
+                )
             )
         return not_cond(
             and_cond(
-                *(trace_item_filters_to_expression(x, attribute_key_to_expression) for x in filters)
+                *(
+                    trace_item_filters_to_expression(
+                        x, attribute_key_to_expression, membership_as_has
+                    )
+                    for x in filters
+                )
             )
         )
 
@@ -1003,10 +1010,11 @@ def trace_item_filters_to_expression(
                     negated=False,
                     ignore_case=ignore_case,
                     guard=_comparison_can_match_column_default(k.type, v, value_type),
+                    membership_as_has=membership_as_has,
                 )
             if ignore_case:
                 k_expression = f.lower(k_expression)
-            expr = in_cond(k_expression, v_expression)
+            expr = _in_or_has(k_expression, v_expression, as_has=membership_as_has)
             expr_with_null = or_cond(
                 expr,
                 and_cond(f.isNull(k_expression), f.has(v_expression, literal(None))),
@@ -1037,10 +1045,11 @@ def trace_item_filters_to_expression(
                     negated=True,
                     ignore_case=ignore_case,
                     guard=_comparison_can_match_column_default(k.type, v, value_type),
+                    membership_as_has=membership_as_has,
                 )
             if ignore_case:
                 k_expression = f.lower(k_expression)
-            expr = not_cond(in_cond(k_expression, v_expression))
+            expr = not_cond(_in_or_has(k_expression, v_expression, as_has=membership_as_has))
             expr_with_null = or_cond(
                 expr,
                 and_cond(
diff --git a/snuba/web/rpc/v1/endpoint_get_traces.py b/snuba/web/rpc/v1/endpoint_get_traces.py
index 4cf595cc355..7096690d012 100644
--- a/snuba/web/rpc/v1/endpoint_get_traces.py
+++ b/snuba/web/rpc/v1/endpoint_get_traces.py
@@ -44,7 +44,6 @@
 from snuba.web.rpc.common.common import (
     attribute_key_to_expression,
     base_conditions_and,
-    inline_in_to_has,
     trace_item_filters_to_expression,
     treeify_or_and_conditions,
 )
@@ -561,6 +560,11 @@ def _get_trace_item_filter_expressions(
     ) -> dict[TraceItemType.ValueType, Expression]:
         """
         Returns a dict mapping item types to a filter expression for that item type.
+
+        These expressions are only ever embedded in the ``filtered_item_count`` countIf
+        in the SELECT clause, so membership is built as ``has(array, x)``
+        (``membership_as_has``) to keep the result-block column name stable across
+        mixed-version distributed ClickHouse nodes (see common._in_or_has).
         """
         filters_by_item_type: dict[TraceItemType.ValueType, list[TraceItemFilter]] = defaultdict(
             list
@@ -579,6 +583,7 @@ def _get_trace_item_filter_expressions(
                         ),
                     ),
                     attribute_key_to_expression,
+                    membership_as_has=True,
                 ),
             )
 
@@ -684,12 +689,6 @@ def _get_metadata_for_traces(
         else:
             item_type = TraceItemType.TRACE_ITEM_TYPE_SPAN
 
-        # filtered_item_count embeds this filter inside a SELECT-clause countIf, so any
-        # constant IN-set must be inlined to keep the result-block column name stable
-        # across mixed-version ClickHouse nodes on distributed reads (see inline_in_to_has).
-        if trace_item_filters_expression is not None:
-            trace_item_filters_expression = inline_in_to_has(trace_item_filters_expression)
-
         selected_columns: list[SelectedExpression] = []
         start_timestamp_requested = False
         for trace_attribute in request.attributes:
@@ -806,12 +805,6 @@ def _get_metadata_for_traces_with_subquery(
         else:
             item_type = TraceItemType.TRACE_ITEM_TYPE_SPAN
 
-        # filtered_item_count embeds this filter inside a SELECT-clause countIf, so any
-        # constant IN-set must be inlined to keep the result-block column name stable
-        # across mixed-version ClickHouse nodes on distributed reads (see inline_in_to_has).
-        if trace_item_filters_expression is not None:
-            trace_item_filters_expression = inline_in_to_has(trace_item_filters_expression)
-
         selected_columns: list[SelectedExpression] = []
         start_timestamp_requested = False
         for trace_attribute in request.attributes:
diff --git a/snuba/web/rpc/v1/resolvers/common/aggregation.py b/snuba/web/rpc/v1/resolvers/common/aggregation.py
index cc313344d37..674e0cc69ad 100644
--- a/snuba/web/rpc/v1/resolvers/common/aggregation.py
+++ b/snuba/web/rpc/v1/resolvers/common/aggregation.py
@@ -30,7 +30,6 @@
 )
 from snuba.web.rpc.common.common import (
     get_field_existence_expression,
-    inline_in_to_has,
     trace_item_filters_to_expression,
 )
 from snuba.web.rpc.common.exceptions import BadSnubaRPCRequestException
@@ -68,10 +67,11 @@ def _get_condition_in_aggregation(
     condition_in_aggregation: Expression = literal(True)
     if isinstance(aggregation, AttributeConditionalAggregation):
         # This condition is embedded in SELECT-clause conditional aggregates (countIf,
-        # sumIf, ...), so inline any constant IN-set to keep the result-block column name
-        # stable across mixed-version ClickHouse nodes on distributed reads.
-        condition_in_aggregation = inline_in_to_has(
-            trace_item_filters_to_expression(aggregation.filter, attribute_key_to_expression)
+        # sumIf, ...), so build any constant IN-set as has(array, x) to keep the
+        # result-block column name stable across mixed-version ClickHouse nodes on
+        # distributed reads (membership_as_has, see common._in_or_has).
+        condition_in_aggregation = trace_item_filters_to_expression(
+            aggregation.filter, attribute_key_to_expression, membership_as_has=True
         )
     return condition_in_aggregation
 
diff --git a/snuba/web/rpc/v1/resolvers/common/cross_item_queries.py b/snuba/web/rpc/v1/resolvers/common/cross_item_queries.py
index 2f754f1a17c..b51ddf6adef 100644
--- a/snuba/web/rpc/v1/resolvers/common/cross_item_queries.py
+++ b/snuba/web/rpc/v1/resolvers/common/cross_item_queries.py
@@ -28,7 +28,6 @@
 from snuba.web.rpc.common.common import (
     attribute_key_to_expression,
     base_conditions_and,
-    inline_in_to_has,
     trace_item_filters_to_expression,
     treeify_or_and_conditions,
 )
@@ -66,7 +65,13 @@ def get_trace_ids_sql_for_cross_item_query(
     Returns:
         tuple: (sql_string, query_result) where query_result contains metadata like sampling_tier
     """
+    # filter_expressions feed the WHERE or_cond (keep the prepared IN-sets for
+    # partition/primary-key pruning); having_filter_expressions feed the HAVING countIf
+    # (a SELECT-clause aggregate), where the membership must be has(array, x) so its
+    # result-block column name is stable across mixed-version ClickHouse nodes
+    # (membership_as_has, see common._in_or_has).
     filter_expressions = []
+    having_filter_expressions = []
     if trace_filters:
         converted_trace_filters = [trace_filter for trace_filter in trace_filters]
         if isinstance(trace_filters[0], GetTracesRequest.TraceFilter):
@@ -78,26 +83,30 @@ def get_trace_ids_sql_for_cross_item_query(
             ]
 
         for trace_filter in converted_trace_filters:
+            item_type_cond = f.equals(column("item_type"), trace_filter.item_type)
             filter_expressions.append(
                 and_cond(
-                    f.equals(column("item_type"), trace_filter.item_type),
+                    item_type_cond,
                     trace_item_filters_to_expression(
                         trace_filter.filter,
                         attribute_key_to_expression,
                     ),
                 )
             )
+            having_filter_expressions.append(
+                and_cond(
+                    item_type_cond,
+                    trace_item_filters_to_expression(
+                        trace_filter.filter,
+                        attribute_key_to_expression,
+                        membership_as_has=True,
+                    ),
+                )
+            )
 
     if len(filter_expressions) > 1:
-        # The countIf goes in the HAVING clause (a SELECT-clause aggregate), so inline any
-        # constant IN-set to keep its result-block column name stable across mixed-version
-        # ClickHouse nodes. The WHERE-clause or_cond below keeps its prepared IN-sets for
-        # partition/primary-key pruning.
         trace_item_filters_and_expression = and_cond(
-            *[
-                f.greater(f.countIf(inline_in_to_has(expression)), 0)
-                for expression in filter_expressions
-            ]
+            *[f.greater(f.countIf(expression), 0) for expression in having_filter_expressions]
         )
         trace_item_filters_or_expression = or_cond(*filter_expressions)
     elif len(filter_expressions) == 1:
diff --git a/tests/web/rpc/test_aggregation.py b/tests/web/rpc/test_aggregation.py
index 3fd6a6fe5c5..cfb938ec7d8 100644
--- a/tests/web/rpc/test_aggregation.py
+++ b/tests/web/rpc/test_aggregation.py
@@ -333,15 +333,15 @@ def test_aggregation_to_expression_sum_type_array_raises() -> None:
         aggregation_to_expression(agg, attribute_key_to_expression)
 
 
-def test_conditional_aggregation_inlines_in_sets() -> None:
-    """Regression guard for SNUBA-9W6 (mixed-version distributed reads).
+def test_conditional_aggregation_uses_has_for_in_sets() -> None:
+    """Regression guard for SNUBA-9W6 / SNUBA-A1W (mixed-version distributed reads).
 
     A conditional aggregation's filter is embedded in a SELECT-clause ``countIf``/
     ``sumIf``. A constant ``IN`` set there bakes a server-generated
     ``__set_<Type>_<hash>_<hash>`` identifier into the result-block column name; on a
     mixed-version cluster the two sides hash it differently and the distributed read
-    fails with ``Code: 10 ... Not found column ... While executing Remote.``. The set
-    must be inlined as ``has(array(...), x)`` instead.
+    fails with ``Code: 10 ... Not found column ... While executing Remote.``. The
+    membership must therefore be built as ``has(array(...), x)`` instead.
     """
     project_ids = [11, 22, 33]
     agg = AttributeConditionalAggregation(
diff --git a/tests/web/rpc/v1/test_endpoint_get_traces.py b/tests/web/rpc/v1/test_endpoint_get_traces.py
index af095f7ad2c..30b94387792 100644
--- a/tests/web/rpc/v1/test_endpoint_get_traces.py
+++ b/tests/web/rpc/v1/test_endpoint_get_traces.py
@@ -38,7 +38,6 @@
 from snuba.query.expressions import FunctionCall, Literal
 from snuba.web.rpc.common.common import (
     attribute_key_to_expression,
-    inline_in_to_has,
     trace_item_filters_to_expression,
 )
 from snuba.web.rpc.common.exceptions import BadSnubaRPCRequestException
@@ -935,53 +934,53 @@ def _in_calls_over_arrays(expression: Any) -> list[FunctionCall]:
     ]
 
 
-def test_filtered_item_count_inlines_in_sets() -> None:
+def test_filter_membership_as_has_for_select_clause() -> None:
     """Regression guard for SNUBA-9W6 (mixed-version distributed reads).
 
-    ``KEY_FILTERED_ITEM_COUNT`` becomes a ``countIf`` in the SELECT clause whose
-    condition is the trace item filter. A constant ``IN`` set there makes ClickHouse
-    bake a server-generated ``__set_<Type>_<hash>_<hash>`` identifier into the
-    result-block column name. On a mixed-version cluster the two sides hash the set
-    differently, so the column names disagree across ``Remote`` and the distributed
-    read fails with ``Code: 10 ... Not found column ... While executing Remote.``.
-    The set must therefore be inlined as ``has(array(...), x)``, which keeps the
-    array in the column name and is byte-stable across versions.
+    The ``filtered_item_count`` filter (and every conditional aggregate) is embedded in
+    a SELECT-clause ``countIf``. A constant ``IN`` set there bakes a server-generated
+    ``__set_<Type>_<hash>_<hash>`` identifier into the result-block column name; on a
+    mixed-version cluster the two sides hash it differently and the distributed read
+    fails with ``Code: 10 ... Not found column ... While executing Remote.``.
+
+    ``trace_item_filters_to_expression(..., membership_as_has=True)`` therefore builds
+    the membership as ``has(array(...), x)`` — byte-stable across versions — while the
+    default keeps ``x IN (array)`` so WHERE clauses retain their prepared set for
+    partition/primary-key pruning.
     """
     project_ids = [11, 22, 33, 44]
-    filter_expr = trace_item_filters_to_expression(
-        TraceItemFilter(
-            comparison_filter=ComparisonFilter(
-                key=AttributeKey(name="sentry.project_id", type=AttributeKey.TYPE_INT),
-                op=ComparisonFilter.OP_IN,
-                value=AttributeValue(val_int_array=IntArray(values=project_ids)),
-            ),
+    proto_filter = TraceItemFilter(
+        comparison_filter=ComparisonFilter(
+            key=AttributeKey(name="sentry.project_id", type=AttributeKey.TYPE_INT),
+            op=ComparisonFilter.OP_IN,
+            value=AttributeValue(val_int_array=IntArray(values=project_ids)),
         ),
-        attribute_key_to_expression,
     )
 
-    # The raw filter builds an IN over the constant project-id array (the source of
-    # the unstable __set_* identifier).
-    before = _in_calls_over_arrays(filter_expr)
-    assert before, "expected the project_id filter to build an in() over a constant array"
-
-    rewritten = inline_in_to_has(filter_expr)
-
-    # After inlining, no IN over a constant array survives.
-    assert not _in_calls_over_arrays(rewritten), (
-        "in() over a constant array must be rewritten to has() to avoid __set_* identifiers"
+    # Default (WHERE) form keeps the prepared IN-set over the constant array.
+    where_expr = trace_item_filters_to_expression(proto_filter, attribute_key_to_expression)
+    assert _in_calls_over_arrays(where_expr), (
+        "WHERE-form filters must keep in() over the constant array (needed for pruning)"
     )
 
-    # ...and a has(array(<project_ids>), x) is emitted instead.
+    # SELECT-clause form (membership_as_has=True) builds has(array, x) and no IN-set.
+    select_expr = trace_item_filters_to_expression(
+        proto_filter, attribute_key_to_expression, membership_as_has=True
+    )
+    assert not _in_calls_over_arrays(select_expr), (
+        "membership_as_has must build has() instead of in() over a constant array "
+        "to avoid the unstable __set_* identifier"
+    )
     has_over_project_ids = [
         exp
-        for exp in rewritten
+        for exp in select_expr
         if isinstance(exp, FunctionCall)
         and exp.function_name == "has"
         and isinstance(exp.parameters[0], FunctionCall)
         and exp.parameters[0].function_name == "array"
         and [p.value for p in exp.parameters[0].parameters if isinstance(p, Literal)] == project_ids
     ]
-    assert has_over_project_ids, "expected has(array(<project_ids>), x) after inlining"
+    assert has_over_project_ids, "expected has(array(<project_ids>), x) with membership_as_has"
 
 
 @pytest.mark.clickhouse_db

From b1119f53dfaf317b07b8cf10d8ed8cbec4c9c98f Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Fri, 19 Jun 2026 03:11:15 +0000
Subject: [PATCH 5/6] Narrow _in_or_has return type to FunctionCall for mypy

Both branches (f.has(...) and in_cond(...)) return FunctionCall, and the OP_IN
handler assigns the result to `expr`, which mypy infers as FunctionCall from the
earlier f.equals(...) assignments. Annotate the helper as -> FunctionCall so the
assignment type-checks under strict mypy.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01U8g3uHFHpntdvcwLkg8fym
---
 snuba/web/rpc/common/common.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/snuba/web/rpc/common/common.py b/snuba/web/rpc/common/common.py
index 0f42e712196..70000198b22 100644
--- a/snuba/web/rpc/common/common.py
+++ b/snuba/web/rpc/common/common.py
@@ -50,7 +50,7 @@
 from snuba.web.rpc.common.exceptions import BadSnubaRPCRequestException
 
 
-def _in_or_has(value: Expression, array: Expression, *, as_has: bool) -> Expression:
+def _in_or_has(value: Expression, array: Expression, *, as_has: bool) -> FunctionCall:
     """Build the membership test ``value IN array``.
 
     Returns ``in(value, array)`` by default, so ClickHouse keeps a prepared set for

From 5fad650ddc473ed97a3d4a75e89cfbc3cc465c66 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Mon, 22 Jun 2026 21:58:40 +0000
Subject: [PATCH 6/6] Cover any_attribute_filter IN/NOT_IN under
 membership_as_has
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bugbot caught a gap: trace_item_filters_to_expression routed any_attribute_filter
through _any_attribute_filter_to_expression, which always built in(x, array(...))
inside the arrayExists lambda — even with membership_as_has=True. In a SELECT-clause
aggregate that still emits an unstable __set_* prepared-set identifier and fails
mixed-version distributed reads (same class as SNUBA-9W6/A1W; same shape as the
arrayExists IN already fixed in #8073).

Thread membership_as_has into _any_attribute_filter_to_expression and build the
IN/NOT_IN comparison via the shared _in_or_has() helper (has(array, x) for SELECT,
in(x, array) for WHERE). Add a parametrized regression test for both ops.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01U8g3uHFHpntdvcwLkg8fym
---
 snuba/web/rpc/common/common.py | 16 +++++++---
 tests/web/rpc/test_common.py   | 54 ++++++++++++++++++++++++++++++++++
 2 files changed, 66 insertions(+), 4 deletions(-)

diff --git a/snuba/web/rpc/common/common.py b/snuba/web/rpc/common/common.py
index 70000198b22..d196a66e802 100644
--- a/snuba/web/rpc/common/common.py
+++ b/snuba/web/rpc/common/common.py
@@ -616,6 +616,8 @@ def _type_array_includes_scalar_expression(
 
 def _any_attribute_filter_to_expression(
     filt: AnyAttributeFilter,
+    *,
+    membership_as_has: bool = False,
 ) -> Expression:
     """Build an expression that searches across attribute values.
 
@@ -626,7 +628,10 @@ def _any_attribute_filter_to_expression(
 
         arrayExists(x -> <comparison>(x, value), mapValues(column))
 
-    wrapped with NOT(...) for negative ops.
+    wrapped with NOT(...) for negative ops. ``membership_as_has`` builds the IN/NOT_IN
+    comparison as ``has(array, x)`` rather than ``in(x, array)`` so the (arrayExists'd)
+    expression carries no ``__set_*`` prepared-set identifier — required when it lands
+    in a SELECT-clause aggregate on a mixed-version distributed read (see ``_in_or_has``).
     """
     # 1. Extract and validate the comparison value
     v = filt.value
@@ -704,12 +709,13 @@ def _any_attribute_filter_to_expression(
                 lowered = [literal(s.lower()) for s in v.val_str_array.values]
             else:
                 lowered = [literal(elem.val_str.lower()) for elem in v.val_array.values]
-            comparison = in_cond(
+            comparison = _in_or_has(
                 f.lower(x),
                 literals_array(None, lowered),
+                as_has=membership_as_has,
             )
         else:
-            comparison = in_cond(x, v_expression)
+            comparison = _in_or_has(x, v_expression, as_has=membership_as_has)
     else:
         raise BadSnubaRPCRequestException(
             f"Unsupported any_attribute_filter op: {AnyAttributeFilter.Op.Name(filt.op)}"
@@ -1074,7 +1080,9 @@ def trace_item_filters_to_expression(
     if item_filter.HasField("any_attribute_filter"):
         if not state.get_int_config("enable_any_attribute_filter", 1):
             return literal(True)
-        return _any_attribute_filter_to_expression(item_filter.any_attribute_filter)
+        return _any_attribute_filter_to_expression(
+            item_filter.any_attribute_filter, membership_as_has=membership_as_has
+        )
 
     return literal(True)
 
diff --git a/tests/web/rpc/test_common.py b/tests/web/rpc/test_common.py
index a2f8a599457..d6ce06c540b 100644
--- a/tests/web/rpc/test_common.py
+++ b/tests/web/rpc/test_common.py
@@ -733,6 +733,60 @@ def test_unsupported_value_type_raises(self) -> None:
         with pytest.raises(BadSnubaRPCRequestException, match="does not have a value"):
             _any_attribute_filter_to_expression(filt)
 
+    @staticmethod
+    def _in_over_arrays(expr: Expression) -> list[FunctionCall]:
+        return [
+            e
+            for e in expr
+            if isinstance(e, FunctionCall)
+            and e.function_name == "in"
+            and len(e.parameters) == 2
+            and isinstance(e.parameters[1], FunctionCall)
+            and e.parameters[1].function_name == "array"
+        ]
+
+    @staticmethod
+    def _has_over(expr: Expression, values: list[str]) -> list[FunctionCall]:
+        return [
+            e
+            for e in expr
+            if isinstance(e, FunctionCall)
+            and e.function_name == "has"
+            and isinstance(e.parameters[0], FunctionCall)
+            and e.parameters[0].function_name == "array"
+            and [p.value for p in e.parameters[0].parameters if isinstance(p, Literal)] == values
+        ]
+
+    @pytest.mark.parametrize("op", [AnyAttributeFilter.OP_IN, AnyAttributeFilter.OP_NOT_IN])
+    def test_membership_as_has_for_select_clause(self, op: AnyAttributeFilter.Op.ValueType) -> None:
+        """Regression guard for SNUBA-9W6 / SNUBA-A1W (mixed-version distributed reads).
+
+        An any-attribute ``IN``/``NOT_IN`` builds ``in(x, array(...))`` inside an
+        ``arrayExists`` lambda. When that filter lands in a SELECT-clause aggregate, the
+        constant ``IN`` set's ``__set_<hash>`` identifier leaks into the result-block
+        column name and breaks reads across mixed-version ``Remote`` nodes. With
+        ``membership_as_has=True`` the comparison must be ``has(array(...), x)`` instead;
+        the default (WHERE) keeps ``in()`` so the prepared set still drives pruning.
+        """
+        values = ["error", "internal_error"]
+        filt = AnyAttributeFilter(
+            op=op,
+            value=AttributeValue(
+                val_array=Array(values=[AttributeValue(val_str=v) for v in values])
+            ),
+        )
+
+        # Default (WHERE) keeps the in() set inside arrayExists.
+        where_expr = _any_attribute_filter_to_expression(filt)
+        assert self._in_over_arrays(where_expr), "WHERE form must keep in() over the constant array"
+
+        # SELECT-clause form uses has(array, x) and builds no in() set.
+        select_expr = _any_attribute_filter_to_expression(filt, membership_as_has=True)
+        assert not self._in_over_arrays(select_expr), (
+            "membership_as_has must replace in() over a constant array with has()"
+        )
+        assert self._has_over(select_expr, values), "expected has(array(values), x) in the lambda"
+
 
 @pytest.mark.eap
 @pytest.mark.redis_db