From a8a30a818779a994433461f44d91d0c14c9ced76 Mon Sep 17 00:00:00 2001
From: Eric Yang
Date: Sat, 27 Jun 2026 10:53:50 -0700
Subject: [PATCH] [SPARK-57727][SQL] Fix inferAdditionalConstraints incorrectly
 substituting attributes with non-binary-stable collations

---
Review notes (commentary only; no hunk below is altered):

* QueryPlanConstraints.scala, first hunk: adds the import of
  UnsafeRowUtils.isBinaryStable that the new guards use.
* QueryPlanConstraints.scala, second hunk: each of the three EqualTo cases
  gains a pattern guard, so replaceConstraints is only invoked when
  isBinaryStable(...) holds for the data types of both attributes involved.
  In the two Cast(...) cases the check is applied to the attribute inside
  the Cast (now bound as lc / rc), not to the Cast expression itself.
  An equality that fails its guard falls through to
  `case _ => // No inference`.
* CollationSuite.scala: the new test creates t1 with two UTF8_LCASE string
  columns, inserts ('hello', 'HELLO'), and expects
  `WHERE a = b AND a = 'hello' COLLATE UTF8_BINARY` to return that row.
  Only the attribute = attribute guard is exercised by this query; the two
  Cast(...) guards have no test in this patch.

 .../plans/logical/QueryPlanConstraints.scala        | 10 +++++++---
 .../apache/spark/sql/collation/CollationSuite.scala | 12 ++++++++++++
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/QueryPlanConstraints.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/QueryPlanConstraints.scala
index ef035eba5922c..af335450e0f51 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/QueryPlanConstraints.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/QueryPlanConstraints.scala
@@ -20,6 +20,7 @@ package org.apache.spark.sql.catalyst.plans.logical
 import scala.annotation.tailrec
 
 import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.util.UnsafeRowUtils.isBinaryStable
 
 trait QueryPlanConstraints extends ConstraintHelper {
   self: LogicalPlan =>
@@ -65,15 +66,18 @@ trait ConstraintHelper {
     // IsNotNull should be constructed by `constructIsNotNullConstraints`.
     val predicates = constraints.filterNot(_.isInstanceOf[IsNotNull])
     predicates.foreach {
-      case eq @ EqualTo(l: Attribute, r: Attribute) =>
+      case eq @ EqualTo(l: Attribute, r: Attribute)
+          if isBinaryStable(l.dataType) && isBinaryStable(r.dataType) =>
         // Also remove EqualNullSafe with the same l and r to avoid Once strategy's idempotence
         // is broken. l = r and l <=> r can infer l <=> l and r <=> r which is useless.
         val candidateConstraints = predicates - eq - EqualNullSafe(l, r)
         inferredConstraints ++= replaceConstraints(candidateConstraints, l, r)
         inferredConstraints ++= replaceConstraints(candidateConstraints, r, l)
-      case eq @ EqualTo(l @ Cast(_: Attribute, _, _, _), r: Attribute) =>
+      case eq @ EqualTo(l @ Cast(lc: Attribute, _, _, _), r: Attribute)
+          if isBinaryStable(lc.dataType) && isBinaryStable(r.dataType) =>
         inferredConstraints ++= replaceConstraints(predicates - eq - EqualNullSafe(l, r), r, l)
-      case eq @ EqualTo(l: Attribute, r @ Cast(_: Attribute, _, _, _)) =>
+      case eq @ EqualTo(l: Attribute, r @ Cast(rc: Attribute, _, _, _))
+          if isBinaryStable(l.dataType) && isBinaryStable(rc.dataType) =>
         inferredConstraints ++= replaceConstraints(predicates - eq - EqualNullSafe(l, r), l, r)
       case _ => // No inference
     }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/collation/CollationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/collation/CollationSuite.scala
index 711e1e091a982..17f6e758f1a6d 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/collation/CollationSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/collation/CollationSuite.scala
@@ -2440,6 +2440,18 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper {
     }
   }
 
+  test("SPARK-57727: constraint inference does not substitute non-binary-stable attributes") {
+    withTable("t1") {
+      sql("CREATE TABLE t1 (a STRING COLLATE UTF8_LCASE, b STRING COLLATE UTF8_LCASE)")
+      sql("INSERT INTO t1 VALUES ('hello', 'HELLO')")
+
+      checkAnswer(
+        sql("SELECT a, b FROM t1 WHERE a = b AND a = 'hello' COLLATE UTF8_BINARY"),
+        Row("hello", "HELLO")
+      )
+    }
+  }
+
   test("ConstantPropagation: replaces binary-stable attributes with contradicting predicates") {
     withTable("t1") {
       sql("CREATE TABLE t1 (c STRING)")