diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/QueryPlanConstraints.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/QueryPlanConstraints.scala
index ef035eba5922c..af335450e0f51 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/QueryPlanConstraints.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/QueryPlanConstraints.scala
@@ -20,6 +20,7 @@ package org.apache.spark.sql.catalyst.plans.logical
 import scala.annotation.tailrec
 
 import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.util.UnsafeRowUtils.isBinaryStable
 
 trait QueryPlanConstraints extends ConstraintHelper { self: LogicalPlan =>
 
@@ -65,15 +66,18 @@ trait ConstraintHelper {
     // IsNotNull should be constructed by `constructIsNotNullConstraints`.
     val predicates = constraints.filterNot(_.isInstanceOf[IsNotNull])
     predicates.foreach {
-      case eq @ EqualTo(l: Attribute, r: Attribute) =>
+      case eq @ EqualTo(l: Attribute, r: Attribute)
+        if isBinaryStable(l.dataType) && isBinaryStable(r.dataType) =>
         // Also remove EqualNullSafe with the same l and r to avoid Once strategy's idempotence
         // is broken. l = r and l <=> r can infer l <=> l and r <=> r which is useless.
         val candidateConstraints = predicates - eq - EqualNullSafe(l, r)
         inferredConstraints ++= replaceConstraints(candidateConstraints, l, r)
         inferredConstraints ++= replaceConstraints(candidateConstraints, r, l)
-      case eq @ EqualTo(l @ Cast(_: Attribute, _, _, _), r: Attribute) =>
+      case eq @ EqualTo(l @ Cast(lc: Attribute, _, _, _), r: Attribute)
+        if isBinaryStable(lc.dataType) && isBinaryStable(r.dataType) =>
         inferredConstraints ++= replaceConstraints(predicates - eq - EqualNullSafe(l, r), r, l)
-      case eq @ EqualTo(l: Attribute, r @ Cast(_: Attribute, _, _, _)) =>
+      case eq @ EqualTo(l: Attribute, r @ Cast(rc: Attribute, _, _, _))
+        if isBinaryStable(l.dataType) && isBinaryStable(rc.dataType) =>
         inferredConstraints ++= replaceConstraints(predicates - eq - EqualNullSafe(l, r), l, r)
       case _ => // No inference
     }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/collation/CollationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/collation/CollationSuite.scala
index 711e1e091a982..17f6e758f1a6d 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/collation/CollationSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/collation/CollationSuite.scala
@@ -2440,6 +2440,18 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper {
     }
   }
 
+  test("SPARK-57727: constraint inference does not substitute non-binary-stable attributes") {
+    withTable("t1") {
+      sql("CREATE TABLE t1 (a STRING COLLATE UTF8_LCASE, b STRING COLLATE UTF8_LCASE)")
+      sql("INSERT INTO t1 VALUES ('hello', 'HELLO')")
+
+      checkAnswer(
+        sql("SELECT a, b FROM t1 WHERE a = b AND a = 'hello' COLLATE UTF8_BINARY"),
+        Row("hello", "HELLO")
+      )
+    }
+  }
+
   test("ConstantPropagation: replaces binary-stable attributes with contradicting predicates") {
     withTable("t1") {
       sql("CREATE TABLE t1 (c STRING)")