From 258b950540e6c53f0e7acb8d13636ea82d9280df Mon Sep 17 00:00:00 2001 From: Milan Dankovic Date: Fri, 26 Jun 2026 11:53:24 +0000 Subject: [PATCH 1/3] Init commit --- .../catalyst/analysis/PivotTransformer.scala | 58 +++++++++- .../apache/spark/sql/internal/SQLConf.scala | 13 +++ .../test/resources/sql-tests/inputs/pivot.sql | 103 ++++++++++++++++++ .../spark/sql/DataFramePivotSuite.scala | 45 +++++++- 4 files changed, 213 insertions(+), 6 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/PivotTransformer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/PivotTransformer.scala index 7f22dab71e3bb..342c0509579dd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/PivotTransformer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/PivotTransformer.scala @@ -22,8 +22,10 @@ import org.apache.spark.sql.catalyst.expressions.{ Alias, AliasHelper, Attribute, + AttributeReference, AttributeSet, Cast, + Coalesce, EmptyRow, EqualNullSafe, Expression, @@ -43,6 +45,7 @@ import org.apache.spark.sql.catalyst.expressions.aggregate.{ import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LogicalPlan, Project} import org.apache.spark.sql.catalyst.util.toPrettySQL import org.apache.spark.sql.errors.QueryCompilationErrors +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.StringType /** @@ -138,12 +141,25 @@ object PivotTransformer extends AliasHelper with SQLConfHelper { firstAgg ) val pivotAggregatesAttributes = pivotAggregates.map(_.toAttribute) + // When the flag is disabled, supply no empty-input defaults: empty buckets stay NULL and no + // Coalesce is added, so the pivoted columns stay nullable. + val aggregateEmptyInputDefaults: Seq[Option[Expression]] = + if (conf.getConf(SQLConf.PIVOT_EMPTY_BUCKET_RETURNS_AGGREGATE_DEFAULT)) { + aggregates.map(aggregateEmptyInputDefault) + } else { + Seq.fill(aggregates.size)(None) + } val pivotOutputs = pivotValues.zipWithIndex.flatMap { case (value, i) => - aggregates.zip(pivotAggregatesAttributes).map { - case (aggregate, pivotAtt) => + aggregates.zip(pivotAggregatesAttributes).zip(aggregateEmptyInputDefaults).map { + case ((aggregate, pivotAtt), emptyInputDefault) => + val extractedValue = ExtractValue(pivotAtt, Literal(i), conf.resolver) + val withEmptyInputDefault = emptyInputDefault match { + case Some(default) => Coalesce(Seq(extractedValue, default)) + case None => extractedValue + } newAlias( - ExtractValue(pivotAtt, Literal(i), conf.resolver), + withEmptyInputDefault, Some(outputName(value, aggregate, isSingleAggregate = aggregates.size == 1)) ) } @@ -185,6 +201,42 @@ object PivotTransformer extends AliasHelper with SQLConfHelper { } } + /** + * Empty-input default for a pivot aggregate to coalesce into its extracted value, or `None` to + * leave the value unchanged. The fast path's [[PivotFirst]] leaves an unmatched pivot category's + * slot unset, so the caller wraps the result in a [[Coalesce]] to recover the value the slow path + * produces on an empty bucket (`count` -> 0; `sum`/`avg`/`min`/`max` -> NULL). + * + * Returned unevaluated so later constant folding defers a default that throws under ANSI (e.g. + * `count(v1) / count(v2)` -> `0 / 0`) to runtime, where [[Coalesce]] only evaluates it for + * actually-empty buckets -- matching the slow path. Mirrors + * `RewriteCorrelatedScalarSubquery.evalAggExprOnZeroTups`. + */ + private def aggregateEmptyInputDefault(aggregate: Expression): Option[Expression] = { + trimAliases(aggregate) match { + // Bare aggregate: use its published default result (count -> 0, sum/avg/min/max -> None). + case AggregateExpression(aggregateFunction, _, _, _, _) => + aggregateFunction.defaultResult + // Composite over aggregate(s): substitute each aggregate/attribute with its empty-input + // value. Return None for a non-foldable default or a literal NULL (nothing to coalesce in). A + // default that only folds to NULL (e.g. sum(x) + 1) is still returned; its Coalesce evaluates + // to NULL on an empty bucket, which is the correct result. + case other => + val default = other.transform { + case AggregateExpression(aggregateFunction, _, _, _, _) => + aggregateFunction.defaultResult.getOrElse( + Literal.create(null, aggregateFunction.dataType)) + case attribute: AttributeReference => + Literal.create(null, attribute.dataType) + } + default match { + case _ if !trimAliases(default).foldable => None + case Literal(null, _) => None + case _ => Some(default) + } + } + } + private def outputName( value: Expression, aggregate: Expression, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index ff2dd2dbd4833..0037929854777 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -2551,6 +2551,19 @@ object SQLConf { .intConf .createWithDefault(10000) + val PIVOT_EMPTY_BUCKET_RETURNS_AGGREGATE_DEFAULT = + buildConf("spark.sql.pivot.emptyBucketReturnsAggregateDefault") + .internal() + .doc("When true, a pivot value with no matching rows returns the value the aggregate " + + "produces on empty input, e.g. 0 for count, as required by the SQL standard. The same " + + "applies to other aggregates with a non-null result on empty input (e.g. " + + "approx_count_distinct) and to expressions over them (e.g. count(x) + 1). When false, " + + "such cells return NULL.") + .version("4.3.0") + .withBindingPolicy(ConfigBindingPolicy.SESSION) + .booleanConf + .createWithDefault(true) + val DATAFRAME_TRANSPOSE_MAX_VALUES = buildConf("spark.sql.transposeMaxValues") .doc("When doing a transpose without specifying values for the index column this is" + " the maximum number of values that will be transposed without error.") diff --git a/sql/core/src/test/resources/sql-tests/inputs/pivot.sql b/sql/core/src/test/resources/sql-tests/inputs/pivot.sql index fcc145959d877..ac8ddbd5855ca 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/pivot.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/pivot.sql @@ -317,3 +317,106 @@ PIVOT ( FOR course IN ('dotNET', 'Java') ) pv JOIN years y ON pv.year = y.y; + +-- count: empty pivot bucket returns 0 (not NULL) +SELECT * FROM ( + SELECT year, course, earnings FROM courseSales +) +PIVOT ( + count(earnings) + FOR course IN ('dotNET', 'Java', 'Scala') +); + +-- count(*): empty bucket returns 0 +SELECT * FROM ( + SELECT year, course FROM courseSales +) +PIVOT ( + count(*) + FOR course IN ('dotNET', 'Java', 'Scala') +); + +-- count and sum: empty bucket returns 0 and NULL +SELECT * FROM ( + SELECT year, course, earnings FROM courseSales +) +PIVOT ( + count(earnings), sum(earnings) + FOR course IN ('dotNET', 'Java', 'Scala') +); + +-- sum/avg/min/max: empty bucket returns NULL +SELECT * FROM ( + SELECT year, course, earnings FROM courseSales +) +PIVOT ( + sum(earnings), avg(earnings), min(earnings), max(earnings) + FOR course IN ('dotNET', 'Java', 'Scala') +); + +-- approx_count_distinct: empty bucket returns 0 +SELECT * FROM ( + SELECT year, course, earnings FROM courseSales +) +PIVOT ( + approx_count_distinct(earnings) + FOR course IN ('dotNET', 'Java', 'Scala') +); + +-- count(DISTINCT): empty bucket returns 0 +SELECT * FROM ( + SELECT year, course, earnings FROM courseSales +) +PIVOT ( + count(DISTINCT earnings) + FOR course IN ('dotNET', 'Java', 'Scala') +); + +-- cast(count, double) and count + 1: empty bucket returns 0.0 and 1 +SELECT * FROM ( + SELECT year, course, earnings FROM courseSales +) +PIVOT ( + CAST(count(earnings) AS DOUBLE), count(earnings) + 1 + FOR course IN ('dotNET', 'Java', 'Scala') +); + +-- sum + 1: empty bucket returns NULL (folds to NULL) +SELECT * FROM ( + SELECT year, course, earnings FROM courseSales +) +PIVOT ( + sum(earnings) + 1 + FOR course IN ('dotNET', 'Java', 'Scala') +); + +-- count + count: empty bucket returns 0 +SELECT * FROM ( + SELECT year, course, earnings FROM courseSales +) +PIVOT ( + count(earnings) + count(year) + FOR course IN ('dotNET', 'Java', 'Scala') +); + +-- conditional count: empty bucket returns 0 +SELECT * FROM ( + SELECT year, course, earnings FROM courseSales +) +PIVOT ( + count(CASE WHEN earnings > 15000 THEN earnings END) + FOR course IN ('dotNET', 'Java', 'Scala') +); + +-- disable the empty-bucket default: count returns NULL again for empty buckets +SET spark.sql.pivot.emptyBucketReturnsAggregateDefault=false; + +SELECT * FROM ( + SELECT year, course, earnings FROM courseSales +) +PIVOT ( + count(earnings) + FOR course IN ('dotNET', 'Java', 'Scala') +); + +SET spark.sql.pivot.emptyBucketReturnsAggregateDefault=true; diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala index 270d74cf5ef54..60a9a7a3e5e4a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql import java.time.LocalDateTime import java.util.Locale +import org.apache.spark.SparkArithmeticException import org.apache.spark.sql.catalyst.expressions.aggregate.PivotFirst import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf @@ -241,10 +242,48 @@ class DataFramePivotSuite extends SharedSparkSession { ) } - test("pivot with null should not throw NPE") { + // SPARK-19882: null pivot keys must not NPE; also checks empty count buckets return 0. + test("pivot count with null pivot value returns 0 for empty buckets") { checkAnswer( Seq(Tuple1(None), Tuple1(Some(1))).toDF("a").groupBy($"a").pivot("a").count(), - Row(null, 1, null) :: Row(1, null, 1) :: Nil) + Row(null, 1, 0) :: Row(1, 0, 1) :: Nil) + } + + // Empty-bucket result coverage is in the pivot.sql golden file. The tests below cover what it + // cannot: output-schema nullability, the ANSI runtime error path, and the empty-bucket flag. + + test("pivot count produces non-nullable column schema") { + val df = Seq((1, "a")).toDF("id", "cat") + .groupBy("id").pivot("cat", Seq("a", "b")).count() + assert(!df.schema("a").nullable, s"expected 'a' column to be non-nullable: ${df.schema}") + assert(!df.schema("b").nullable, s"expected 'b' column to be non-nullable: ${df.schema}") + } + + test("pivot integral division of counts throws on an empty bucket under ANSI") { + // 0 div 0 on an empty bucket throws DIVIDE_BY_ZERO at runtime, mirroring the slow path. + withSQLConf(SQLConf.ANSI_ENABLED.key -> "true") { + val df = Seq((1, "a", 10, 100), (2, "a", 40, 400)) + .toDF("id", "cat", "v1", "v2") + .groupBy("id") + .pivot("cat", Seq("a", "b")) + .agg(expr("count(v1) div count(v2)")) + checkError( + exception = intercept[SparkArithmeticException] { + df.collect() + }, + condition = "DIVIDE_BY_ZERO", + parameters = Map("config" -> "\"spark.sql.ansi.enabled\""), + context = ExpectedContext(fragment = "count(v1) div count(v2)", start = 0, stop = 22)) + } + } + + test("disabling the empty-bucket default returns NULL for empty count buckets") { + withSQLConf(SQLConf.PIVOT_EMPTY_BUCKET_RETURNS_AGGREGATE_DEFAULT.key -> "false") { + val df = Seq(Tuple1(None), Tuple1(Some(1))).toDF("a").groupBy($"a").pivot("a").count() + checkAnswer(df, Row(null, 1, null) :: Row(1, null, 1) :: Nil) + // No Coalesce is added, so the pivoted columns stay nullable. + assert(df.schema.fields.drop(1).forall(_.nullable)) + } } test("pivot with null and aggregate type not supported by PivotFirst returns correct result") { @@ -263,7 +302,7 @@ class DataFramePivotSuite extends SharedSparkSession { val df = Seq(java.sql.Timestamp.valueOf(ts)).toDF("a").groupBy("a").pivot("a").count() val expected = StructType( StructField("a", TimestampType) :: - StructField(tsWithZone, LongType) :: Nil) + StructField(tsWithZone, LongType, nullable = false) :: Nil) assert(df.schema == expected) // String representation of timestamp with timezone should take the time difference // into account. From cdb3e1b1ba5bb11d7a0edc52d8c79a78eaf67395 Mon Sep 17 00:00:00 2001 From: Milan Dankovic Date: Fri, 26 Jun 2026 12:33:46 +0000 Subject: [PATCH 2/3] Regenerate golden files --- .../sql-tests/analyzer-results/pivot.sql.out | 267 +++++++++++++++++- .../analyzer-results/udf/udf-pivot.sql.out | 2 +- .../resources/sql-tests/results/pivot.sql.out | 180 ++++++++++++ 3 files changed, 447 insertions(+), 2 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/pivot.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/pivot.sql.out index f5a5c1ff640ea..7db91cb29ac5c 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/pivot.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/pivot.sql.out @@ -374,7 +374,7 @@ PIVOT ( ) -- !query analysis Project [year#x, dotNET_CEIL(sum(earnings))#xL, dotNET_a1#x, Java_CEIL(sum(earnings))#xL, Java_a1#x] -+- Project [year#x, __pivot_CEIL(sum(__auto_generated_subquery_name.earnings)) AS `CEIL(sum(__auto_generated_subquery_name.earnings))`#x[0] AS dotNET_CEIL(sum(earnings))#xL, __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x[0] AS dotNET_a1#x, __pivot_CEIL(sum(__auto_generated_subquery_name.earnings)) AS `CEIL(sum(__auto_generated_subquery_name.earnings))`#x[1] AS Java_CEIL(sum(earnings))#xL, __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x[1] AS Java_a1#x] ++- Project [year#x, coalesce(__pivot_CEIL(sum(__auto_generated_subquery_name.earnings)) AS `CEIL(sum(__auto_generated_subquery_name.earnings))`#x[0], CEIL(null)) AS dotNET_CEIL(sum(earnings))#xL, coalesce(__pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x[0], (null + cast(1 as double))) AS dotNET_a1#x, coalesce(__pivot_CEIL(sum(__auto_generated_subquery_name.earnings)) AS `CEIL(sum(__auto_generated_subquery_name.earnings))`#x[1], CEIL(null)) AS Java_CEIL(sum(earnings))#xL, coalesce(__pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x[1], (null + cast(1 as double))) AS Java_a1#x] +- Aggregate [year#x], [year#x, pivotfirst(course#x, CEIL(sum(__auto_generated_subquery_name.earnings))#xL, dotNET, Java, 0, 0) AS __pivot_CEIL(sum(__auto_generated_subquery_name.earnings)) AS `CEIL(sum(__auto_generated_subquery_name.earnings))`#x, pivotfirst(course#x, (avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1#x, dotNET, Java, 0, 0) AS __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x] +- Aggregate [year#x, course#x], [year#x, course#x, CEIL(sum(earnings#x)) AS CEIL(sum(__auto_generated_subquery_name.earnings))#xL, (avg(earnings#x) + cast(1 as double)) AS (avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1#x] +- SubqueryAlias __auto_generated_subquery_name @@ -848,3 +848,268 @@ Project [year#x, dotNET#xL, s#x] +- Project [y#x, s#x] +- SubqueryAlias years +- LocalRelation [y#x, s#x] + + +-- !query +SELECT * FROM ( + SELECT year, course, earnings FROM courseSales +) +PIVOT ( + count(earnings) + FOR course IN ('dotNET', 'Java', 'Scala') +) +-- !query analysis +Project [year#x, dotNET#xL, Java#xL, Scala#xL] ++- Project [year#x, coalesce(__pivot_count(__auto_generated_subquery_name.earnings) AS `count(__auto_generated_subquery_name.earnings)`#x[0], 0) AS dotNET#xL, coalesce(__pivot_count(__auto_generated_subquery_name.earnings) AS `count(__auto_generated_subquery_name.earnings)`#x[1], 0) AS Java#xL, coalesce(__pivot_count(__auto_generated_subquery_name.earnings) AS `count(__auto_generated_subquery_name.earnings)`#x[2], 0) AS Scala#xL] + +- Aggregate [year#x], [year#x, pivotfirst(course#x, count(__auto_generated_subquery_name.earnings)#xL, dotNET, Java, Scala, 0, 0) AS __pivot_count(__auto_generated_subquery_name.earnings) AS `count(__auto_generated_subquery_name.earnings)`#x] + +- Aggregate [year#x, course#x], [year#x, course#x, count(earnings#x) AS count(__auto_generated_subquery_name.earnings)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x, course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x, year#x, earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] + + +-- !query +SELECT * FROM ( + SELECT year, course FROM courseSales +) +PIVOT ( + count(*) + FOR course IN ('dotNET', 'Java', 'Scala') +) +-- !query analysis +Project [year#x, dotNET#xL, Java#xL, Scala#xL] ++- Project [year#x, coalesce(__pivot_count(1) AS `count(1)`#x[0], 0) AS dotNET#xL, coalesce(__pivot_count(1) AS `count(1)`#x[1], 0) AS Java#xL, coalesce(__pivot_count(1) AS `count(1)`#x[2], 0) AS Scala#xL] + +- Aggregate [year#x], [year#x, pivotfirst(course#x, count(1)#xL, dotNET, Java, Scala, 0, 0) AS __pivot_count(1) AS `count(1)`#x] + +- Aggregate [year#x, course#x], [year#x, course#x, count(1) AS count(1)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x, course#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x, year#x, earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] + + +-- !query +SELECT * FROM ( + SELECT year, course, earnings FROM courseSales +) +PIVOT ( + count(earnings), sum(earnings) + FOR course IN ('dotNET', 'Java', 'Scala') +) +-- !query analysis +Project [year#x, dotNET_count(earnings)#xL, dotNET_sum(earnings)#xL, Java_count(earnings)#xL, Java_sum(earnings)#xL, Scala_count(earnings)#xL, Scala_sum(earnings)#xL] ++- Project [year#x, coalesce(__pivot_count(__auto_generated_subquery_name.earnings) AS `count(__auto_generated_subquery_name.earnings)`#x[0], 0) AS dotNET_count(earnings)#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET_sum(earnings)#xL, coalesce(__pivot_count(__auto_generated_subquery_name.earnings) AS `count(__auto_generated_subquery_name.earnings)`#x[1], 0) AS Java_count(earnings)#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS Java_sum(earnings)#xL, coalesce(__pivot_count(__auto_generated_subquery_name.earnings) AS `count(__auto_generated_subquery_name.earnings)`#x[2], 0) AS Scala_count(earnings)#xL, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[2] AS Scala_sum(earnings)#xL] + +- Aggregate [year#x], [year#x, pivotfirst(course#x, count(__auto_generated_subquery_name.earnings)#xL, dotNET, Java, Scala, 0, 0) AS __pivot_count(__auto_generated_subquery_name.earnings) AS `count(__auto_generated_subquery_name.earnings)`#x, pivotfirst(course#x, sum(__auto_generated_subquery_name.earnings)#xL, dotNET, Java, Scala, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x] + +- Aggregate [year#x, course#x], [year#x, course#x, count(earnings#x) AS count(__auto_generated_subquery_name.earnings)#xL, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x, course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x, year#x, earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] + + +-- !query +SELECT * FROM ( + SELECT year, course, earnings FROM courseSales +) +PIVOT ( + sum(earnings), avg(earnings), min(earnings), max(earnings) + FOR course IN ('dotNET', 'Java', 'Scala') +) +-- !query analysis +Project [year#x, dotNET_sum(earnings)#xL, dotNET_avg(earnings)#x, dotNET_min(earnings)#x, dotNET_max(earnings)#x, Java_sum(earnings)#xL, Java_avg(earnings)#x, Java_min(earnings)#x, Java_max(earnings)#x, Scala_sum(earnings)#xL, Scala_avg(earnings)#x, Scala_min(earnings)#x, Scala_max(earnings)#x] ++- Project [year#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET_sum(earnings)#xL, __pivot_avg(__auto_generated_subquery_name.earnings) AS `avg(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET_avg(earnings)#x, __pivot_min(__auto_generated_subquery_name.earnings) AS `min(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET_min(earnings)#x, __pivot_max(__auto_generated_subquery_name.earnings) AS `max(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET_max(earnings)#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[1] AS Java_sum(earnings)#xL, __pivot_avg(__auto_generated_subquery_name.earnings) AS `avg(__auto_generated_subquery_name.earnings)`#x[1] AS Java_avg(earnings)#x, __pivot_min(__auto_generated_subquery_name.earnings) AS `min(__auto_generated_subquery_name.earnings)`#x[1] AS Java_min(earnings)#x, __pivot_max(__auto_generated_subquery_name.earnings) AS `max(__auto_generated_subquery_name.earnings)`#x[1] AS Java_max(earnings)#x, __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x[2] AS Scala_sum(earnings)#xL, __pivot_avg(__auto_generated_subquery_name.earnings) AS `avg(__auto_generated_subquery_name.earnings)`#x[2] AS Scala_avg(earnings)#x, __pivot_min(__auto_generated_subquery_name.earnings) AS `min(__auto_generated_subquery_name.earnings)`#x[2] AS Scala_min(earnings)#x, __pivot_max(__auto_generated_subquery_name.earnings) AS `max(__auto_generated_subquery_name.earnings)`#x[2] AS Scala_max(earnings)#x] + +- Aggregate [year#x], [year#x, pivotfirst(course#x, sum(__auto_generated_subquery_name.earnings)#xL, dotNET, Java, Scala, 0, 0) AS __pivot_sum(__auto_generated_subquery_name.earnings) AS `sum(__auto_generated_subquery_name.earnings)`#x, pivotfirst(course#x, avg(__auto_generated_subquery_name.earnings)#x, dotNET, Java, Scala, 0, 0) AS __pivot_avg(__auto_generated_subquery_name.earnings) AS `avg(__auto_generated_subquery_name.earnings)`#x, pivotfirst(course#x, min(__auto_generated_subquery_name.earnings)#x, dotNET, Java, Scala, 0, 0) AS __pivot_min(__auto_generated_subquery_name.earnings) AS `min(__auto_generated_subquery_name.earnings)`#x, pivotfirst(course#x, max(__auto_generated_subquery_name.earnings)#x, dotNET, Java, Scala, 0, 0) AS __pivot_max(__auto_generated_subquery_name.earnings) AS `max(__auto_generated_subquery_name.earnings)`#x] + +- Aggregate [year#x, course#x], [year#x, course#x, sum(earnings#x) AS sum(__auto_generated_subquery_name.earnings)#xL, avg(earnings#x) AS avg(__auto_generated_subquery_name.earnings)#x, min(earnings#x) AS min(__auto_generated_subquery_name.earnings)#x, max(earnings#x) AS max(__auto_generated_subquery_name.earnings)#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x, course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x, year#x, earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] + + +-- !query +SELECT * FROM ( + SELECT year, course, earnings FROM courseSales +) +PIVOT ( + approx_count_distinct(earnings) + FOR course IN ('dotNET', 'Java', 'Scala') +) +-- !query analysis +Project [year#x, dotNET#xL, Java#xL, Scala#xL] ++- Project [year#x, coalesce(__pivot_approx_count_distinct(__auto_generated_subquery_name.earnings) AS `approx_count_distinct(__auto_generated_subquery_name.earnings)`#x[0], 0) AS dotNET#xL, coalesce(__pivot_approx_count_distinct(__auto_generated_subquery_name.earnings) AS `approx_count_distinct(__auto_generated_subquery_name.earnings)`#x[1], 0) AS Java#xL, coalesce(__pivot_approx_count_distinct(__auto_generated_subquery_name.earnings) AS `approx_count_distinct(__auto_generated_subquery_name.earnings)`#x[2], 0) AS Scala#xL] + +- Aggregate [year#x], [year#x, pivotfirst(course#x, approx_count_distinct(__auto_generated_subquery_name.earnings)#xL, dotNET, Java, Scala, 0, 0) AS __pivot_approx_count_distinct(__auto_generated_subquery_name.earnings) AS `approx_count_distinct(__auto_generated_subquery_name.earnings)`#x] + +- Aggregate [year#x, course#x], [year#x, course#x, approx_count_distinct(earnings#x, 0.05, 0, 0) AS approx_count_distinct(__auto_generated_subquery_name.earnings)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x, course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x, year#x, earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] + + +-- !query +SELECT * FROM ( + SELECT year, course, earnings FROM courseSales +) +PIVOT ( + count(DISTINCT earnings) + FOR course IN ('dotNET', 'Java', 'Scala') +) +-- !query analysis +Project [year#x, dotNET#xL, Java#xL, Scala#xL] ++- Project [year#x, coalesce(__pivot_count(DISTINCT __auto_generated_subquery_name.earnings) AS `count(DISTINCT __auto_generated_subquery_name.earnings)`#x[0], 0) AS dotNET#xL, coalesce(__pivot_count(DISTINCT __auto_generated_subquery_name.earnings) AS `count(DISTINCT __auto_generated_subquery_name.earnings)`#x[1], 0) AS Java#xL, coalesce(__pivot_count(DISTINCT __auto_generated_subquery_name.earnings) AS `count(DISTINCT __auto_generated_subquery_name.earnings)`#x[2], 0) AS Scala#xL] + +- Aggregate [year#x], [year#x, pivotfirst(course#x, count(DISTINCT __auto_generated_subquery_name.earnings)#xL, dotNET, Java, Scala, 0, 0) AS __pivot_count(DISTINCT __auto_generated_subquery_name.earnings) AS `count(DISTINCT __auto_generated_subquery_name.earnings)`#x] + +- Aggregate [year#x, course#x], [year#x, course#x, count(distinct earnings#x) AS count(DISTINCT __auto_generated_subquery_name.earnings)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x, course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x, year#x, earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] + + +-- !query +SELECT * FROM ( + SELECT year, course, earnings FROM courseSales +) +PIVOT ( + CAST(count(earnings) AS DOUBLE), count(earnings) + 1 + FOR course IN ('dotNET', 'Java', 'Scala') +) +-- !query analysis +Project [year#x, dotNET_CAST(count(earnings) AS DOUBLE)#x, dotNET_(count(earnings) + 1)#xL, Java_CAST(count(earnings) AS DOUBLE)#x, Java_(count(earnings) + 1)#xL, Scala_CAST(count(earnings) AS DOUBLE)#x, Scala_(count(earnings) + 1)#xL] ++- Project [year#x, coalesce(__pivot_CAST(count(__auto_generated_subquery_name.earnings) AS DOUBLE) AS `CAST(count(__auto_generated_subquery_name.earnings) AS DOUBLE)`#x[0], cast(0 as double)) AS dotNET_CAST(count(earnings) AS DOUBLE)#x, coalesce(__pivot_(count(__auto_generated_subquery_name.earnings) + CAST(1 AS BIGINT)) AS `(count(__auto_generated_subquery_name.earnings) + CAST(1 AS BIGINT))`#x[0], (0 + cast(1 as bigint))) AS dotNET_(count(earnings) + 1)#xL, coalesce(__pivot_CAST(count(__auto_generated_subquery_name.earnings) AS DOUBLE) AS `CAST(count(__auto_generated_subquery_name.earnings) AS DOUBLE)`#x[1], cast(0 as double)) AS Java_CAST(count(earnings) AS DOUBLE)#x, coalesce(__pivot_(count(__auto_generated_subquery_name.earnings) + CAST(1 AS BIGINT)) AS `(count(__auto_generated_subquery_name.earnings) + CAST(1 AS BIGINT))`#x[1], (0 + cast(1 as bigint))) AS Java_(count(earnings) + 1)#xL, coalesce(__pivot_CAST(count(__auto_generated_subquery_name.earnings) AS DOUBLE) AS `CAST(count(__auto_generated_subquery_name.earnings) AS DOUBLE)`#x[2], cast(0 as double)) AS Scala_CAST(count(earnings) AS DOUBLE)#x, coalesce(__pivot_(count(__auto_generated_subquery_name.earnings) + CAST(1 AS BIGINT)) AS `(count(__auto_generated_subquery_name.earnings) + CAST(1 AS BIGINT))`#x[2], (0 + cast(1 as bigint))) AS Scala_(count(earnings) + 1)#xL] + +- Aggregate [year#x], [year#x, pivotfirst(course#x, CAST(count(__auto_generated_subquery_name.earnings) AS DOUBLE)#x, dotNET, Java, Scala, 0, 0) AS __pivot_CAST(count(__auto_generated_subquery_name.earnings) AS DOUBLE) AS `CAST(count(__auto_generated_subquery_name.earnings) AS DOUBLE)`#x, pivotfirst(course#x, (count(__auto_generated_subquery_name.earnings) + CAST(1 AS BIGINT))#xL, dotNET, Java, Scala, 0, 0) AS __pivot_(count(__auto_generated_subquery_name.earnings) + CAST(1 AS BIGINT)) AS `(count(__auto_generated_subquery_name.earnings) + CAST(1 AS BIGINT))`#x] + +- Aggregate [year#x, course#x], [year#x, course#x, cast(count(earnings#x) as double) AS CAST(count(__auto_generated_subquery_name.earnings) AS DOUBLE)#x, (count(earnings#x) + cast(1 as bigint)) AS (count(__auto_generated_subquery_name.earnings) + CAST(1 AS BIGINT))#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x, course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x, year#x, earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] + + +-- !query +SELECT * FROM ( + SELECT year, course, earnings FROM courseSales +) +PIVOT ( + sum(earnings) + 1 + FOR course IN ('dotNET', 'Java', 'Scala') +) +-- !query analysis +Project [year#x, dotNET#xL, Java#xL, Scala#xL] ++- Project [year#x, coalesce(__pivot_(sum(__auto_generated_subquery_name.earnings) + CAST(1 AS BIGINT)) AS `(sum(__auto_generated_subquery_name.earnings) + CAST(1 AS BIGINT))`#x[0], (null + cast(1 as bigint))) AS dotNET#xL, coalesce(__pivot_(sum(__auto_generated_subquery_name.earnings) + CAST(1 AS BIGINT)) AS `(sum(__auto_generated_subquery_name.earnings) + CAST(1 AS BIGINT))`#x[1], (null + cast(1 as bigint))) AS Java#xL, coalesce(__pivot_(sum(__auto_generated_subquery_name.earnings) + CAST(1 AS BIGINT)) AS `(sum(__auto_generated_subquery_name.earnings) + CAST(1 AS BIGINT))`#x[2], (null + cast(1 as bigint))) AS Scala#xL] + +- Aggregate [year#x], [year#x, pivotfirst(course#x, (sum(__auto_generated_subquery_name.earnings) + CAST(1 AS BIGINT))#xL, dotNET, Java, Scala, 0, 0) AS __pivot_(sum(__auto_generated_subquery_name.earnings) + CAST(1 AS BIGINT)) AS `(sum(__auto_generated_subquery_name.earnings) + CAST(1 AS BIGINT))`#x] + +- Aggregate [year#x, course#x], [year#x, course#x, (sum(earnings#x) + cast(1 as bigint)) AS (sum(__auto_generated_subquery_name.earnings) + CAST(1 AS BIGINT))#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x, course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x, year#x, earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] + + +-- !query +SELECT * FROM ( + SELECT year, course, earnings FROM courseSales +) +PIVOT ( + count(earnings) + count(year) + FOR course IN ('dotNET', 'Java', 'Scala') +) +-- !query analysis +Project [dotNET#xL, Java#xL, Scala#xL] ++- Project [coalesce(__pivot_(count(__auto_generated_subquery_name.earnings) + count(__auto_generated_subquery_name.year)) AS `(count(__auto_generated_subquery_name.earnings) + count(__auto_generated_subquery_name.year))`#x[0], (0 + 0)) AS dotNET#xL, coalesce(__pivot_(count(__auto_generated_subquery_name.earnings) + count(__auto_generated_subquery_name.year)) AS `(count(__auto_generated_subquery_name.earnings) + count(__auto_generated_subquery_name.year))`#x[1], (0 + 0)) AS Java#xL, coalesce(__pivot_(count(__auto_generated_subquery_name.earnings) + count(__auto_generated_subquery_name.year)) AS `(count(__auto_generated_subquery_name.earnings) + count(__auto_generated_subquery_name.year))`#x[2], (0 + 0)) AS Scala#xL] + +- Aggregate [pivotfirst(course#x, (count(__auto_generated_subquery_name.earnings) + count(__auto_generated_subquery_name.year))#xL, dotNET, Java, Scala, 0, 0) AS __pivot_(count(__auto_generated_subquery_name.earnings) + count(__auto_generated_subquery_name.year)) AS `(count(__auto_generated_subquery_name.earnings) + count(__auto_generated_subquery_name.year))`#x] + +- Aggregate [course#x], [course#x, (count(earnings#x) + count(year#x)) AS (count(__auto_generated_subquery_name.earnings) + count(__auto_generated_subquery_name.year))#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x, course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x, year#x, earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] + + +-- !query +SELECT * FROM ( + SELECT year, course, earnings FROM courseSales +) +PIVOT ( + count(CASE WHEN earnings > 15000 THEN earnings END) + FOR course IN ('dotNET', 'Java', 'Scala') +) +-- !query analysis +Project [year#x, dotNET#xL, Java#xL, Scala#xL] ++- Project [year#x, coalesce(__pivot_count(CASE WHEN (__auto_generated_subquery_name.earnings > 15000) THEN __auto_generated_subquery_name.earnings END) AS `count(CASE WHEN (__auto_generated_subquery_name.earnings > 15000) THEN __auto_generated_subquery_name.earnings END)`#x[0], 0) AS dotNET#xL, coalesce(__pivot_count(CASE WHEN (__auto_generated_subquery_name.earnings > 15000) THEN __auto_generated_subquery_name.earnings END) AS `count(CASE WHEN (__auto_generated_subquery_name.earnings > 15000) THEN __auto_generated_subquery_name.earnings END)`#x[1], 0) AS Java#xL, coalesce(__pivot_count(CASE WHEN (__auto_generated_subquery_name.earnings > 15000) THEN __auto_generated_subquery_name.earnings END) AS `count(CASE WHEN (__auto_generated_subquery_name.earnings > 15000) THEN __auto_generated_subquery_name.earnings END)`#x[2], 0) AS Scala#xL] + +- Aggregate [year#x], [year#x, pivotfirst(course#x, count(CASE WHEN (__auto_generated_subquery_name.earnings > 15000) THEN __auto_generated_subquery_name.earnings END)#xL, dotNET, Java, Scala, 0, 0) AS __pivot_count(CASE WHEN (__auto_generated_subquery_name.earnings > 15000) THEN __auto_generated_subquery_name.earnings END) AS `count(CASE WHEN (__auto_generated_subquery_name.earnings > 15000) THEN __auto_generated_subquery_name.earnings END)`#x] + +- Aggregate [year#x, course#x], [year#x, course#x, count(CASE WHEN (earnings#x > 15000) THEN earnings#x END) AS count(CASE WHEN (__auto_generated_subquery_name.earnings > 15000) THEN __auto_generated_subquery_name.earnings END)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x, course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x, year#x, earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] + + +-- !query +SET spark.sql.pivot.emptyBucketReturnsAggregateDefault=false +-- !query analysis +SetCommand (spark.sql.pivot.emptyBucketReturnsAggregateDefault,Some(false)) + + +-- !query +SELECT * FROM ( + SELECT year, course, earnings FROM courseSales +) +PIVOT ( + count(earnings) + FOR course IN ('dotNET', 'Java', 'Scala') +) +-- !query analysis +Project [year#x, dotNET#xL, Java#xL, Scala#xL] ++- Project [year#x, __pivot_count(__auto_generated_subquery_name.earnings) AS `count(__auto_generated_subquery_name.earnings)`#x[0] AS dotNET#xL, __pivot_count(__auto_generated_subquery_name.earnings) AS `count(__auto_generated_subquery_name.earnings)`#x[1] AS Java#xL, __pivot_count(__auto_generated_subquery_name.earnings) AS `count(__auto_generated_subquery_name.earnings)`#x[2] AS Scala#xL] + +- Aggregate [year#x], [year#x, pivotfirst(course#x, count(__auto_generated_subquery_name.earnings)#xL, dotNET, Java, Scala, 0, 0) AS __pivot_count(__auto_generated_subquery_name.earnings) AS `count(__auto_generated_subquery_name.earnings)`#x] + +- Aggregate [year#x, course#x], [year#x, course#x, count(earnings#x) AS count(__auto_generated_subquery_name.earnings)#xL] + +- SubqueryAlias __auto_generated_subquery_name + +- Project [year#x, course#x, earnings#x] + +- SubqueryAlias coursesales + +- View (`courseSales`, [course#x, year#x, earnings#x]) + +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] + +- Project [course#x, year#x, earnings#x] + +- SubqueryAlias courseSales + +- LocalRelation [course#x, year#x, earnings#x] + + +-- !query +SET spark.sql.pivot.emptyBucketReturnsAggregateDefault=true +-- !query analysis +SetCommand (spark.sql.pivot.emptyBucketReturnsAggregateDefault,Some(true)) diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-pivot.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-pivot.sql.out index 5cfa86309f6d1..032073274086b 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-pivot.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-pivot.sql.out @@ -374,7 +374,7 @@ PIVOT ( ) -- !query analysis Project [year#x, dotNET_udf(CEIL(udf(sum(earnings))))#xL, dotNET_a1#x, Java_udf(CEIL(udf(sum(earnings))))#xL, Java_a1#x] -+- Project [year#x, __pivot_CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT) AS `CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT)`#x[0] AS dotNET_udf(CEIL(udf(sum(earnings))))#xL, __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x[0] AS dotNET_a1#x, __pivot_CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT) AS `CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT)`#x[1] AS Java_udf(CEIL(udf(sum(earnings))))#xL, __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x[1] AS Java_a1#x] ++- Project [year#x, __pivot_CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT) AS `CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT)`#x[0] AS dotNET_udf(CEIL(udf(sum(earnings))))#xL, coalesce(__pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x[0], (null + cast(1 as double))) AS dotNET_a1#x, __pivot_CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT) AS `CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT)`#x[1] AS Java_udf(CEIL(udf(sum(earnings))))#xL, coalesce(__pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x[1], (null + cast(1 as double))) AS Java_a1#x] +- Aggregate [year#x], [year#x, pivotfirst(course#x, CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT)#xL, dotNET, Java, 0, 0) AS __pivot_CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT) AS `CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT)`#x, pivotfirst(course#x, (avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1#x, dotNET, Java, 0, 0) AS __pivot_(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1 AS `(avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1`#x] +- Aggregate [year#x, course#x], [year#x, course#x, cast(udf(cast(CEIL(cast(udf(cast(sum(earnings#x) as string)) as bigint)) as string)) as bigint) AS CAST(udf(cast(CEIL(cast(udf(cast(sum(earnings) as string)) as bigint)) as string)) AS BIGINT)#xL, (avg(earnings#x) + cast(1 as double)) AS (avg(__auto_generated_subquery_name.earnings) + CAST(1 AS DOUBLE)) AS a1#x] +- SubqueryAlias __auto_generated_subquery_name diff --git a/sql/core/src/test/resources/sql-tests/results/pivot.sql.out b/sql/core/src/test/resources/sql-tests/results/pivot.sql.out index 452901e266d31..2b2b266ef9991 100644 --- a/sql/core/src/test/resources/sql-tests/results/pivot.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/pivot.sql.out @@ -593,3 +593,183 @@ struct -- !query output 2012 15000 1 2013 48000 2 + + +-- !query +SELECT * FROM ( + SELECT year, course, earnings FROM courseSales +) +PIVOT ( + count(earnings) + FOR course IN ('dotNET', 'Java', 'Scala') +) +-- !query schema +struct +-- !query output +2012 2 1 0 +2013 1 1 0 + + +-- !query +SELECT * FROM ( + SELECT year, course FROM courseSales +) +PIVOT ( + count(*) + FOR course IN ('dotNET', 'Java', 'Scala') +) +-- !query schema +struct +-- !query output +2012 2 1 0 +2013 1 1 0 + + +-- !query +SELECT * FROM ( + SELECT year, course, earnings FROM courseSales +) +PIVOT ( + count(earnings), sum(earnings) + FOR course IN ('dotNET', 'Java', 'Scala') +) +-- !query schema +struct +-- !query output +2012 2 15000 1 20000 0 NULL +2013 1 48000 1 30000 0 NULL + + +-- !query +SELECT * FROM ( + SELECT year, course, earnings FROM courseSales +) +PIVOT ( + sum(earnings), avg(earnings), min(earnings), max(earnings) + FOR course IN ('dotNET', 'Java', 'Scala') +) +-- !query schema +struct +-- !query output +2012 15000 7500.0 5000 10000 20000 20000.0 20000 20000 NULL NULL NULL NULL +2013 48000 48000.0 48000 48000 30000 30000.0 30000 30000 NULL NULL NULL NULL + + +-- !query +SELECT * FROM ( + SELECT year, course, earnings FROM courseSales +) +PIVOT ( + approx_count_distinct(earnings) + FOR course IN ('dotNET', 'Java', 'Scala') +) +-- !query schema +struct +-- !query output +2012 2 1 0 +2013 1 1 0 + + +-- !query +SELECT * FROM ( + SELECT year, course, earnings FROM courseSales +) +PIVOT ( + count(DISTINCT earnings) + FOR course IN ('dotNET', 'Java', 'Scala') +) +-- !query schema +struct +-- !query output +2012 2 1 0 +2013 1 1 0 + + +-- !query +SELECT * FROM ( + SELECT year, course, earnings FROM courseSales +) +PIVOT ( + CAST(count(earnings) AS DOUBLE), count(earnings) + 1 + FOR course IN ('dotNET', 'Java', 'Scala') +) +-- !query schema +struct +-- !query output +2012 2.0 3 1.0 2 0.0 1 +2013 1.0 2 1.0 2 0.0 1 + + +-- !query +SELECT * FROM ( + SELECT year, course, earnings FROM courseSales +) +PIVOT ( + sum(earnings) + 1 + FOR course IN ('dotNET', 'Java', 'Scala') +) +-- !query schema +struct +-- !query output +2012 15001 20001 NULL +2013 48001 30001 NULL + + +-- !query +SELECT * FROM ( + SELECT year, course, earnings FROM courseSales +) +PIVOT ( + count(earnings) + count(year) + FOR course IN ('dotNET', 'Java', 'Scala') +) +-- !query schema +struct +-- !query output +6 4 0 + + +-- !query +SELECT * FROM ( + SELECT year, course, earnings FROM courseSales +) +PIVOT ( + count(CASE WHEN earnings > 15000 THEN earnings END) + FOR course IN ('dotNET', 'Java', 'Scala') +) +-- !query schema +struct +-- !query output +2012 0 1 0 +2013 1 1 0 + + +-- !query +SET spark.sql.pivot.emptyBucketReturnsAggregateDefault=false +-- !query schema +struct +-- !query output +spark.sql.pivot.emptyBucketReturnsAggregateDefault false + + +-- !query +SELECT * FROM ( + SELECT year, course, earnings FROM courseSales +) +PIVOT ( + count(earnings) + FOR course IN ('dotNET', 'Java', 'Scala') +) +-- !query schema +struct +-- !query output +2012 2 1 NULL +2013 1 1 NULL + + +-- !query +SET spark.sql.pivot.emptyBucketReturnsAggregateDefault=true +-- !query schema +struct +-- !query output +spark.sql.pivot.emptyBucketReturnsAggregateDefault true From b6b6e1cec4968bc1e622e2f014af0dbe2d07e4bc Mon Sep 17 00:00:00 2001 From: Milan Dankovic Date: Fri, 26 Jun 2026 12:51:29 +0000 Subject: [PATCH 3/3] Add golden file --- .../test/resources/query-tests/explain-results/pivot.explain | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/pivot.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/pivot.explain index b8cd844123773..c5f638ee8ab78 100644 --- a/sql/connect/common/src/test/resources/query-tests/explain-results/pivot.explain +++ b/sql/connect/common/src/test/resources/query-tests/explain-results/pivot.explain @@ -1,4 +1,4 @@ -Project [id#0L, __pivot_count(b) AS `count(b)`#0[0] AS 1#0L, __pivot_count(b) AS `count(b)`#0[1] AS 2#0L, __pivot_count(b) AS `count(b)`#0[2] AS 3#0L] +Project [id#0L, coalesce(__pivot_count(b) AS `count(b)`#0[0], 0) AS 1#0L, coalesce(__pivot_count(b) AS `count(b)`#0[1], 0) AS 2#0L, coalesce(__pivot_count(b) AS `count(b)`#0[2], 0) AS 3#0L] +- Aggregate [id#0L], [id#0L, pivotfirst(a#0, count(b)#0L, 1, 2, 3, 0, 0) AS __pivot_count(b) AS `count(b)`#0] +- Aggregate [id#0L, a#0], [id#0L, a#0, count(b#0) AS count(b)#0L] +- LocalRelation , [id#0L, a#0, b#0]