From b12bce33ff207384b2967e7b85164c3778764132 Mon Sep 17 00:00:00 2001 From: Jubin Soni Date: Sun, 28 Jun 2026 08:28:29 -0700 Subject: [PATCH 1/2] [SPARK-57738][CONNECT] Restore fast-fail guard for nanosecond timestamp types in ArrowVectorReader MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What is the purpose of the change Fixes SPARK-57738 — restores the fast-fail guard for nanosecond-precision timestamp types in `ArrowVectorReader`, which was silently broken by SPARK-57303. SPARK-57303 updated `UpCastRule.canUpCast` to return `true` for lossless widening within the timestamp family (e.g. `TimestampType -> TimestampLTZNanosType(p)`). As a side effect, the existing unsupported-type guard in `ArrowVectorReader.applyDefault` no longer rejects nanosecond timestamp targets — the SPARK-57303 commit message explicitly flagged this as a known follow-up item. Without this fix, a request to read a `TIMESTAMP_LTZ(p)` or `TIMESTAMP_NTZ(p)` (`p` in `[7, 9]`) column over Spark Connect silently passes the guard and then crashes with a confusing `"Unsupported Vector Type"` error from the catch-all branch of the `vector match`. With this fix it fails fast with a clear `"not yet supported"` message. ### Brief change log - `sql/connect/common/src/main/scala/org/apache/spark/sql/connect/client/arrow/ArrowVectorReader.scala`: added `AnyTimestampNanoType` to the import and inserted an explicit rejection guard between the `canUpCast` check and the `vector match` block ### Verifying this change No existing unit tests cover `ArrowVectorReader` directly. 
The fix is a defensive guard on an unsupported code path (nanosecond-precision timestamps are not yet reachable over Connect in any supported workflow), so the primary verification is: - Manual inspection: the guard fires before the `vector match`, so no nanosecond type can reach the `"Unsupported Vector Type"` catch-all - The fix will be superseded and removed when Connect nanos support is implemented (the comment in the code points to this) ### Does this pull request potentially affect one of the following parts - Dependencies (does it add or upgrade a dependency): no - The public API, i.e., is any changed class annotated with `@Public`/`@Evolving`: no — `ArrowVectorReader` is `private[connect]` - The serializers: no - The runtime per-record code paths (performance sensitive): no — the guard only fires for an unsupported type that cannot currently be produced - Anything that affects deployment or recovery: no - The S3 file system connector: no ### Documentation Does this pull request introduce a new feature? No — this is a bug fix restoring a guard that was inadvertently disabled by SPARK-57303. ### Was generative AI tooling used to co-author this PR? Yes — Claude Code was used as a pair-programming assistant. All code was written, understood, and verified by the author. 
Generated-by: Claude Sonnet 4.6 --- .../sql/connect/client/arrow/ArrowVectorReader.scala | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/client/arrow/ArrowVectorReader.scala b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/client/arrow/ArrowVectorReader.scala index 54311cecc1627..7a622d3010e9f 100644 --- a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/client/arrow/ArrowVectorReader.scala +++ b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/client/arrow/ArrowVectorReader.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.util.DateTimeConstants.MICROS_PER_SECOND import org.apache.spark.sql.catalyst.util.IntervalStringStyles.ANSI_STYLE import org.apache.spark.sql.catalyst.util.SparkDateTimeUtils._ import org.apache.spark.sql.connect.common.types.ops.ConnectTypeOps -import org.apache.spark.sql.types.{DataType, DayTimeIntervalType, Decimal, UpCastRule, YearMonthIntervalType} +import org.apache.spark.sql.types.{AnyTimestampNanoType, DataType, DayTimeIntervalType, Decimal, UpCastRule, YearMonthIntervalType} import org.apache.spark.sql.util.ArrowUtils import org.apache.spark.util.SparkStringUtils @@ -84,6 +84,15 @@ object ArrowVectorReader { throw new RuntimeException( s"Reading '$targetDataType' values from a ${vector.getClass} instance is not supported.") } + // Nanosecond-precision timestamp types (TIMESTAMP_LTZ(p) / TIMESTAMP_NTZ(p), p in [7,9]) are + // not yet supported over Spark Connect: no reader is implemented here for them, although + // Arrow does define nanosecond timestamp vectors. UpCastRule.canUpCast now returns true for the + // micro -> nanos widening direction (SPARK-57303), so the generic guard above no longer + // catches this case. Fail fast with a clear message until Connect nanos support is added. 
+ if (targetDataType.isInstanceOf[AnyTimestampNanoType]) { + throw new RuntimeException( + s"Reading '$targetDataType' values over Spark Connect is not yet supported.") + } vector match { case v: BitVector => new BitVectorReader(v) case v: TinyIntVector => new TinyIntVectorReader(v) From e17ac754e40655862ee4a941df4b88e03c5f1a5d Mon Sep 17 00:00:00 2001 From: Jubin Soni Date: Sun, 28 Jun 2026 08:35:24 -0700 Subject: [PATCH 2/2] Added tests Run with: build/sbt 'connect-client-jvm/testOnly *ArrowVectorReaderSuite' --- .../client/arrow/ArrowVectorReaderSuite.scala | 77 +++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/arrow/ArrowVectorReaderSuite.scala diff --git a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/arrow/ArrowVectorReaderSuite.scala b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/arrow/ArrowVectorReaderSuite.scala new file mode 100644 index 0000000000000..9e3c06de9a3d0 --- /dev/null +++ b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/arrow/ArrowVectorReaderSuite.scala @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.spark.sql.connect.client.arrow
+
+import org.apache.arrow.memory.RootAllocator
+import org.apache.arrow.vector.TimeStampMicroTZVector
+
+import org.apache.spark.sql.connect.test.ConnectFunSuite
+import org.apache.spark.sql.types.{TimestampLTZNanosType, TimestampNTZNanosType, TimestampType}
+import org.apache.spark.sql.util.ArrowUtils
+
+/**
+ * Checks the target-type handling of [[ArrowVectorReader]] for timestamp columns: nanosecond
+ * precision targets must fail fast with a "not yet supported" error (SPARK-57738), while the
+ * microsecond [[TimestampType]] target must still yield a reader.
+ */
+class ArrowVectorReaderSuite extends ConnectFunSuite {
+
+  // Shared by every test. Each test closes its own vector, so afterAll can release it safely.
+  private val allocator = new RootAllocator()
+
+  override def afterAll(): Unit = {
+    allocator.close()
+    super.afterAll()
+  }
+
+  // Build a TimeStampMicroTZVector (the Arrow encoding for TimestampType) backed by a live
+  // allocator. This is the vector a Connect server would send for any LTZ timestamp column.
+  private def microTZVector(): TimeStampMicroTZVector = {
+    val field = ArrowUtils.toArrowField("ts", TimestampType, nullable = true, "UTC")
+    field.createVector(allocator).asInstanceOf[TimeStampMicroTZVector]
+  }
+
+  // Nanosecond targets from both timestamp families; each one is expected to be rejected.
+  private val nanosTargets = Seq(
+    "TimestampLTZNanosType" -> TimestampLTZNanosType(9),
+    "TimestampNTZNanosType" -> TimestampNTZNanosType(7))
+
+  nanosTargets.foreach { case (typeName, nanosType) =>
+    test(s"SPARK-57738: ArrowVectorReader rejects $typeName with a clear error") {
+      val vector = microTZVector()
+      try {
+        val ex = intercept[RuntimeException] {
+          ArrowVectorReader(nanosType, vector, "UTC")
+        }
+        assert(ex.getMessage.contains("not yet supported"),
+          s"Expected 'not yet supported' in error message, got: ${ex.getMessage}")
+      } finally {
+        vector.close()
+      }
+    }
+  }
+
+  test("SPARK-57738: ArrowVectorReader still succeeds for plain TimestampType") {
+    val vector = microTZVector()
+    try {
+      val reader = ArrowVectorReader(TimestampType, vector, "UTC")
+      assert(reader != null)
+    } finally {
+      vector.close()
+    }
+  }
+}