From b12bce33ff207384b2967e7b85164c3778764132 Mon Sep 17 00:00:00 2001
From: Jubin Soni <jubins@nyu.edu>
Date: Sun, 28 Jun 2026 08:28:29 -0700
Subject: [PATCH 1/2] [SPARK-57738][CONNECT] Restore fast-fail guard for
 nanosecond timestamp types in ArrowVectorReader
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

### What is the purpose of the change

Fixes SPARK-57738 — restores the fast-fail guard for nanosecond-precision timestamp
types in `ArrowVectorReader`, which was silently broken by SPARK-57303.

SPARK-57303 updated `UpCastRule.canUpCast` to return `true` for lossless widening
within the timestamp family (e.g. `TimestampType -> TimestampLTZNanosType(p)`).
As a side effect, the existing unsupported-type guard in `ArrowVectorReader.applyDefault`
no longer rejects nanosecond timestamp targets — the SPARK-57303 commit message
explicitly flagged this as a known follow-up item.

Without this fix, a request to read a `TIMESTAMP_LTZ(p)` or `TIMESTAMP_NTZ(p)`
(`p` in `[7, 9]`) column over Spark Connect silently passes the guard and then
crashes with a confusing `"Unsupported Vector Type"` error from the catch-all
branch of the `vector match`. With this fix it fails fast with a clear
`"not yet supported"` message.

### Brief change log

- `sql/connect/common/src/main/scala/org/apache/spark/sql/connect/client/arrow/ArrowVectorReader.scala`:
  added `AnyTimestampNanoType` to the import and inserted an explicit rejection
  guard between the `canUpCast` check and the `vector match` block

### Verifying this change

Before this series, no unit tests covered `ArrowVectorReader` directly. The fix
is a defensive guard on an unsupported code path (nanosecond-precision timestamps
are not yet reachable over Connect in any supported workflow), so it is verified
in two ways:

- Manual inspection: the guard fires before the `vector match`, so no
  nanosecond type can reach the `"Unsupported Vector Type"` catch-all
- New unit tests: `ArrowVectorReaderSuite` (added in the second patch of this
  series) checks that nanosecond timestamp targets are rejected with the
  `"not yet supported"` message and that plain `TimestampType` still gets a reader
- The fix will be superseded and removed when Connect nanos support is
  implemented (the comment in the code points to this)

### Does this pull request potentially affect one of the following parts

- Dependencies (does it add or upgrade a dependency): no
- The public API, i.e., is any changed class annotated with `@Public`/`@Evolving`: no — `ArrowVectorReader` is `private[connect]`
- The serializers: no
- The runtime per-record code paths (performance sensitive): no — the guard only fires for an unsupported type that cannot currently be produced
- Anything that affects deployment or recovery: no
- The S3 file system connector: no

### Documentation

Does this pull request introduce a new feature? No — this is a bug fix restoring
a guard that was inadvertently disabled by SPARK-57303.

### Was generative AI tooling used to co-author this PR?

Yes — Claude Code was used as a pair-programming assistant. All code was written,
understood, and verified by the author.
Generated-by: Claude Sonnet 4.6
---
 .../sql/connect/client/arrow/ArrowVectorReader.scala  | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/client/arrow/ArrowVectorReader.scala b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/client/arrow/ArrowVectorReader.scala
index 54311cecc1627..7a622d3010e9f 100644
--- a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/client/arrow/ArrowVectorReader.scala
+++ b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/client/arrow/ArrowVectorReader.scala
@@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.util.DateTimeConstants.MICROS_PER_SECOND
 import org.apache.spark.sql.catalyst.util.IntervalStringStyles.ANSI_STYLE
 import org.apache.spark.sql.catalyst.util.SparkDateTimeUtils._
 import org.apache.spark.sql.connect.common.types.ops.ConnectTypeOps
-import org.apache.spark.sql.types.{DataType, DayTimeIntervalType, Decimal, UpCastRule, YearMonthIntervalType}
+import org.apache.spark.sql.types.{AnyTimestampNanoType, DataType, DayTimeIntervalType, Decimal, UpCastRule, YearMonthIntervalType}
 import org.apache.spark.sql.util.ArrowUtils
 import org.apache.spark.util.SparkStringUtils
 
@@ -84,6 +84,15 @@ object ArrowVectorReader {
       throw new RuntimeException(
         s"Reading '$targetDataType' values from a ${vector.getClass} instance is not supported.")
     }
+    // Nanosecond-precision timestamp types (TIMESTAMP_LTZ(p) / TIMESTAMP_NTZ(p), p in [7,9]) are
+    // not yet supported over Spark Connect: this reader has no implementation for them, even
+    // though Arrow itself defines nanosecond timestamp vectors. UpCastRule.canUpCast now returns
+    // true for the micro -> nanos widening direction (SPARK-57303), so the generic guard above no
+    // longer catches this case. Fail fast with a clear message until Connect supports nanos.
+    if (targetDataType.isInstanceOf[AnyTimestampNanoType]) {
+      throw new RuntimeException(
+        s"Reading '$targetDataType' values over Spark Connect is not yet supported.")
+    }
     vector match {
       case v: BitVector => new BitVectorReader(v)
       case v: TinyIntVector => new TinyIntVectorReader(v)

From e17ac754e40655862ee4a941df4b88e03c5f1a5d Mon Sep 17 00:00:00 2001
From: Jubin Soni <jubins@nyu.edu>
Date: Sun, 28 Jun 2026 08:35:24 -0700
Subject: [PATCH 2/2] [SPARK-57738][CONNECT][TESTS] Add ArrowVectorReaderSuite

Run with: build/sbt 'connect-client-jvm/testOnly *ArrowVectorReaderSuite'
---
 .../client/arrow/ArrowVectorReaderSuite.scala | 77 +++++++++++++++++++
 1 file changed, 77 insertions(+)
 create mode 100644 sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/arrow/ArrowVectorReaderSuite.scala

diff --git a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/arrow/ArrowVectorReaderSuite.scala b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/arrow/ArrowVectorReaderSuite.scala
new file mode 100644
index 0000000000000..9e3c06de9a3d0
--- /dev/null
+++ b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/arrow/ArrowVectorReaderSuite.scala
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.connect.client.arrow
+
+import org.apache.arrow.memory.RootAllocator
+import org.apache.arrow.vector.TimeStampMicroTZVector
+
+import org.apache.spark.sql.connect.test.ConnectFunSuite
+import org.apache.spark.sql.types.{TimestampLTZNanosType, TimestampNTZNanosType, TimestampType}
+import org.apache.spark.sql.util.ArrowUtils
+
+/** Unit tests for the target-type guards in [[ArrowVectorReader]] construction. */
+class ArrowVectorReaderSuite extends ConnectFunSuite with org.scalatest.BeforeAndAfterAll {
+
+  private val allocator = new RootAllocator()
+
+  // NOTE(review): BeforeAndAfterAll is mixed in explicitly so that this override is a real
+  // lifecycle hook; drop the mixin if ConnectFunSuite already provides it.
+  override def afterAll(): Unit = {
+    allocator.close()
+    super.afterAll()
+  }
+
+  // Build a TimeStampMicroTZVector (the Arrow encoding for TimestampType) backed by a live
+  // allocator. This is the vector a Connect server would send for any LTZ timestamp column.
+  private def microTZVector(): TimeStampMicroTZVector = {
+    val field = ArrowUtils.toArrowField("ts", TimestampType, nullable = true, "UTC")
+    field.createVector(allocator).asInstanceOf[TimeStampMicroTZVector]
+  }
+
+  // Asserts that creating a reader for `target` over a micro-TZ vector throws with the
+  // "not yet supported" message; the vector is released whether or not the assertion holds.
+  private def assertNotYetSupported(target: org.apache.spark.sql.types.DataType): Unit = {
+    val vector = microTZVector()
+    try {
+      val ex = intercept[RuntimeException] {
+        ArrowVectorReader(target, vector, "UTC")
+      }
+      assert(ex.getMessage.contains("not yet supported"),
+        s"Expected 'not yet supported' in error message, got: ${ex.getMessage}")
+    } finally {
+      vector.close()
+    }
+  }
+
+  test("SPARK-57738: ArrowVectorReader rejects TimestampLTZNanosType with a clear error") {
+    assertNotYetSupported(TimestampLTZNanosType(9))
+  }
+
+  test("SPARK-57738: ArrowVectorReader rejects TimestampNTZNanosType with a clear error") {
+    assertNotYetSupported(TimestampNTZNanosType(7))
+  }
+
+  test("SPARK-57738: ArrowVectorReader still succeeds for plain TimestampType") {
+    val vector = microTZVector()
+    try {
+      val reader = ArrowVectorReader(TimestampType, vector, "UTC")
+      assert(reader != null)
+    } finally {
+      vector.close()
+    }
+  }
+}