diff --git a/docs/sql-ref-datatypes.md b/docs/sql-ref-datatypes.md index 27663763d6bd6..a5da7949d0599 100644 --- a/docs/sql-ref-datatypes.md +++ b/docs/sql-ref-datatypes.md @@ -48,6 +48,7 @@ Spark SQL and DataFrames support the following data types: time-zone. - `TimeType(precision)`: Represents values comprising values of fields hour, minute and second with the number of decimal digits `precision` following the decimal point in the seconds field, without a time-zone. The range of values is from `00:00:00` to `23:59:59` for min precision `0`, and to `23:59:59.999999999` for max precision `9`. The default precision is `6`. + - Note: Apache Hive has no TIME type, so `TimeType` cannot be used with Hive SerDes or Hive functions. Writing it through a Hive SerDe (into a Hive SerDe table, or into a directory with `INSERT OVERWRITE DIRECTORY ... STORED AS`) or passing it to a Hive UDF/UDAF/UDTF raises an error rather than silently converting the value. - `TimestampType`: Timestamp with local time zone(TIMESTAMP_LTZ). It represents values comprising values of fields year, month, day, hour, minute, and second, with the session local time-zone. The timestamp value represents an absolute point in time. diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala index 49b9cc798a1b3..285548086a097 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala @@ -961,6 +961,14 @@ private[hive] trait HiveInspectors { case _: UserDefinedType[_] => val sqlType = dataType.asInstanceOf[UserDefinedType[_]].sqlType toInspector(sqlType) + // Hive has no TIME type, so it cannot be represented by any Hive object inspector. 
+ case _: TimeType => throw unsupportedHiveType(dataType) + } + + private def unsupportedHiveType(dataType: DataType): AnalysisException = { + new AnalysisException( + errorClass = "UNSUPPORTED_DATATYPE", + messageParameters = Map("typeName" -> toSQLType(dataType))) } /** @@ -1029,6 +1037,9 @@ private[hive] trait HiveInspectors { toInspector(dt) case Literal(_, dt: UserDefinedType[_]) => toInspector(dt.sqlType) + // Hive has no TIME type, so a TIME constant cannot be mapped to a Hive object inspector. + case Literal(_, dt: TimeType) => + throw unsupportedHiveType(dt) // We will enumerate all of the possible constant expressions, throw exception if we missed case Literal(_, dt) => throw SparkException.internalError(s"Hive doesn't support the constant type [$dt].") @@ -1281,6 +1292,8 @@ private[hive] trait HiveInspectors { case NullType => voidTypeInfo case _: DayTimeIntervalType => intervalDayTimeTypeInfo case _: YearMonthIntervalType => intervalYearMonthTypeInfo + // Hive has no TIME type, so there is no Hive TypeInfo to map it to. 
+ case _: TimeType => throw unsupportedHiveType(dt) case dt => throw new AnalysisException( errorClass = "_LEGACY_ERROR_TEMP_3095", messageParameters = Map("dt" -> toSQLType(dt))) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveFileFormat.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveFileFormat.scala index 5506cf8dae073..acbc72fbf7e0f 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveFileFormat.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveFileFormat.scala @@ -41,7 +41,7 @@ import org.apache.spark.sql.execution.datasources.{FileFormat, OutputWriter, Out import org.apache.spark.sql.hive.{HiveInspectors, HiveTableUtil} import org.apache.spark.sql.internal.SessionStateHelper import org.apache.spark.sql.sources.DataSourceRegister -import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.types.{ArrayType, DataType, MapType, StructType, TimeType, UserDefinedType} import org.apache.spark.util.SerializableJobConf /** @@ -115,6 +115,23 @@ case class HiveFileFormat(fileSinkConf: FileSinkDesc) } } + override def supportDataType(dataType: DataType): Boolean = dataType match { + // Hive has no TIME type, so it cannot be stored in a Hive serde table. Reject it explicitly + // (recursing into nested types) while preserving the default behavior for all other types. 
+ case _: TimeType => false + + case st: StructType => st.forall { f => supportDataType(f.dataType) } + + case ArrayType(elementType, _) => supportDataType(elementType) + + case MapType(keyType, valueType, _) => + supportDataType(keyType) && supportDataType(valueType) + + case udt: UserDefinedType[_] => supportDataType(udt.sqlType) + + case _ => true + } + override def supportFieldName(name: String): Boolean = { fileSinkConf.getTableInfo.getOutputFileFormatClassName match { case "org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat" => diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveInspectorSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveInspectorSuite.scala index 8acabd579d446..b7fb506f07b53 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveInspectorSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveInspectorSuite.scala @@ -28,7 +28,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo import org.apache.hadoop.io.LongWritable import org.apache.spark.SparkFunSuite -import org.apache.spark.sql.{Row, TestUserClassUDT} +import org.apache.spark.sql.{AnalysisException, Row, TestUserClassUDT} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.Literal import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData, MapData} @@ -291,4 +291,21 @@ class HiveInspectorSuite extends SparkFunSuite with HiveInspectors { assert(typeInfo2.precision() === 18) assert(typeInfo2.scale() === 10) } + + test("SPARK-57556: TIME type is unsupported in Hive object inspectors") { + val timeType = TimeType() + val expectedParams = Map("typeName" -> s"\"${timeType.sql}\"") + checkError( + exception = intercept[AnalysisException](toInspector(timeType)), + condition = "UNSUPPORTED_DATATYPE", + parameters = expectedParams) + checkError( + exception = intercept[AnalysisException](toInspector(Literal.create(null, timeType))), + condition = 
"UNSUPPORTED_DATATYPE", + parameters = expectedParams) + checkError( + exception = intercept[AnalysisException](timeType.toTypeInfo), + condition = "UNSUPPORTED_DATATYPE", + parameters = expectedParams) + } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertSuite.scala index bd2b256e49273..34ffad0c0cbc2 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertSuite.scala @@ -683,6 +683,56 @@ class InsertSuite extends QueryTest with TestHiveSingleton with BeforeAndAfter { } } + test("SPARK-57556: TIME type is unsupported when writing to a Hive serde directory") { + // Disable native data source conversion so that the write goes through the Hive serde + // path (HiveFileFormat) instead of a native data source that may support TIME. + withSQLConf(HiveUtils.CONVERT_METASTORE_INSERT_DIR.key -> "false") { + withTempDir { dir => + // InsertIntoHiveDirCommand wraps the failure in a SparkException, so assert on the cause. + val e = intercept[SparkException] { + sql( + s""" + |INSERT OVERWRITE LOCAL DIRECTORY '${dir.toURI.getPath}' + |STORED AS PARQUET + |SELECT TIME'12:01:02' AS c + """.stripMargin) + } + checkError( + exception = e.getCause.asInstanceOf[AnalysisException], + condition = "UNSUPPORTED_DATA_TYPE_FOR_DATASOURCE", + parameters = Map( + "columnName" -> "`c`", + "columnType" -> s"\"${TimeType().sql}\"", + "format" -> "Hive")) + } + } + } + + test("SPARK-57556: nested TIME type is unsupported when writing to a Hive serde directory") { + // Exercises HiveFileFormat.supportDataType's recursion into nested types: a TIME nested inside + // an array must also be rejected, with the full (array) column type reported. 
+ withSQLConf(HiveUtils.CONVERT_METASTORE_INSERT_DIR.key -> "false") { + withTempDir { dir => + // InsertIntoHiveDirCommand wraps the failure in a SparkException, so assert on the cause. + val e = intercept[SparkException] { + sql( + s""" + |INSERT OVERWRITE LOCAL DIRECTORY '${dir.toURI.getPath}' + |STORED AS PARQUET + |SELECT array(TIME'12:01:02') AS c + """.stripMargin) + } + checkError( + exception = e.getCause.asInstanceOf[AnalysisException], + condition = "UNSUPPORTED_DATA_TYPE_FOR_DATASOURCE", + parameters = Map( + "columnName" -> "`c`", + "columnType" -> s"\"${ArrayType(TimeType()).sql}\"", + "format" -> "Hive")) + } + } + } + test("insert overwrite to dir from temp table") { withTempView("test_insert_table") { spark.range(10).selectExpr("id", "id AS str").createOrReplaceTempView("test_insert_table") diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala index 6a44e17296c0c..8e668fe7c7a1b 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala @@ -41,6 +41,7 @@ import org.apache.spark.sql.execution.WholeStageCodegenExec import org.apache.spark.sql.functions.{call_function, max} import org.apache.spark.sql.hive.test.{TestHiveSingleton, TestUDTFJar} import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types.TimeType import org.apache.spark.tags.SlowHiveTest import org.apache.spark.util.Utils @@ -407,6 +408,19 @@ class HiveUDFSuite extends QueryTest with TestHiveSingleton { } } + test("SPARK-57556: TIME type is unsupported as a Hive UDF argument") { + withUserDefinedFunction("testGenericUDFHash" -> true) { + sql(s"CREATE TEMPORARY FUNCTION testGenericUDFHash AS '${classOf[GenericUDFHash].getName}'") + // The Hive UDF resolver wraps the underlying failure, but the message must still clearly + // identify 
the unsupported TIME type rather than surfacing a MatchError/internal error. + val e = intercept[AnalysisException] { + sql("SELECT testGenericUDFHash(TIME'12:01:02')").collect() + } + assert(e.getMessage.contains("UNSUPPORTED_DATATYPE")) + assert(e.getMessage.contains(TimeType().sql)) + } + } + test("Hive UDFs with insufficient number of input arguments should trigger an analysis error") { withTempView("testUDF") { Seq((1, 2)).toDF("a", "b").createOrReplaceTempView("testUDF")