Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/sql-ref-datatypes.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ Spark SQL and DataFrames support the following data types:
time-zone.
- `TimeType(precision)`: Represents values comprising values of fields hour, minute and second with the number of decimal digits `precision` following the decimal point in the seconds field, without a time-zone.
The range of values is from `00:00:00` to `23:59:59` for min precision `0`, and to `23:59:59.999999999` for max precision `9`. The default precision is `6`.
- Note: Apache Hive has no TIME type, so `TimeType` is not supported when interoperating with Hive. Storing it in a Hive SerDe table (including `INSERT OVERWRITE DIRECTORY ... STORED AS`) or passing it to a Hive UDF/UDAF/UDTF raises an error rather than silently converting the value.
- `TimestampType`: Timestamp with local time zone (TIMESTAMP_LTZ). It represents values comprising values of fields year, month, day,
hour, minute, and second, with the session local time-zone. The timestamp value represents an
absolute point in time.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -961,6 +961,14 @@ private[hive] trait HiveInspectors {
case _: UserDefinedType[_] =>
val sqlType = dataType.asInstanceOf[UserDefinedType[_]].sqlType
toInspector(sqlType)
// Hive has no TIME type, so it cannot be represented by any Hive object inspector.
case _: TimeType => throw unsupportedHiveType(dataType)
}

/**
 * Builds (but does not throw) the error reported when a Catalyst type has no Hive counterpart.
 *
 * @param dataType the Catalyst type that cannot be mapped to Hive
 * @return an `AnalysisException` with error class `UNSUPPORTED_DATATYPE` whose `typeName`
 *         parameter is `toSQLType(dataType)`
 */
private def unsupportedHiveType(dataType: DataType): AnalysisException = {
  val params = Map("typeName" -> toSQLType(dataType))
  new AnalysisException(errorClass = "UNSUPPORTED_DATATYPE", messageParameters = params)
}

/**
Expand Down Expand Up @@ -1029,6 +1037,9 @@ private[hive] trait HiveInspectors {
toInspector(dt)
case Literal(_, dt: UserDefinedType[_]) =>
toInspector(dt.sqlType)
// Hive has no TIME type, so a TIME constant cannot be mapped to a Hive object inspector.
case Literal(_, dt: TimeType) =>
throw unsupportedHiveType(dt)
// We will enumerate all of the possible constant expressions, throw exception if we missed
case Literal(_, dt) =>
throw SparkException.internalError(s"Hive doesn't support the constant type [$dt].")
Expand Down Expand Up @@ -1281,6 +1292,8 @@ private[hive] trait HiveInspectors {
case NullType => voidTypeInfo
case _: DayTimeIntervalType => intervalDayTimeTypeInfo
case _: YearMonthIntervalType => intervalYearMonthTypeInfo
// Hive has no TIME type, so there is no Hive TypeInfo to map it to.
case _: TimeType => throw unsupportedHiveType(dt)
case dt =>
throw new AnalysisException(
errorClass = "_LEGACY_ERROR_TEMP_3095", messageParameters = Map("dt" -> toSQLType(dt)))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ import org.apache.spark.sql.execution.datasources.{FileFormat, OutputWriter, Out
import org.apache.spark.sql.hive.{HiveInspectors, HiveTableUtil}
import org.apache.spark.sql.internal.SessionStateHelper
import org.apache.spark.sql.sources.DataSourceRegister
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.types.{ArrayType, DataType, MapType, StructType, TimeType, UserDefinedType}
import org.apache.spark.util.SerializableJobConf

/**
Expand Down Expand Up @@ -115,6 +115,23 @@ case class HiveFileFormat(fileSinkConf: FileSinkDesc)
}
}

/**
 * Reports whether `dataType` may be written through this Hive serde file format.
 *
 * Hive has no TIME type, so `TimeType` is rejected wherever it appears: directly, as a struct
 * field, as an array element, as a map key or value, or as the underlying SQL type of a UDT.
 * Every other type is accepted, which preserves the default behavior.
 *
 * @param dataType the (possibly nested) type to check
 * @return `false` if `dataType` is or contains `TimeType`, `true` otherwise
 */
override def supportDataType(dataType: DataType): Boolean = dataType match {
  case _: TimeType => false
  case struct: StructType => struct.fields.forall(field => supportDataType(field.dataType))
  case array: ArrayType => supportDataType(array.elementType)
  case map: MapType => Seq(map.keyType, map.valueType).forall(supportDataType)
  case udt: UserDefinedType[_] => supportDataType(udt.sqlType)
  case _ => true
}

override def supportFieldName(name: String): Boolean = {
fileSinkConf.getTableInfo.getOutputFileFormatClassName match {
case "org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat" =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo
import org.apache.hadoop.io.LongWritable

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.{Row, TestUserClassUDT}
import org.apache.spark.sql.{AnalysisException, Row, TestUserClassUDT}
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.Literal
import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData, MapData}
Expand Down Expand Up @@ -291,4 +291,21 @@ class HiveInspectorSuite extends SparkFunSuite with HiveInspectors {
assert(typeInfo2.precision() === 18)
assert(typeInfo2.scale() === 10)
}

test("SPARK-57556: TIME type is unsupported in Hive object inspectors") {
  val time = TimeType()
  val params = Map("typeName" -> s"\"${time.sql}\"")

  // Evaluates `attempt` and requires it to fail with UNSUPPORTED_DATATYPE for the TIME type.
  def assertUnsupported(attempt: => Any): Unit = {
    checkError(
      exception = intercept[AnalysisException](attempt),
      condition = "UNSUPPORTED_DATATYPE",
      parameters = params)
  }

  // Data type -> object inspector.
  assertUnsupported(toInspector(time))
  // Constant expression -> object inspector.
  assertUnsupported(toInspector(Literal.create(null, time)))
  // Data type -> Hive TypeInfo.
  assertUnsupported(time.toTypeInfo)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -683,6 +683,56 @@ class InsertSuite extends QueryTest with TestHiveSingleton with BeforeAndAfter {
}
}

test("SPARK-57556: TIME type is unsupported when writing to a Hive serde directory") {
  // With native data source conversion disabled, the write goes through the Hive serde
  // path (HiveFileFormat) rather than a native data source that may support TIME.
  withSQLConf(HiveUtils.CONVERT_METASTORE_INSERT_DIR.key -> "false") {
    withTempDir { targetDir =>
      val insertStmt =
        s"""
           |INSERT OVERWRITE LOCAL DIRECTORY '${targetDir.toURI.getPath}'
           |STORED AS PARQUET
           |SELECT TIME'12:01:02' AS c
           |""".stripMargin
      // InsertIntoHiveDirCommand wraps the failure in a SparkException, so the
      // AnalysisException under test is its cause.
      val wrapped = intercept[SparkException](sql(insertStmt))
      val expectedParams = Map(
        "columnName" -> "`c`",
        "columnType" -> s"\"${TimeType().sql}\"",
        "format" -> "Hive")
      checkError(
        exception = wrapped.getCause.asInstanceOf[AnalysisException],
        condition = "UNSUPPORTED_DATA_TYPE_FOR_DATASOURCE",
        parameters = expectedParams)
    }
  }
}

test("SPARK-57556: nested TIME type is unsupported when writing to a Hive serde directory") {
  // Covers the recursion in HiveFileFormat.supportDataType: a TIME element inside an array
  // must be rejected too, and the error must report the full (array) column type.
  withSQLConf(HiveUtils.CONVERT_METASTORE_INSERT_DIR.key -> "false") {
    withTempDir { targetDir =>
      val insertStmt =
        s"""
           |INSERT OVERWRITE LOCAL DIRECTORY '${targetDir.toURI.getPath}'
           |STORED AS PARQUET
           |SELECT array(TIME'12:01:02') AS c
           |""".stripMargin
      // InsertIntoHiveDirCommand wraps the failure in a SparkException, so the
      // AnalysisException under test is its cause.
      val wrapped = intercept[SparkException](sql(insertStmt))
      val expectedParams = Map(
        "columnName" -> "`c`",
        "columnType" -> s"\"${ArrayType(TimeType()).sql}\"",
        "format" -> "Hive")
      checkError(
        exception = wrapped.getCause.asInstanceOf[AnalysisException],
        condition = "UNSUPPORTED_DATA_TYPE_FOR_DATASOURCE",
        parameters = expectedParams)
    }
  }
}

test("insert overwrite to dir from temp table") {
withTempView("test_insert_table") {
spark.range(10).selectExpr("id", "id AS str").createOrReplaceTempView("test_insert_table")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ import org.apache.spark.sql.execution.WholeStageCodegenExec
import org.apache.spark.sql.functions.{call_function, max}
import org.apache.spark.sql.hive.test.{TestHiveSingleton, TestUDTFJar}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types.TimeType
import org.apache.spark.tags.SlowHiveTest
import org.apache.spark.util.Utils

Expand Down Expand Up @@ -407,6 +408,19 @@ class HiveUDFSuite extends QueryTest with TestHiveSingleton {
}
}

test("SPARK-57556: TIME type is unsupported as a Hive UDF argument") {
  withUserDefinedFunction("testGenericUDFHash" -> true) {
    val udfClassName = classOf[GenericUDFHash].getName
    sql(s"CREATE TEMPORARY FUNCTION testGenericUDFHash AS '$udfClassName'")
    // The Hive UDF resolver wraps the underlying failure, so only the message is checked:
    // it must name the unsupported TIME type instead of surfacing a MatchError/internal error.
    val message = intercept[AnalysisException] {
      sql("SELECT testGenericUDFHash(TIME'12:01:02')").collect()
    }.getMessage
    assert(message.contains("UNSUPPORTED_DATATYPE"))
    assert(message.contains(TimeType().sql))
  }
}

test("Hive UDFs with insufficient number of input arguments should trigger an analysis error") {
withTempView("testUDF") {
Seq((1, 2)).toDF("a", "b").createOrReplaceTempView("testUDF")
Expand Down