Posted to commits@spark.apache.org by ma...@apache.org on 2023/02/16 19:04:50 UTC
[spark] branch branch-3.4 updated: [SPARK-42326][SQL] Integrate `_LEGACY_ERROR_TEMP_2099` into `UNSUPPORTED_DATATYPE`
This is an automated email from the ASF dual-hosted git repository.
maxgekk pushed a commit to branch branch-3.4
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.4 by this push:
new 7e2642db062 [SPARK-42326][SQL] Integrate `_LEGACY_ERROR_TEMP_2099` into `UNSUPPORTED_DATATYPE`
7e2642db062 is described below
commit 7e2642db062cc45c44cbf549d6431bd72915fa17
Author: itholic <ha...@databricks.com>
AuthorDate: Thu Feb 16 22:04:17 2023 +0300
[SPARK-42326][SQL] Integrate `_LEGACY_ERROR_TEMP_2099` into `UNSUPPORTED_DATATYPE`
### What changes were proposed in this pull request?
This PR proposes to integrate `_LEGACY_ERROR_TEMP_2099` into `UNSUPPORTED_DATATYPE`.
It also introduces a new error class, `UNSUPPORTED_ARROWTYPE`, for Arrow types that have no Spark mapping.
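For illustration, here is a minimal sketch of how the renamed errors surface after this change. It assumes caller code inside the `org.apache.spark.sql` package (for example, a test suite), since `ArrowUtils` is `private[sql]`; the expected output is inferred from the message templates added in `error-classes.json` below.

```scala
import org.apache.arrow.vector.types.pojo.ArrowType
import org.apache.spark.sql.util.ArrowUtils

// An Arrow type with no Spark mapping (an unsigned 8-bit integer) now fails
// with the named error class instead of _LEGACY_ERROR_TEMP_2099:
ArrowUtils.fromArrowType(new ArrowType.Int(8, false))
// org.apache.spark.SparkUnsupportedOperationException:
// [UNSUPPORTED_ARROWTYPE] Unsupported arrow type Int(8, false).
```

On the Spark-to-Arrow side, an unsupported `DataType` (e.g. `CalendarIntervalType`) now raises `UNSUPPORTED_DATATYPE` with the type rendered via `toSQLType`, e.g. `Unsupported data type "INTERVAL".`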
### Why are the changes needed?
We should assign proper names to LEGACY error classes.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Updated the related unit tests in `ArrowUtilsSuite` and `ArrowConvertersSuite`.
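The updated suites can be run locally, for example with `build/sbt "catalyst/testOnly org.apache.spark.sql.util.ArrowUtilsSuite"` and `build/sbt "sql/testOnly org.apache.spark.sql.execution.arrow.ArrowConvertersSuite"` (assuming the standard Spark sbt module names `catalyst` and `sql`).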
Closes #39979 from itholic/LEGACY_2099.
Authored-by: itholic <ha...@databricks.com>
Signed-off-by: Max Gekk <ma...@gmail.com>
(cherry picked from commit 9855b137032bf9504dff96eb5bb9951accacac0f)
Signed-off-by: Max Gekk <ma...@gmail.com>
---
core/src/main/resources/error/error-classes.json | 11 ++++++-----
.../org/apache/spark/sql/errors/QueryExecutionErrors.scala | 13 ++++++++++---
.../main/scala/org/apache/spark/sql/util/ArrowUtils.scala | 4 ++--
.../scala/org/apache/spark/sql/util/ArrowUtilsSuite.scala | 9 ++++++++-
.../scala/org/apache/spark/sql/execution/Columnar.scala | 2 +-
.../org/apache/spark/sql/execution/arrow/ArrowWriter.scala | 2 +-
.../spark/sql/execution/arrow/ArrowConvertersSuite.scala | 10 ++++++----
7 files changed, 34 insertions(+), 17 deletions(-)
diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json
index 770223625cf..a0970550d72 100644
--- a/core/src/main/resources/error/error-classes.json
+++ b/core/src/main/resources/error/error-classes.json
@@ -1567,6 +1567,12 @@
],
"sqlState" : "42703"
},
+ "UNSUPPORTED_ARROWTYPE" : {
+ "message" : [
+ "Unsupported arrow type <typeName>."
+ ],
+ "sqlState" : "0A000"
+ },
"UNSUPPORTED_DATATYPE" : {
"message" : [
"Unsupported data type <typeName>."
@@ -4094,11 +4100,6 @@
"Could not compare cost with <cost>."
]
},
- "_LEGACY_ERROR_TEMP_2099" : {
- "message" : [
- "Unsupported data type: <dt>."
- ]
- },
"_LEGACY_ERROR_TEMP_2100" : {
"message" : [
"not support type: <dataType>."
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
index 2bafa2e2c03..17c5b2f4f10 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
@@ -25,6 +25,7 @@ import java.time.temporal.ChronoField
import java.util.concurrent.TimeoutException
import com.fasterxml.jackson.core.{JsonParser, JsonToken}
+import org.apache.arrow.vector.types.pojo.ArrowType
import org.apache.hadoop.fs.{FileAlreadyExistsException, FileStatus, Path}
import org.apache.hadoop.fs.permission.FsPermission
import org.codehaus.commons.compiler.{CompileException, InternalCompilerException}
@@ -1124,10 +1125,16 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase {
messageParameters = Map("cost" -> cost))
}
- def unsupportedDataTypeError(dt: String): SparkUnsupportedOperationException = {
+ def unsupportedArrowTypeError(typeName: ArrowType): SparkUnsupportedOperationException = {
new SparkUnsupportedOperationException(
- errorClass = "_LEGACY_ERROR_TEMP_2099",
- messageParameters = Map("dt" -> dt))
+ errorClass = "UNSUPPORTED_ARROWTYPE",
+ messageParameters = Map("typeName" -> typeName.toString))
+ }
+
+ def unsupportedDataTypeError(typeName: DataType): SparkUnsupportedOperationException = {
+ new SparkUnsupportedOperationException(
+ errorClass = "UNSUPPORTED_DATATYPE",
+ messageParameters = Map("typeName" -> toSQLType(typeName)))
}
def notSupportTypeError(dataType: DataType): Throwable = {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/util/ArrowUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/util/ArrowUtils.scala
index e854eba0383..6c6635bac57 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/util/ArrowUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/util/ArrowUtils.scala
@@ -56,7 +56,7 @@ private[sql] object ArrowUtils {
case _: YearMonthIntervalType => new ArrowType.Interval(IntervalUnit.YEAR_MONTH)
case _: DayTimeIntervalType => new ArrowType.Duration(TimeUnit.MICROSECOND)
case _ =>
- throw QueryExecutionErrors.unsupportedDataTypeError(dt.catalogString)
+ throw QueryExecutionErrors.unsupportedDataTypeError(dt)
}
def fromArrowType(dt: ArrowType): DataType = dt match {
@@ -79,7 +79,7 @@ private[sql] object ArrowUtils {
case ArrowType.Null.INSTANCE => NullType
case yi: ArrowType.Interval if yi.getUnit == IntervalUnit.YEAR_MONTH => YearMonthIntervalType()
case di: ArrowType.Duration if di.getUnit == TimeUnit.MICROSECOND => DayTimeIntervalType()
- case _ => throw QueryExecutionErrors.unsupportedDataTypeError(dt.toString)
+ case _ => throw QueryExecutionErrors.unsupportedArrowTypeError(dt)
}
/** Maps field from Spark to Arrow. NOTE: timeZoneId required for TimestampType */
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/ArrowUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/ArrowUtilsSuite.scala
index 6dd02afe19b..2f78d03db80 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/ArrowUtilsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/ArrowUtilsSuite.scala
@@ -21,7 +21,7 @@ import java.time.ZoneId
import org.apache.arrow.vector.types.pojo.ArrowType
-import org.apache.spark.SparkFunSuite
+import org.apache.spark.{SparkFunSuite, SparkUnsupportedOperationException}
import org.apache.spark.sql.catalyst.util.DateTimeTestUtils.LA
import org.apache.spark.sql.types._
@@ -54,6 +54,13 @@ class ArrowUtilsSuite extends SparkFunSuite {
roundtrip(TimestampType)
}
assert(tsExMsg.getMessage.contains("timezoneId"))
+ checkError(
+ exception = intercept[SparkUnsupportedOperationException] {
+ ArrowUtils.fromArrowType(new ArrowType.Int(8, false))
+ },
+ errorClass = "UNSUPPORTED_ARROWTYPE",
+ parameters = Map("typeName" -> "Int(8, false)")
+ )
}
test("timestamp") {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/Columnar.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/Columnar.scala
index 684a3f319ab..a051cc26a7d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/Columnar.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/Columnar.scala
@@ -276,7 +276,7 @@ private object RowToColumnConverter {
case dt: DecimalType => new DecimalConverter(dt)
case mt: MapType => MapConverter(getConverterForType(mt.keyType, nullable = false),
getConverterForType(mt.valueType, mt.valueContainsNull))
- case unknown => throw QueryExecutionErrors.unsupportedDataTypeError(unknown.toString)
+ case unknown => throw QueryExecutionErrors.unsupportedDataTypeError(unknown)
}
if (nullable) {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowWriter.scala
index 34e128a4925..af7126495c5 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowWriter.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowWriter.scala
@@ -78,7 +78,7 @@ object ArrowWriter {
case (_: YearMonthIntervalType, vector: IntervalYearVector) => new IntervalYearWriter(vector)
case (_: DayTimeIntervalType, vector: DurationVector) => new DurationWriter(vector)
case (dt, _) =>
- throw QueryExecutionErrors.unsupportedDataTypeError(dt.catalogString)
+ throw QueryExecutionErrors.unsupportedDataTypeError(dt)
}
}
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowConvertersSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowConvertersSuite.scala
index eb33e2e47ca..82e4c970837 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowConvertersSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowConvertersSuite.scala
@@ -28,7 +28,7 @@ import org.apache.arrow.vector.{VectorLoader, VectorSchemaRoot}
import org.apache.arrow.vector.ipc.JsonFileReader
import org.apache.arrow.vector.util.{ByteArrayReadableSeekableByteChannel, Validator}
-import org.apache.spark.{SparkException, TaskContext}
+import org.apache.spark.{SparkException, SparkUnsupportedOperationException, TaskContext}
import org.apache.spark.sql.{DataFrame, Row}
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.UnsafeProjection
@@ -1269,9 +1269,11 @@ class ArrowConvertersSuite extends SharedSparkSession {
val e = intercept[SparkException] {
calendarIntervalData.toDF().toArrowBatchRdd.collect()
}
-
- assert(e.getCause.isInstanceOf[UnsupportedOperationException])
- assert(e.getCause.getMessage.contains("Unsupported data type: interval"))
+ checkError(
+ exception = e.getCause.asInstanceOf[SparkUnsupportedOperationException],
+ errorClass = "UNSUPPORTED_DATATYPE",
+ parameters = Map("typeName" -> "\"INTERVAL\"")
+ )
}
test("test Arrow Validator") {