You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2022/11/28 09:01:17 UTC
[spark] branch master updated: [SPARK-41272][SQL] Assign a name to the error class _LEGACY_ERROR_TEMP_2019
This is an automated email from the ASF dual-hosted git repository.
maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new d979736a9eb [SPARK-41272][SQL] Assign a name to the error class _LEGACY_ERROR_TEMP_2019
d979736a9eb is described below
commit d979736a9eb754725d33fd5baca88a1c1a8c23ce
Author: panbingkun <pb...@gmail.com>
AuthorDate: Mon Nov 28 12:01:02 2022 +0300
[SPARK-41272][SQL] Assign a name to the error class _LEGACY_ERROR_TEMP_2019
### What changes were proposed in this pull request?
In the PR, I propose to assign the name `NULL_MAP_KEY` to the error class `_LEGACY_ERROR_TEMP_2019`.
### Why are the changes needed?
Proper names of error classes should improve user experience with Spark SQL.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Pass GitHub Actions (GA).
Closes #38808 from panbingkun/LEGACY_2019.
Authored-by: panbingkun <pb...@gmail.com>
Signed-off-by: Max Gekk <ma...@gmail.com>
---
core/src/main/resources/error/error-classes.json | 10 ++---
.../spark/sql/errors/QueryExecutionErrors.scala | 2 +-
.../catalyst/encoders/ExpressionEncoderSuite.scala | 20 +++++++---
.../expressions/CollectionExpressionsSuite.scala | 10 ++---
.../catalyst/expressions/ComplexTypeSuite.scala | 11 +++---
.../expressions/ExpressionEvalHelper.scala | 43 +++++++++++++++++++++-
.../expressions/ObjectExpressionsSuite.scala | 10 ++---
.../catalyst/util/ArrayBasedMapBuilderSuite.scala | 8 +++-
.../apache/spark/sql/DataFrameFunctionsSuite.scala | 38 ++++++++++++++-----
9 files changed, 113 insertions(+), 39 deletions(-)
diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json
index 1246e870e0d..9f4337d0618 100644
--- a/core/src/main/resources/error/error-classes.json
+++ b/core/src/main/resources/error/error-classes.json
@@ -895,6 +895,11 @@
"The comparison result is null. If you want to handle null as 0 (equal), you can set \"spark.sql.legacy.allowNullComparisonResultInArraySort\" to \"true\"."
]
},
+ "NULL_MAP_KEY" : {
+ "message" : [
+ "Cannot use null as map key."
+ ]
+ },
"NUMERIC_OUT_OF_SUPPORTED_RANGE" : {
"message" : [
"The value <value> cannot be interpreted as a numeric since it has more than 38 digits."
@@ -3504,11 +3509,6 @@
"class `<cls>` is not supported by `MapObjects` as resulting collection."
]
},
- "_LEGACY_ERROR_TEMP_2019" : {
- "message" : [
- "Cannot use null as map key!"
- ]
- },
"_LEGACY_ERROR_TEMP_2020" : {
"message" : [
"Couldn't find a valid constructor on <cls>"
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
index 5db54f7f4cf..15dfa581c59 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
@@ -444,7 +444,7 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase {
def nullAsMapKeyNotAllowedError(): SparkRuntimeException = {
new SparkRuntimeException(
- errorClass = "_LEGACY_ERROR_TEMP_2019",
+ errorClass = "NULL_MAP_KEY",
messageParameters = Map.empty)
}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala
index 9b481b13fee..e9336405a53 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala
@@ -24,7 +24,7 @@ import java.util.Arrays
import scala.collection.mutable.ArrayBuffer
import scala.reflect.runtime.universe.TypeTag
-import org.apache.spark.SparkArithmeticException
+import org.apache.spark.{SparkArithmeticException, SparkRuntimeException}
import org.apache.spark.sql.{Encoder, Encoders}
import org.apache.spark.sql.catalyst.{FooClassWithEnum, FooEnum, OptionalData, PrimitiveData, ScroogeLikeExample}
import org.apache.spark.sql.catalyst.analysis.AnalysisTest
@@ -539,14 +539,24 @@ class ExpressionEncoderSuite extends CodegenInterpretedPlanTest with AnalysisTes
test("null check for map key: String") {
val toRow = ExpressionEncoder[Map[String, Int]]().createSerializer()
- val e = intercept[RuntimeException](toRow(Map(("a", 1), (null, 2))))
- assert(e.getMessage.contains("Cannot use null as map key"))
+ val e = intercept[SparkRuntimeException](toRow(Map(("a", 1), (null, 2))))
+ assert(e.getCause.isInstanceOf[SparkRuntimeException])
+ checkError(
+ exception = e.getCause.asInstanceOf[SparkRuntimeException],
+ errorClass = "NULL_MAP_KEY",
+ parameters = Map.empty
+ )
}
test("null check for map key: Integer") {
val toRow = ExpressionEncoder[Map[Integer, String]]().createSerializer()
- val e = intercept[RuntimeException](toRow(Map((1, "a"), (null, "b"))))
- assert(e.getMessage.contains("Cannot use null as map key"))
+ val e = intercept[SparkRuntimeException](toRow(Map((1, "a"), (null, "b"))))
+ assert(e.getCause.isInstanceOf[SparkRuntimeException])
+ checkError(
+ exception = e.getCause.asInstanceOf[SparkRuntimeException],
+ errorClass = "NULL_MAP_KEY",
+ parameters = Map.empty
+ )
}
test("throw exception for tuples with more than 22 elements") {
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala
index 55456e309b6..f6c529ec4ce 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala
@@ -24,7 +24,7 @@ import java.util.TimeZone
import scala.language.implicitConversions
import scala.util.Random
-import org.apache.spark.SparkFunSuite
+import org.apache.spark.{SparkFunSuite, SparkRuntimeException}
import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
@@ -331,9 +331,9 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper
checkEvaluation(MapFromEntries(ai4), create_map(1 -> 20))
}
// Map key can't be null
- checkExceptionInExpression[RuntimeException](
+ checkErrorInExpression[SparkRuntimeException](
MapFromEntries(ai5),
- "Cannot use null as map key")
+ "NULL_MAP_KEY")
checkEvaluation(MapFromEntries(ai6), null)
// Non-primitive-type keys and values
@@ -358,9 +358,9 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper
checkEvaluation(MapFromEntries(as4), create_map("a" -> "bb"))
}
// Map key can't be null
- checkExceptionInExpression[RuntimeException](
+ checkExceptionInExpression[SparkRuntimeException](
MapFromEntries(as5),
- "Cannot use null as map key")
+ "NULL_MAP_KEY")
checkEvaluation(MapFromEntries(as6), null)
// map key can't be map
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala
index b9f6ca1b191..22b0635dde5 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala
@@ -17,7 +17,7 @@
package org.apache.spark.sql.catalyst.expressions
-import org.apache.spark.SparkFunSuite
+import org.apache.spark.{SparkFunSuite, SparkRuntimeException}
import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.analysis.{TypeCheckResult, UnresolvedExtractValue}
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.DataTypeMismatch
@@ -28,7 +28,6 @@ import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.UTF8String
-
class ComplexTypeSuite extends SparkFunSuite with ExpressionEvalHelper {
/**
@@ -278,9 +277,9 @@ class ComplexTypeSuite extends SparkFunSuite with ExpressionEvalHelper {
create_map(intSeq, strWithNull.map(_.value)))
// Map key can't be null
- checkExceptionInExpression[RuntimeException](
+ checkErrorInExpression[SparkRuntimeException](
CreateMap(interlace(strWithNull, intSeq.map(Literal(_)))),
- "Cannot use null as map key")
+ "NULL_MAP_KEY")
checkExceptionInExpression[RuntimeException](
CreateMap(Seq(Literal(1), Literal(2), Literal(1), Literal(3))), "Duplicate map key")
@@ -382,9 +381,9 @@ class ComplexTypeSuite extends SparkFunSuite with ExpressionEvalHelper {
checkEvaluation(MapFromArrays(nullArray, nullArray), null)
// Map key can't be null
- checkExceptionInExpression[RuntimeException](
+ checkErrorInExpression[SparkRuntimeException](
MapFromArrays(intWithNullArray, strArray),
- "Cannot use null as map key")
+ "NULL_MAP_KEY")
checkExceptionInExpression[RuntimeException](
MapFromArrays(
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala
index 41dd45f68e7..5be0cae4a22 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala
@@ -24,7 +24,7 @@ import org.scalactic.TripleEqualsSupport.Spread
import org.scalatest.exceptions.TestFailedException
import org.scalatestplus.scalacheck.ScalaCheckDrivenPropertyChecks
-import org.apache.spark.{SparkConf, SparkFunSuite}
+import org.apache.spark.{SparkConf, SparkFunSuite, SparkThrowable}
import org.apache.spark.serializer.JavaSerializer
import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow}
@@ -147,6 +147,47 @@ trait ExpressionEvalHelper extends ScalaCheckDrivenPropertyChecks with PlanTestB
}
}
+ protected def checkErrorInExpression[T <: SparkThrowable : ClassTag](
+ expression: => Expression,
+ errorClass: String,
+ parameters: Map[String, String] = Map.empty): Unit = {
+ checkErrorInExpression[T](expression, InternalRow.empty, errorClass, parameters)
+ }
+
+ protected def checkErrorInExpression[T <: SparkThrowable : ClassTag](
+ expression: => Expression,
+ inputRow: InternalRow,
+ errorClass: String,
+ parameters: Map[String, String]): Unit = {
+
+ def checkException(eval: => Unit, testMode: String): Unit = {
+ val modes = Seq(CodegenObjectFactoryMode.CODEGEN_ONLY, CodegenObjectFactoryMode.NO_CODEGEN)
+ withClue(s"($testMode)") {
+ val e = intercept[T] {
+ for (fallbackMode <- modes) {
+ withSQLConf(SQLConf.CODEGEN_FACTORY_MODE.key -> fallbackMode.toString) {
+ eval
+ }
+ }
+ }
+ checkError(
+ exception = e,
+ errorClass = errorClass,
+ parameters = parameters
+ )
+ }
+ }
+
+ // Make it a method to obtain a fresh expression every time.
+ def expr = prepareEvaluation(expression)
+
+ checkException(evaluateWithoutCodegen(expr, inputRow), "non-codegen mode")
+ checkException(evaluateWithMutableProjection(expr, inputRow), "codegen mode")
+ if (GenerateUnsafeProjection.canSupport(expr.dataType)) {
+ checkException(evaluateWithUnsafeProjection(expr, inputRow), "unsafe mode")
+ }
+ }
+
protected def checkExceptionInExpression[T <: Throwable : ClassTag](
expression: => Expression,
expectedErrMsg: String): Unit = {
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala
index 4ebd57e82e6..fa82dd9bac9 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala
@@ -25,7 +25,7 @@ import scala.reflect.ClassTag
import scala.reflect.runtime.universe.TypeTag
import scala.util.Random
-import org.apache.spark.{SparkConf, SparkFunSuite}
+import org.apache.spark.{SparkConf, SparkFunSuite, SparkRuntimeException}
import org.apache.spark.serializer.{JavaSerializer, KryoSerializer}
import org.apache.spark.sql.{RandomDataGenerator, Row}
import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow}
@@ -605,15 +605,15 @@ class ObjectExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
val serializer3 =
javaMapSerializerFor(classOf[java.lang.Integer], classOf[java.lang.String])(
Literal.fromObject(javaMapHasNullKey))
- checkExceptionInExpression[RuntimeException](
- serializer3, EmptyRow, "Cannot use null as map key!")
+ checkErrorInExpression[SparkRuntimeException](
+ serializer3, EmptyRow, "NULL_MAP_KEY", Map[String, String]())
// Scala Map
val serializer4 = scalaMapSerializerFor[java.lang.Integer, String](
Literal.fromObject(scalaMapHasNullKey))
- checkExceptionInExpression[RuntimeException](
- serializer4, EmptyRow, "Cannot use null as map key!")
+ checkErrorInExpression[SparkRuntimeException](
+ serializer4, EmptyRow, "NULL_MAP_KEY", Map[String, String]())
}
test("SPARK-35244: invoke should throw the original exception") {
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/ArrayBasedMapBuilderSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/ArrayBasedMapBuilderSuite.scala
index 6e07cd5d641..c8dbecac249 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/ArrayBasedMapBuilderSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/ArrayBasedMapBuilderSuite.scala
@@ -18,6 +18,7 @@
package org.apache.spark.sql.catalyst.util
import org.apache.spark.SparkFunSuite
+import org.apache.spark.SparkRuntimeException
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.{UnsafeArrayData, UnsafeRow}
import org.apache.spark.sql.catalyst.plans.SQLHelper
@@ -40,8 +41,11 @@ class ArrayBasedMapBuilderSuite extends SparkFunSuite with SQLHelper {
test("fail with null key") {
val builder = new ArrayBasedMapBuilder(IntegerType, IntegerType)
builder.put(1, null) // null value is OK
- val e = intercept[RuntimeException](builder.put(null, 1))
- assert(e.getMessage.contains("Cannot use null as map key"))
+ checkError(
+ exception = intercept[SparkRuntimeException](builder.put(null, 1)),
+ errorClass = "NULL_MAP_KEY",
+ parameters = Map.empty
+ )
}
test("fail while duplicated keys detected") {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
index 6baca3a4beb..e9455ea51c1 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
@@ -24,7 +24,7 @@ import java.sql.{Date, Timestamp}
import scala.util.Random
-import org.apache.spark.SparkException
+import org.apache.spark.{SparkException, SparkRuntimeException}
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, UnresolvedAttribute}
import org.apache.spark.sql.catalyst.expressions.{Alias, ArraysZip, AttributeReference, Expression, NamedExpression, UnaryExpression}
@@ -179,10 +179,15 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession {
)
val df5 = Seq((Seq("a", null), Seq(1, 2))).toDF("k", "v")
- val msg1 = intercept[Exception] {
+ val e1 = intercept[SparkException] {
df5.select(map_from_arrays($"k", $"v")).collect
- }.getMessage
- assert(msg1.contains("Cannot use null as map key"))
+ }
+ assert(e1.getCause.isInstanceOf[SparkRuntimeException])
+ checkError(
+ exception = e1.getCause.asInstanceOf[SparkRuntimeException],
+ errorClass = "NULL_MAP_KEY",
+ parameters = Map.empty
+ )
val df6 = Seq((Seq(1, 2), Seq("a"))).toDF("k", "v")
val msg2 = intercept[Exception] {
@@ -4496,15 +4501,25 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession {
assert(ex2.getMessage.contains(
"The number of lambda function arguments '3' does not match"))
- val ex3 = intercept[Exception] {
+ val ex3 = intercept[SparkException] {
dfExample1.selectExpr("transform_keys(i, (k, v) -> v)").show()
}
- assert(ex3.getMessage.contains("Cannot use null as map key"))
+ assert(ex3.getCause.isInstanceOf[SparkRuntimeException])
+ checkError(
+ exception = ex3.getCause.asInstanceOf[SparkRuntimeException],
+ errorClass = "NULL_MAP_KEY",
+ parameters = Map.empty
+ )
val ex3a = intercept[Exception] {
dfExample1.select(transform_keys(col("i"), (k, v) => v)).show()
}
- assert(ex3a.getMessage.contains("Cannot use null as map key"))
+ assert(ex3a.getCause.isInstanceOf[SparkRuntimeException])
+ checkError(
+ exception = ex3a.getCause.asInstanceOf[SparkRuntimeException],
+ errorClass = "NULL_MAP_KEY",
+ parameters = Map.empty
+ )
checkError(
exception = intercept[AnalysisException] {
@@ -5107,10 +5122,15 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession {
test("SPARK-24734: Fix containsNull of Concat for array type") {
val df = Seq((Seq(1), Seq[Integer](null), Seq("a", "b"))).toDF("k1", "k2", "v")
- val ex = intercept[Exception] {
+ val e1 = intercept[SparkException] {
df.select(map_from_arrays(concat($"k1", $"k2"), $"v")).show()
}
- assert(ex.getMessage.contains("Cannot use null as map key"))
+ assert(e1.getCause.isInstanceOf[SparkRuntimeException])
+ checkError(
+ exception = e1.getCause.asInstanceOf[SparkRuntimeException],
+ errorClass = "NULL_MAP_KEY",
+ parameters = Map.empty
+ )
}
test("SPARK-26370: Fix resolution of higher-order function for the same identifier") {
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org