You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2023/02/05 21:05:19 UTC
[spark] branch master updated: [SPARK-41295][SPARK-41296][SQL] Rename the error classes
This is an automated email from the ASF dual-hosted git repository.
maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 6bb68b5f75a [SPARK-41295][SPARK-41296][SQL] Rename the error classes
6bb68b5f75a is described below
commit 6bb68b5f75ac883423a68583750258656f381c33
Author: narek_karapetian <na...@yandex.ru>
AuthorDate: Mon Feb 6 00:04:40 2023 +0300
[SPARK-41295][SPARK-41296][SQL] Rename the error classes
### What changes were proposed in this pull request?
In the PR, I propose to assign the proper names to the legacy error classes:
- `_LEGACY_ERROR_TEMP_1105` -> `INVALID_EXTRACT_FIELD_TYPE`
- `_LEGACY_ERROR_TEMP_1106` -> `INVALID_EXTRACT_BASE_FIELD_TYPE`
- `_LEGACY_ERROR_TEMP_1209` -> `AMBIGUOUS_REFERENCE_TO_FIELDS`
Changed error messages for:
- `_LEGACY_ERROR_TEMP_1106` from `Can't extract value from <child>: need struct type but got <other>.` to `"Can't extract a value from <base>. Need a complex type [STRUCT, ARRAY, MAP] but got <other>."`
- `_LEGACY_ERROR_TEMP_1209` from `"Ambiguous reference to fields <fields>."` to `"Ambiguous reference to the field <field>. It appears <count> times in the schema."`
- `_LEGACY_ERROR_TEMP_1105` from `"Field name should be String Literal, but it's <extraction>."` to `"Field name should be a non-null string literal, but it's <extraction>."`
and modifies the test suites to use checkError(), which checks the error class name, query context, etc.
**Also this PR contains an additional change (AMBIGUOUS_REFERENCE_TO_FIELDS) which is not tracked in JIRA.**
### Why are the changes needed?
Proper names for error classes improve the user experience with Spark SQL: users see a stable, well-named error class instead of an opaque legacy temporary identifier.
### Does this PR introduce _any_ user-facing change?
Yes, the PR changes a user-facing error message.
### How was this patch tested?
Added additional test cases in `org.apache.spark.sql.errors.QueryCompilationErrorsSuite`
Closes #39501 from NarekDW/SPARK-41295.
Authored-by: narek_karapetian <na...@yandex.ru>
Signed-off-by: Max Gekk <ma...@gmail.com>
---
core/src/main/resources/error/error-classes.json | 33 +++++-----
.../expressions/complexTypeExtractors.scala | 6 +-
.../spark/sql/errors/QueryCompilationErrors.scala | 16 ++---
.../sql/catalyst/analysis/AnalysisErrorSuite.scala | 20 ++++--
.../catalyst/expressions/ComplexTypeSuite.scala | 21 ------
.../apache/spark/sql/ColumnExpressionSuite.scala | 76 ++++++++++++++--------
.../sql/errors/QueryCompilationErrorsSuite.scala | 69 +++++++++++++++++++-
.../sql/hive/execution/HiveResolutionSuite.scala | 12 ++--
8 files changed, 167 insertions(+), 86 deletions(-)
diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json
index 7ecd924ea8d..afabc56a431 100644
--- a/core/src/main/resources/error/error-classes.json
+++ b/core/src/main/resources/error/error-classes.json
@@ -17,6 +17,12 @@
],
"sqlState" : "42704"
},
+ "AMBIGUOUS_REFERENCE_TO_FIELDS" : {
+ "message" : [
+ "Ambiguous reference to the field <field>. It appears <count> times in the schema."
+ ],
+ "sqlState" : "42000"
+ },
"ARITHMETIC_OVERFLOW" : {
"message" : [
"<message>.<alternative> If necessary set <config> to \"false\" to bypass this error."
@@ -750,12 +756,24 @@
],
"sqlState" : "42K05"
},
+ "INVALID_EXTRACT_BASE_FIELD_TYPE" : {
+ "message" : [
+ "Can't extract a value from <base>. Need a complex type [STRUCT, ARRAY, MAP] but got <other>."
+ ],
+ "sqlState" : "42000"
+ },
"INVALID_EXTRACT_FIELD" : {
"message" : [
"Cannot extract <field> from <expr>."
],
"sqlState" : "42601"
},
+ "INVALID_EXTRACT_FIELD_TYPE" : {
+ "message" : [
+ "Field name should be a non-null string literal, but it's <extraction>."
+ ],
+ "sqlState" : "42000"
+ },
"INVALID_FIELD_NAME" : {
"message" : [
"Field name <fieldName> is invalid: <path> is not a struct."
@@ -2491,16 +2509,6 @@
"The second argument should be a double literal."
]
},
- "_LEGACY_ERROR_TEMP_1105" : {
- "message" : [
- "Field name should be String Literal, but it's <extraction>."
- ]
- },
- "_LEGACY_ERROR_TEMP_1106" : {
- "message" : [
- "Can't extract value from <child>: need struct type but got <other>."
- ]
- },
"_LEGACY_ERROR_TEMP_1107" : {
"message" : [
"Table <table> declares <batchWrite> capability but <v2WriteClassName> is not an instance of <v1WriteClassName>."
@@ -2972,11 +2980,6 @@
"The duration and time inputs to window must be an integer, long or string literal."
]
},
- "_LEGACY_ERROR_TEMP_1209" : {
- "message" : [
- "Ambiguous reference to fields <fields>."
- ]
- },
"_LEGACY_ERROR_TEMP_1210" : {
"message" : [
"The second argument in <funcName> should be a boolean literal."
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala
index d0ef5365bc9..e22af21daaa 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala
@@ -64,7 +64,7 @@ object ExtractValue {
case (_: ArrayType, _) => GetArrayItem(child, extraction)
- case (MapType(kt, _, _), _) => GetMapValue(child, extraction)
+ case (MapType(_, _, _), _) => GetMapValue(child, extraction)
case (otherType, _) =>
throw QueryCompilationErrors.dataTypeUnsupportedByExtractValueError(
@@ -82,8 +82,8 @@ object ExtractValue {
if (ordinal == -1) {
throw QueryCompilationErrors.noSuchStructFieldInGivenFieldsError(fieldName, fields)
} else if (fields.indexWhere(checkField, ordinal + 1) != -1) {
- throw QueryCompilationErrors.ambiguousReferenceToFieldsError(
- fields.filter(checkField).mkString(", "))
+ val numberOfAppearance = fields.count(checkField)
+ throw QueryCompilationErrors.ambiguousReferenceToFieldsError(fieldName, numberOfAppearance)
} else {
ordinal
}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
index 0bc3b2fc4f6..de0a0124767 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
@@ -1117,14 +1117,14 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase {
dataType match {
case StructType(_) =>
new AnalysisException(
- errorClass = "_LEGACY_ERROR_TEMP_1105",
- messageParameters = Map("extraction" -> extraction.toString))
+ errorClass = "INVALID_EXTRACT_FIELD_TYPE",
+ messageParameters = Map("extraction" -> toSQLExpr(extraction)))
case other =>
new AnalysisException(
- errorClass = "_LEGACY_ERROR_TEMP_1106",
+ errorClass = "INVALID_EXTRACT_BASE_FIELD_TYPE",
messageParameters = Map(
- "child" -> child.toString,
- "other" -> other.catalogString))
+ "base" -> toSQLExpr(child),
+ "other" -> toSQLType(other)))
}
}
@@ -2099,10 +2099,10 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase {
"fields" -> fields.map(f => toSQLId(f.name)).mkString(", ")))
}
- def ambiguousReferenceToFieldsError(fields: String): Throwable = {
+ def ambiguousReferenceToFieldsError(field: String, numberOfAppearance: Int): Throwable = {
new AnalysisException(
- errorClass = "_LEGACY_ERROR_TEMP_1209",
- messageParameters = Map("fields" -> fields))
+ errorClass = "AMBIGUOUS_REFERENCE_TO_FIELDS",
+ messageParameters = Map("field" -> toSQLId(field), "count" -> numberOfAppearance.toString))
}
def secondArgumentInFunctionIsNotBooleanLiteralError(funcName: String): Throwable = {
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
index 71d3deb36c2..cbd6749807f 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
@@ -370,17 +370,25 @@ class AnalysisErrorSuite extends AnalysisTest {
"expressionAnyValue" -> "\"any_value(b)\"")
)
- errorTest(
+ errorClassTest(
"ambiguous field",
nestedRelation.select($"top.duplicateField"),
- "Ambiguous reference to fields" :: "duplicateField" :: Nil,
- caseSensitive = false)
+ errorClass = "AMBIGUOUS_REFERENCE_TO_FIELDS",
+ messageParameters = Map(
+ "field" -> "`duplicateField`",
+ "count" -> "2"),
+ caseSensitive = false
+ )
- errorTest(
+ errorClassTest(
"ambiguous field due to case insensitivity",
nestedRelation.select($"top.differentCase"),
- "Ambiguous reference to fields" :: "differentCase" :: "differentcase" :: Nil,
- caseSensitive = false)
+ errorClass = "AMBIGUOUS_REFERENCE_TO_FIELDS",
+ messageParameters = Map(
+ "field" -> "`differentCase`",
+ "count" -> "2"),
+ caseSensitive = false
+ )
errorClassTest(
"missing field",
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala
index ecff40c0b3b..8dbca473cfb 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala
@@ -462,27 +462,6 @@ class ComplexTypeSuite extends SparkFunSuite with ExpressionEvalHelper {
1, create_row(create_row(1)))
}
- test("error message of ExtractValue") {
- val structType = StructType(StructField("a", StringType, true) :: Nil)
- val otherType = StringType
-
- def checkErrorMessage(
- childDataType: DataType,
- fieldDataType: DataType,
- errorMessage: String): Unit = {
- val e = intercept[org.apache.spark.sql.AnalysisException] {
- ExtractValue(
- Literal.create(null, childDataType),
- Literal.create(null, fieldDataType),
- _ == _)
- }
- assert(e.getMessage().contains(errorMessage))
- }
-
- checkErrorMessage(structType, IntegerType, "Field name should be String Literal")
- checkErrorMessage(otherType, StringType, "Can't extract value from")
- }
-
test("ensure to preserve metadata") {
val metadata = new MetadataBuilder()
.putString("key", "value")
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
index 718405bd8ac..b93b643cbb8 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
@@ -1087,17 +1087,22 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession {
}
test("withField should throw an exception if intermediate field reference is ambiguous") {
- intercept[AnalysisException] {
- val structLevel2: DataFrame = spark.createDataFrame(
- sparkContext.parallelize(Row(Row(Row(1, null, 3), 4)) :: Nil),
- StructType(Seq(
- StructField("a", StructType(Seq(
- StructField("a", structType, nullable = false),
- StructField("a", structType, nullable = false))),
- nullable = false))))
+ checkError(
+ exception = intercept[AnalysisException] {
+ val structLevel2: DataFrame = spark.createDataFrame(
+ sparkContext.parallelize(Row(Row(Row(1, null, 3), 4)) :: Nil),
+ StructType(Seq(
+ StructField("a", StructType(Seq(
+ StructField("a", structType, nullable = false),
+ StructField("a", structType, nullable = false))),
+ nullable = false))))
- structLevel2.withColumn("a", $"a".withField("a.b", lit(2)))
- }.getMessage should include("Ambiguous reference to fields")
+ structLevel2.withColumn("a", $"a".withField("a.b", lit(2)))
+ },
+ errorClass = "AMBIGUOUS_REFERENCE_TO_FIELDS",
+ sqlState = "42000",
+ parameters = Map("field" -> "`a`", "count" -> "2")
+ )
}
test("withField should add field with no name") {
@@ -1647,10 +1652,15 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession {
.select($"struct_col".withField("a.c", lit(3))),
Row(Row(Row(1, 2, 3))))
- intercept[AnalysisException] {
- sql("SELECT named_struct('a', named_struct('b', 1), 'a', named_struct('c', 2)) struct_col")
- .select($"struct_col".withField("a.c", lit(3)))
- }.getMessage should include("Ambiguous reference to fields")
+ checkError(
+ exception = intercept[AnalysisException] {
+ sql("SELECT named_struct('a', named_struct('b', 1), 'a', named_struct('c', 2)) struct_col")
+ .select($"struct_col".withField("a.c", lit(3)))
+ },
+ errorClass = "AMBIGUOUS_REFERENCE_TO_FIELDS",
+ sqlState = "42000",
+ parameters = Map("field" -> "`a`", "count" -> "2")
+ )
checkAnswer(
sql("SELECT named_struct('a', named_struct('a', 1, 'b', 2)) struct_col")
@@ -1862,17 +1872,22 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession {
}
test("dropFields should throw an exception if intermediate field reference is ambiguous") {
- intercept[AnalysisException] {
- val structLevel2: DataFrame = spark.createDataFrame(
- sparkContext.parallelize(Row(Row(Row(1, null, 3), 4)) :: Nil),
- StructType(Seq(
- StructField("a", StructType(Seq(
- StructField("a", structType, nullable = false),
- StructField("a", structType, nullable = false))),
- nullable = false))))
+ checkError(
+ exception = intercept[AnalysisException] {
+ val structLevel2: DataFrame = spark.createDataFrame(
+ sparkContext.parallelize(Row(Row(Row(1, null, 3), 4)) :: Nil),
+ StructType(Seq(
+ StructField("a", StructType(Seq(
+ StructField("a", structType, nullable = false),
+ StructField("a", structType, nullable = false))),
+ nullable = false))))
- structLevel2.withColumn("a", $"a".dropFields("a.b"))
- }.getMessage should include("Ambiguous reference to fields")
+ structLevel2.withColumn("a", $"a".dropFields("a.b"))
+ },
+ errorClass = "AMBIGUOUS_REFERENCE_TO_FIELDS",
+ sqlState = "42000",
+ parameters = Map("field" -> "`a`", "count" -> "2")
+ )
}
test("dropFields should drop field in struct") {
@@ -2208,10 +2223,15 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession {
.select($"struct_col".dropFields("a.b")),
Row(Row(Row(1))))
- intercept[AnalysisException] {
- sql("SELECT named_struct('a', named_struct('b', 1), 'a', named_struct('c', 2)) struct_col")
- .select($"struct_col".dropFields("a.c"))
- }.getMessage should include("Ambiguous reference to fields")
+ checkError(
+ exception = intercept[AnalysisException] {
+ sql("SELECT named_struct('a', named_struct('b', 1), 'a', named_struct('c', 2)) struct_col")
+ .select($"struct_col".dropFields("a.c"))
+ },
+ errorClass = "AMBIGUOUS_REFERENCE_TO_FIELDS",
+ sqlState = "42000",
+ parameters = Map("field" -> "`a`", "count" -> "2")
+ )
checkAnswer(
sql("SELECT named_struct('a', named_struct('a', 1, 'b', 2, 'c', 3)) struct_col")
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala
index 55964afba74..47c4a86ccc5 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala
@@ -21,7 +21,7 @@ import org.apache.spark.sql.{AnalysisException, ClassData, IntegratedUDFTestUtil
import org.apache.spark.sql.api.java.{UDF1, UDF2, UDF23Test}
import org.apache.spark.sql.catalyst.parser.ParseException
import org.apache.spark.sql.expressions.SparkUserDefinedFunction
-import org.apache.spark.sql.functions.{from_json, grouping, grouping_id, lit, struct, sum, udf}
+import org.apache.spark.sql.functions.{array, from_json, grouping, grouping_id, lit, struct, sum, udf}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.test.SharedSparkSession
import org.apache.spark.sql.types.{IntegerType, MapType, StringType, StructField, StructType}
@@ -715,6 +715,73 @@ class QueryCompilationErrorsSuite
)
}
}
+
+ test("AMBIGUOUS_REFERENCE_TO_FIELDS: select ambiguous field from struct") {
+ val data = Seq(
+ Row(Row("test1", "test1")),
+ Row(Row("test2", "test2")))
+
+ val schema = new StructType()
+ .add("name",
+ new StructType()
+ .add("firstname", StringType)
+ .add("firstname", StringType))
+
+ val df = spark.createDataFrame(spark.sparkContext.parallelize(data), schema)
+
+ checkError(
+ exception = intercept[AnalysisException] {
+ df.select($"name.firstname")
+ },
+ errorClass = "AMBIGUOUS_REFERENCE_TO_FIELDS",
+ sqlState = "42000",
+ parameters = Map("field" -> "`firstname`", "count" -> "2")
+ )
+ }
+
+ test("INVALID_EXTRACT_BASE_FIELD_TYPE: select ambiguous field from struct") {
+ val df = Seq("test").toDF("firstname")
+
+ checkError(
+ exception = intercept[AnalysisException] {
+ df.select($"firstname.test_field")
+ },
+ errorClass = "INVALID_EXTRACT_BASE_FIELD_TYPE",
+ sqlState = "42000",
+ parameters = Map("base" -> "\"firstname\"", "other" -> "\"STRING\""))
+ }
+
+ test("INVALID_EXTRACT_FIELD_TYPE: extract not string literal field") {
+ val structureData = Seq(
+ Row(Row(Row("test1", "test1"))),
+ Row(Row(Row("test2", "test2"))))
+
+ val structureSchema = new StructType()
+ .add("name",
+ new StructType()
+ .add("inner",
+ new StructType()
+ .add("firstname", StringType)
+ .add("lastname", StringType)))
+
+ val df = spark.createDataFrame(spark.sparkContext.parallelize(structureData), structureSchema)
+
+ checkError(
+ exception = intercept[AnalysisException] {
+ df.select(struct($"name"(struct("test"))))
+ },
+ errorClass = "INVALID_EXTRACT_FIELD_TYPE",
+ sqlState = "42000",
+ parameters = Map("extraction" -> "\"struct(test)\""))
+
+ checkError(
+ exception = intercept[AnalysisException] {
+ df.select($"name"(array("test")))
+ },
+ errorClass = "INVALID_EXTRACT_FIELD_TYPE",
+ sqlState = "42000",
+ parameters = Map("extraction" -> "\"array(test)\""))
+ }
}
class MyCastToString extends SparkUserDefinedFunction(
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala
index 72cfabb9246..8e2a5a66172 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala
@@ -44,10 +44,14 @@ class HiveResolutionSuite extends HiveComparisonTest {
.createOrReplaceTempView("nested")
// there are 2 filed matching field name "b", we should report Ambiguous reference error
- val exception = intercept[AnalysisException] {
- sql("SELECT a[0].b from nested").queryExecution.analyzed
- }
- assert(exception.getMessage.contains("Ambiguous reference to fields"))
+ checkError(
+ exception = intercept[AnalysisException] {
+ sql("SELECT a[0].b from nested").queryExecution.analyzed
+ },
+ errorClass = "AMBIGUOUS_REFERENCE_TO_FIELDS",
+ sqlState = "42000",
+ parameters = Map("field" -> "`b`", "count" -> "2")
+ )
}
createQueryTest("table.attr",
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org