Posted to commits@spark.apache.org by ma...@apache.org on 2023/02/05 21:05:19 UTC

[spark] branch master updated: [SPARK-41295][SPARK-41296][SQL] Rename the error classes

This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 6bb68b5f75a [SPARK-41295][SPARK-41296][SQL] Rename the error classes
6bb68b5f75a is described below

commit 6bb68b5f75ac883423a68583750258656f381c33
Author: narek_karapetian <na...@yandex.ru>
AuthorDate: Mon Feb 6 00:04:40 2023 +0300

    [SPARK-41295][SPARK-41296][SQL] Rename the error classes
    
    ### What changes were proposed in this pull request?
    In this PR, I propose to assign proper names to the following legacy error classes:
    
    - `_LEGACY_ERROR_TEMP_1105` ->  `INVALID_EXTRACT_FIELD_TYPE`
    - `_LEGACY_ERROR_TEMP_1106` ->  `INVALID_EXTRACT_BASE_FIELD_TYPE`
    - `_LEGACY_ERROR_TEMP_1209` ->  `AMBIGUOUS_REFERENCE_TO_FIELDS`
    
    The error messages were also changed:
    - `_LEGACY_ERROR_TEMP_1106`: from `Can't extract value from <child>: need struct type but got <other>.` to `Can't extract a value from <base>. Need a complex type [STRUCT, ARRAY, MAP] but got <other>.`
    - `_LEGACY_ERROR_TEMP_1209`: from `Ambiguous reference to fields <fields>.` to `Ambiguous reference to the field <field>. It appears <count> times in the schema.`
    - `_LEGACY_ERROR_TEMP_1105`: from `Field name should be String Literal, but it's <extraction>.` to `Field name should be a non-null string literal, but it's <extraction>.`
    
    The affected test suites were also migrated to `checkError()`, which verifies the error class name, SQL state, message parameters, and query context (see the examples below).
    
    **This PR also contains an additional change (`AMBIGUOUS_REFERENCE_TO_FIELDS`) that is not tracked in JIRA.**
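    
    For illustration, here is a minimal sketch of how the renamed `AMBIGUOUS_REFERENCE_TO_FIELDS` error now surfaces to users (adapted from the tests added in this PR; it assumes a running `spark` session, e.g. in spark-shell):
    
    ```scala
    import org.apache.spark.sql.functions.lit
    import spark.implicits._ // for the $"..." column syntax
    
    // A struct with two fields named 'a' -- any reference to "a" is ambiguous.
    val df = spark.sql(
      "SELECT named_struct('a', named_struct('b', 1), 'a', named_struct('c', 2)) AS struct_col")
    
    df.select($"struct_col".withField("a.c", lit(3)))
    // Previously this failed with the legacy class _LEGACY_ERROR_TEMP_1209; now it throws
    // an AnalysisException with error class AMBIGUOUS_REFERENCE_TO_FIELDS:
    //   Ambiguous reference to the field `a`. It appears 2 times in the schema.
    ```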
    
    ### Why are the changes needed?
    Proper error class names improve the user experience with Spark SQL.
    
    ### Does this PR introduce _any_ user-facing change?
    Yes, the PR changes user-facing error messages.
    
    ### How was this patch tested?
    Added test cases to `org.apache.spark.sql.errors.QueryCompilationErrorsSuite` and updated the existing error checks in `AnalysisErrorSuite`, `ColumnExpressionSuite`, and `HiveResolutionSuite` to use `checkError()`.
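    
    For example, the migrated assertions follow this `checkError()` pattern (a sketch mirroring the new test in `QueryCompilationErrorsSuite` from the diff below; `df` is a DataFrame whose `name` struct contains two `firstname` fields, and the suite mixes in Spark's error-checking test helpers):
    
    ```scala
    checkError(
      exception = intercept[AnalysisException] {
        df.select($"name.firstname") // `firstname` appears twice in the struct schema
      },
      errorClass = "AMBIGUOUS_REFERENCE_TO_FIELDS",
      sqlState = "42000",
      parameters = Map("field" -> "`firstname`", "count" -> "2"))
    ```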
    
    Closes #39501 from NarekDW/SPARK-41295.
    
    Authored-by: narek_karapetian <na...@yandex.ru>
    Signed-off-by: Max Gekk <ma...@gmail.com>
---
 core/src/main/resources/error/error-classes.json   | 33 +++++-----
 .../expressions/complexTypeExtractors.scala        |  6 +-
 .../spark/sql/errors/QueryCompilationErrors.scala  | 16 ++---
 .../sql/catalyst/analysis/AnalysisErrorSuite.scala | 20 ++++--
 .../catalyst/expressions/ComplexTypeSuite.scala    | 21 ------
 .../apache/spark/sql/ColumnExpressionSuite.scala   | 76 ++++++++++++++--------
 .../sql/errors/QueryCompilationErrorsSuite.scala   | 69 +++++++++++++++++++-
 .../sql/hive/execution/HiveResolutionSuite.scala   | 12 ++--
 8 files changed, 167 insertions(+), 86 deletions(-)

diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json
index 7ecd924ea8d..afabc56a431 100644
--- a/core/src/main/resources/error/error-classes.json
+++ b/core/src/main/resources/error/error-classes.json
@@ -17,6 +17,12 @@
     ],
     "sqlState" : "42704"
   },
+  "AMBIGUOUS_REFERENCE_TO_FIELDS" : {
+    "message" : [
+      "Ambiguous reference to the field <field>. It appears <count> times in the schema."
+    ],
+    "sqlState" : "42000"
+  },
   "ARITHMETIC_OVERFLOW" : {
     "message" : [
       "<message>.<alternative> If necessary set <config> to \"false\" to bypass this error."
@@ -750,12 +756,24 @@
     ],
     "sqlState" : "42K05"
   },
+  "INVALID_EXTRACT_BASE_FIELD_TYPE" : {
+    "message" : [
+      "Can't extract a value from <base>. Need a complex type [STRUCT, ARRAY, MAP] but got <other>."
+    ],
+    "sqlState" : "42000"
+  },
   "INVALID_EXTRACT_FIELD" : {
     "message" : [
       "Cannot extract <field> from <expr>."
     ],
     "sqlState" : "42601"
   },
+  "INVALID_EXTRACT_FIELD_TYPE" : {
+    "message" : [
+      "Field name should be a non-null string literal, but it's <extraction>."
+    ],
+    "sqlState" : "42000"
+  },
   "INVALID_FIELD_NAME" : {
     "message" : [
       "Field name <fieldName> is invalid: <path> is not a struct."
@@ -2491,16 +2509,6 @@
       "The second argument should be a double literal."
     ]
   },
-  "_LEGACY_ERROR_TEMP_1105" : {
-    "message" : [
-      "Field name should be String Literal, but it's <extraction>."
-    ]
-  },
-  "_LEGACY_ERROR_TEMP_1106" : {
-    "message" : [
-      "Can't extract value from <child>: need struct type but got <other>."
-    ]
-  },
   "_LEGACY_ERROR_TEMP_1107" : {
     "message" : [
       "Table <table> declares <batchWrite> capability but <v2WriteClassName> is not an instance of <v1WriteClassName>."
@@ -2972,11 +2980,6 @@
       "The duration and time inputs to window must be an integer, long or string literal."
     ]
   },
-  "_LEGACY_ERROR_TEMP_1209" : {
-    "message" : [
-      "Ambiguous reference to fields <fields>."
-    ]
-  },
   "_LEGACY_ERROR_TEMP_1210" : {
     "message" : [
       "The second argument in <funcName> should be a boolean literal."
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala
index d0ef5365bc9..e22af21daaa 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala
@@ -64,7 +64,7 @@ object ExtractValue {
 
       case (_: ArrayType, _) => GetArrayItem(child, extraction)
 
-      case (MapType(kt, _, _), _) => GetMapValue(child, extraction)
+      case (MapType(_, _, _), _) => GetMapValue(child, extraction)
 
       case (otherType, _) =>
         throw QueryCompilationErrors.dataTypeUnsupportedByExtractValueError(
@@ -82,8 +82,8 @@ object ExtractValue {
     if (ordinal == -1) {
       throw QueryCompilationErrors.noSuchStructFieldInGivenFieldsError(fieldName, fields)
     } else if (fields.indexWhere(checkField, ordinal + 1) != -1) {
-      throw QueryCompilationErrors.ambiguousReferenceToFieldsError(
-        fields.filter(checkField).mkString(", "))
+      val numberOfAppearance = fields.count(checkField)
+      throw QueryCompilationErrors.ambiguousReferenceToFieldsError(fieldName, numberOfAppearance)
     } else {
       ordinal
     }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
index 0bc3b2fc4f6..de0a0124767 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
@@ -1117,14 +1117,14 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase {
     dataType match {
       case StructType(_) =>
         new AnalysisException(
-          errorClass = "_LEGACY_ERROR_TEMP_1105",
-          messageParameters = Map("extraction" -> extraction.toString))
+          errorClass = "INVALID_EXTRACT_FIELD_TYPE",
+          messageParameters = Map("extraction" -> toSQLExpr(extraction)))
       case other =>
         new AnalysisException(
-          errorClass = "_LEGACY_ERROR_TEMP_1106",
+          errorClass = "INVALID_EXTRACT_BASE_FIELD_TYPE",
           messageParameters = Map(
-            "child" -> child.toString,
-            "other" -> other.catalogString))
+            "base" -> toSQLExpr(child),
+            "other" -> toSQLType(other)))
     }
   }
 
@@ -2099,10 +2099,10 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase {
         "fields" -> fields.map(f => toSQLId(f.name)).mkString(", ")))
   }
 
-  def ambiguousReferenceToFieldsError(fields: String): Throwable = {
+  def ambiguousReferenceToFieldsError(field: String, numberOfAppearance: Int): Throwable = {
     new AnalysisException(
-      errorClass = "_LEGACY_ERROR_TEMP_1209",
-      messageParameters = Map("fields" -> fields))
+      errorClass = "AMBIGUOUS_REFERENCE_TO_FIELDS",
+      messageParameters = Map("field" -> toSQLId(field), "count" -> numberOfAppearance.toString))
   }
 
   def secondArgumentInFunctionIsNotBooleanLiteralError(funcName: String): Throwable = {
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
index 71d3deb36c2..cbd6749807f 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
@@ -370,17 +370,25 @@ class AnalysisErrorSuite extends AnalysisTest {
       "expressionAnyValue" -> "\"any_value(b)\"")
   )
 
-  errorTest(
+  errorClassTest(
     "ambiguous field",
     nestedRelation.select($"top.duplicateField"),
-    "Ambiguous reference to fields" :: "duplicateField" :: Nil,
-    caseSensitive = false)
+    errorClass = "AMBIGUOUS_REFERENCE_TO_FIELDS",
+    messageParameters = Map(
+      "field" -> "`duplicateField`",
+      "count" -> "2"),
+    caseSensitive = false
+  )
 
-  errorTest(
+  errorClassTest(
     "ambiguous field due to case insensitivity",
     nestedRelation.select($"top.differentCase"),
-    "Ambiguous reference to fields" :: "differentCase" :: "differentcase" :: Nil,
-    caseSensitive = false)
+    errorClass = "AMBIGUOUS_REFERENCE_TO_FIELDS",
+    messageParameters = Map(
+      "field" -> "`differentCase`",
+      "count" -> "2"),
+    caseSensitive = false
+  )
 
   errorClassTest(
     "missing field",
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala
index ecff40c0b3b..8dbca473cfb 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala
@@ -462,27 +462,6 @@ class ComplexTypeSuite extends SparkFunSuite with ExpressionEvalHelper {
       1, create_row(create_row(1)))
   }
 
-  test("error message of ExtractValue") {
-    val structType = StructType(StructField("a", StringType, true) :: Nil)
-    val otherType = StringType
-
-    def checkErrorMessage(
-      childDataType: DataType,
-      fieldDataType: DataType,
-      errorMessage: String): Unit = {
-      val e = intercept[org.apache.spark.sql.AnalysisException] {
-        ExtractValue(
-          Literal.create(null, childDataType),
-          Literal.create(null, fieldDataType),
-          _ == _)
-      }
-      assert(e.getMessage().contains(errorMessage))
-    }
-
-    checkErrorMessage(structType, IntegerType, "Field name should be String Literal")
-    checkErrorMessage(otherType, StringType, "Can't extract value from")
-  }
-
   test("ensure to preserve metadata") {
     val metadata = new MetadataBuilder()
       .putString("key", "value")
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
index 718405bd8ac..b93b643cbb8 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
@@ -1087,17 +1087,22 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession {
   }
 
   test("withField should throw an exception if intermediate field reference is ambiguous") {
-    intercept[AnalysisException] {
-      val structLevel2: DataFrame = spark.createDataFrame(
-        sparkContext.parallelize(Row(Row(Row(1, null, 3), 4)) :: Nil),
-        StructType(Seq(
-          StructField("a", StructType(Seq(
-            StructField("a", structType, nullable = false),
-            StructField("a", structType, nullable = false))),
-            nullable = false))))
+    checkError(
+      exception = intercept[AnalysisException] {
+        val structLevel2: DataFrame = spark.createDataFrame(
+          sparkContext.parallelize(Row(Row(Row(1, null, 3), 4)) :: Nil),
+          StructType(Seq(
+            StructField("a", StructType(Seq(
+              StructField("a", structType, nullable = false),
+              StructField("a", structType, nullable = false))),
+              nullable = false))))
 
-      structLevel2.withColumn("a", $"a".withField("a.b", lit(2)))
-    }.getMessage should include("Ambiguous reference to fields")
+        structLevel2.withColumn("a", $"a".withField("a.b", lit(2)))
+      },
+      errorClass = "AMBIGUOUS_REFERENCE_TO_FIELDS",
+      sqlState = "42000",
+      parameters = Map("field" -> "`a`", "count" -> "2")
+    )
   }
 
   test("withField should add field with no name") {
@@ -1647,10 +1652,15 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession {
         .select($"struct_col".withField("a.c", lit(3))),
       Row(Row(Row(1, 2, 3))))
 
-    intercept[AnalysisException] {
-      sql("SELECT named_struct('a', named_struct('b', 1), 'a', named_struct('c', 2)) struct_col")
-        .select($"struct_col".withField("a.c", lit(3)))
-    }.getMessage should include("Ambiguous reference to fields")
+    checkError(
+      exception = intercept[AnalysisException] {
+        sql("SELECT named_struct('a', named_struct('b', 1), 'a', named_struct('c', 2)) struct_col")
+          .select($"struct_col".withField("a.c", lit(3)))
+      },
+      errorClass = "AMBIGUOUS_REFERENCE_TO_FIELDS",
+      sqlState = "42000",
+      parameters = Map("field" -> "`a`", "count" -> "2")
+    )
 
     checkAnswer(
       sql("SELECT named_struct('a', named_struct('a', 1, 'b', 2)) struct_col")
@@ -1862,17 +1872,22 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession {
   }
 
   test("dropFields should throw an exception if intermediate field reference is ambiguous") {
-    intercept[AnalysisException] {
-      val structLevel2: DataFrame = spark.createDataFrame(
-        sparkContext.parallelize(Row(Row(Row(1, null, 3), 4)) :: Nil),
-        StructType(Seq(
-          StructField("a", StructType(Seq(
-            StructField("a", structType, nullable = false),
-            StructField("a", structType, nullable = false))),
-            nullable = false))))
+    checkError(
+      exception = intercept[AnalysisException] {
+        val structLevel2: DataFrame = spark.createDataFrame(
+          sparkContext.parallelize(Row(Row(Row(1, null, 3), 4)) :: Nil),
+          StructType(Seq(
+            StructField("a", StructType(Seq(
+              StructField("a", structType, nullable = false),
+              StructField("a", structType, nullable = false))),
+              nullable = false))))
 
-      structLevel2.withColumn("a", $"a".dropFields("a.b"))
-    }.getMessage should include("Ambiguous reference to fields")
+        structLevel2.withColumn("a", $"a".dropFields("a.b"))
+      },
+      errorClass = "AMBIGUOUS_REFERENCE_TO_FIELDS",
+      sqlState = "42000",
+      parameters = Map("field" -> "`a`", "count" -> "2")
+    )
   }
 
   test("dropFields should drop field in struct") {
@@ -2208,10 +2223,15 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession {
         .select($"struct_col".dropFields("a.b")),
       Row(Row(Row(1))))
 
-    intercept[AnalysisException] {
-      sql("SELECT named_struct('a', named_struct('b', 1), 'a', named_struct('c', 2)) struct_col")
-        .select($"struct_col".dropFields("a.c"))
-    }.getMessage should include("Ambiguous reference to fields")
+    checkError(
+      exception = intercept[AnalysisException] {
+        sql("SELECT named_struct('a', named_struct('b', 1), 'a', named_struct('c', 2)) struct_col")
+          .select($"struct_col".dropFields("a.c"))
+      },
+      errorClass = "AMBIGUOUS_REFERENCE_TO_FIELDS",
+      sqlState = "42000",
+      parameters = Map("field" -> "`a`", "count" -> "2")
+    )
 
     checkAnswer(
       sql("SELECT named_struct('a', named_struct('a', 1, 'b', 2, 'c', 3)) struct_col")
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala
index 55964afba74..47c4a86ccc5 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala
@@ -21,7 +21,7 @@ import org.apache.spark.sql.{AnalysisException, ClassData, IntegratedUDFTestUtil
 import org.apache.spark.sql.api.java.{UDF1, UDF2, UDF23Test}
 import org.apache.spark.sql.catalyst.parser.ParseException
 import org.apache.spark.sql.expressions.SparkUserDefinedFunction
-import org.apache.spark.sql.functions.{from_json, grouping, grouping_id, lit, struct, sum, udf}
+import org.apache.spark.sql.functions.{array, from_json, grouping, grouping_id, lit, struct, sum, udf}
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.test.SharedSparkSession
 import org.apache.spark.sql.types.{IntegerType, MapType, StringType, StructField, StructType}
@@ -715,6 +715,73 @@ class QueryCompilationErrorsSuite
       )
     }
   }
+
+  test("AMBIGUOUS_REFERENCE_TO_FIELDS: select ambiguous field from struct") {
+    val data = Seq(
+      Row(Row("test1", "test1")),
+      Row(Row("test2", "test2")))
+
+    val schema = new StructType()
+      .add("name",
+        new StructType()
+          .add("firstname", StringType)
+          .add("firstname", StringType))
+
+    val df = spark.createDataFrame(spark.sparkContext.parallelize(data), schema)
+
+    checkError(
+      exception = intercept[AnalysisException] {
+        df.select($"name.firstname")
+      },
+      errorClass = "AMBIGUOUS_REFERENCE_TO_FIELDS",
+      sqlState = "42000",
+      parameters = Map("field" -> "`firstname`", "count" -> "2")
+    )
+  }
+
+  test("INVALID_EXTRACT_BASE_FIELD_TYPE: select ambiguous field from struct") {
+    val df = Seq("test").toDF("firstname")
+
+    checkError(
+      exception = intercept[AnalysisException] {
+        df.select($"firstname.test_field")
+      },
+      errorClass = "INVALID_EXTRACT_BASE_FIELD_TYPE",
+      sqlState = "42000",
+      parameters = Map("base" -> "\"firstname\"", "other" -> "\"STRING\""))
+  }
+
+  test("INVALID_EXTRACT_FIELD_TYPE: extract not string literal field") {
+    val structureData = Seq(
+      Row(Row(Row("test1", "test1"))),
+      Row(Row(Row("test2", "test2"))))
+
+    val structureSchema = new StructType()
+      .add("name",
+        new StructType()
+          .add("inner",
+            new StructType()
+              .add("firstname", StringType)
+              .add("lastname", StringType)))
+
+    val df = spark.createDataFrame(spark.sparkContext.parallelize(structureData), structureSchema)
+
+    checkError(
+      exception = intercept[AnalysisException] {
+        df.select(struct($"name"(struct("test"))))
+      },
+      errorClass = "INVALID_EXTRACT_FIELD_TYPE",
+      sqlState = "42000",
+      parameters = Map("extraction" -> "\"struct(test)\""))
+
+    checkError(
+      exception = intercept[AnalysisException] {
+        df.select($"name"(array("test")))
+      },
+      errorClass = "INVALID_EXTRACT_FIELD_TYPE",
+      sqlState = "42000",
+      parameters = Map("extraction" -> "\"array(test)\""))
+  }
 }
 
 class MyCastToString extends SparkUserDefinedFunction(
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala
index 72cfabb9246..8e2a5a66172 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala
@@ -44,10 +44,14 @@ class HiveResolutionSuite extends HiveComparisonTest {
       .createOrReplaceTempView("nested")
 
     // there are 2 filed matching field name "b", we should report Ambiguous reference error
-    val exception = intercept[AnalysisException] {
-      sql("SELECT a[0].b from nested").queryExecution.analyzed
-    }
-    assert(exception.getMessage.contains("Ambiguous reference to fields"))
+    checkError(
+      exception = intercept[AnalysisException] {
+        sql("SELECT a[0].b from nested").queryExecution.analyzed
+      },
+      errorClass = "AMBIGUOUS_REFERENCE_TO_FIELDS",
+      sqlState = "42000",
+      parameters = Map("field" -> "`b`", "count" -> "2")
+    )
   }
 
   createQueryTest("table.attr",

