You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2023/06/06 07:29:04 UTC
[spark] branch master updated: [SPARK-43913][SQL] Assign names to the error class _LEGACY_ERROR_TEMP_[2426-2432]

This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 0cd5ca5a7b3 [SPARK-43913][SQL] Assign names to the error class _LEGACY_ERROR_TEMP_[2426-2432]
0cd5ca5a7b3 is described below

commit 0cd5ca5a7b31f65a005c8ee2e90a6b4a29623ba7
Author: Jiaan Geng <be...@163.com>
AuthorDate: Tue Jun 6 10:28:48 2023 +0300

    [SPARK-43913][SQL] Assign names to the error class _LEGACY_ERROR_TEMP_[2426-2432]
    
    ### What changes were proposed in this pull request?
    This PR assigns names to the error classes `_LEGACY_ERROR_TEMP_[2426-2432]`.
    
    ### Why are the changes needed?
    Improve the error framework.
    
    ### Does this PR introduce _any_ user-facing change?
    'No'.
    
    ### How was this patch tested?
    Existing test cases.
    
    Closes #41424 from beliefer/SPARK-43913.
    
    Authored-by: Jiaan Geng <be...@163.com>
    Signed-off-by: Max Gekk <ma...@gmail.com>
---
 core/src/main/resources/error/error-classes.json   | 58 ++++++++++++----------
 .../sql/catalyst/analysis/CheckAnalysis.scala      | 51 +++++++++++--------
 .../sql/catalyst/analysis/AnalysisErrorSuite.scala | 20 ++++----
 .../CreateTablePartitioningValidationSuite.scala   | 22 ++++----
 .../negative-cases/invalid-correlation.sql.out     |  6 ++-
 .../negative-cases/invalid-correlation.sql.out     |  6 ++-
 6 files changed, 93 insertions(+), 70 deletions(-)

diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json
index de80415d85b..8c3c076ce74 100644
--- a/core/src/main/resources/error/error-classes.json
+++ b/core/src/main/resources/error/error-classes.json
@@ -660,6 +660,11 @@
       "The event time <eventName> has the invalid type <eventType>, but expected \"TIMESTAMP\"."
     ]
   },
+  "EXPRESSION_TYPE_IS_NOT_ORDERABLE" : {
+    "message" : [
+      "Column expression <expr> cannot be sorted because its type <exprType> is not orderable."
+    ]
+  },
   "FAILED_EXECUTE_UDF" : {
     "message" : [
       "Failed to execute user defined function (<functionName>: (<signature>) => <result>)."
@@ -1541,6 +1546,24 @@
     ],
     "sqlState" : "42803"
   },
+  "MISSING_ATTRIBUTES" : {
+    "message" : [
+      "Resolved attribute(s) <missingAttributes> missing from <input> in operator <operator>."
+    ],
+    "subClass" : {
+      "RESOLVED_ATTRIBUTE_APPEAR_IN_OPERATION" : {
+        "message" : [
+          "Attribute(s) with the same name appear in the operation: <operation>.",
+          "Please check if the right attribute(s) are used."
+        ]
+      },
+      "RESOLVED_ATTRIBUTE_MISSING_FROM_INPUT" : {
+        "message" : [
+          ""
+        ]
+      }
+    }
+  },
   "MISSING_GROUP_BY" : {
     "message" : [
       "The query does not include a GROUP BY clause. Add GROUP BY or turn it into the window functions using OVER clauses."
@@ -1945,6 +1968,11 @@
       "Query [id = <id>, runId = <runId>] terminated with exception: <message>"
     ]
   },
+  "SUM_OF_LIMIT_AND_OFFSET_EXCEEDS_MAX_INT" : {
+    "message" : [
+      "The sum of the LIMIT clause and the OFFSET clause must not be greater than the maximum 32-bit integer value (2,147,483,647) but found limit = <limit>, offset = <offset>."
+    ]
+  },
   "TABLE_OR_VIEW_ALREADY_EXISTS" : {
     "message" : [
       "Cannot create table or view <relationName> because it already exists.",
@@ -2310,6 +2338,11 @@
           "Parameter markers in unexpected statement: <statement>. Parameter markers must only be used in a query, or DML statement."
         ]
       },
+      "PARTITION_WITH_NESTED_COLUMN_IS_UNSUPPORTED" : {
+        "message" : [
+          "Invalid partitioning: <cols> is missing or is in a map or array."
+        ]
+      },
       "PIVOT_AFTER_GROUP_BY" : {
         "message" : [
           "PIVOT clause following a GROUP BY clause. Consider pushing the GROUP BY into a subquery."
@@ -5525,31 +5558,6 @@
       "failed to evaluate expression <sqlExpr>: <msg>"
     ]
   },
-  "_LEGACY_ERROR_TEMP_2426" : {
-    "message" : [
-      "nondeterministic expression <sqlExpr> should not appear in grouping expression."
-    ]
-  },
-  "_LEGACY_ERROR_TEMP_2427" : {
-    "message" : [
-      "sorting is not supported for columns of type <type>."
-    ]
-  },
-  "_LEGACY_ERROR_TEMP_2428" : {
-    "message" : [
-      "The sum of the LIMIT clause and the OFFSET clause must not be greater than the maximum 32-bit integer value (2,147,483,647) but found limit = <limit>, offset = <offset>."
-    ]
-  },
-  "_LEGACY_ERROR_TEMP_2431" : {
-    "message" : [
-      "Invalid partitioning: <cols> is missing or is in a map or array."
-    ]
-  },
-  "_LEGACY_ERROR_TEMP_2432" : {
-    "message" : [
-      "<msg>"
-    ]
-  },
   "_LEGACY_ERROR_TEMP_2433" : {
     "message" : [
       "Only a single table generating function is allowed in a SELECT clause, found:",
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
index 594c0b666e8..9124890d4af 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
@@ -474,9 +474,11 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB
                 // This is just a sanity check, our analysis rule PullOutNondeterministic should
                 // already pull out those nondeterministic expressions and evaluate them in
                 // a Project node.
-                expr.failAnalysis(
-                  errorClass = "_LEGACY_ERROR_TEMP_2426",
-                  messageParameters = Map("sqlExpr" -> expr.sql))
+                throw SparkException.internalError(
+                  msg = s"Non-deterministic expression '${toSQLExpr(expr)}' should not appear in " +
+                    "grouping expression.",
+                  context = expr.origin.getQueryContext,
+                  summary = expr.origin.context.summary)
               }
             }
 
@@ -545,8 +547,8 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB
             orders.foreach { order =>
               if (!RowOrdering.isOrderable(order.dataType)) {
                 order.failAnalysis(
-                  errorClass = "_LEGACY_ERROR_TEMP_2427",
-                  messageParameters = Map("type" -> order.dataType.catalogString))
+                  errorClass = "EXPRESSION_TYPE_IS_NOT_ORDERABLE",
+                  messageParameters = Map("exprType" -> toSQLType(order.dataType)))
               }
             }
 
@@ -560,7 +562,7 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB
                 val offset = offsetExpr.eval().asInstanceOf[Int]
                 if (Int.MaxValue - limit < offset) {
                   child.failAnalysis(
-                    errorClass = "_LEGACY_ERROR_TEMP_2428",
+                    errorClass = "SUM_OF_LIMIT_AND_OFFSET_EXCEEDS_MAX_INT",
                     messageParameters = Map(
                       "limit" -> limit.toString,
                       "offset" -> offset.toString))
@@ -624,8 +626,9 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB
 
             if (badReferences.nonEmpty) {
               create.failAnalysis(
-                errorClass = "_LEGACY_ERROR_TEMP_2431",
-                messageParameters = Map("cols" -> badReferences.mkString(", ")))
+                errorClass = "UNSUPPORTED_FEATURE.PARTITION_WITH_NESTED_COLUMN_IS_UNSUPPORTED",
+                messageParameters = Map(
+                  "cols" -> badReferences.map(r => toSQLId(r)).mkString(", ")))
             }
 
             create.tableSchema.foreach(f => TypeUtils.failWithIntervalType(f.dataType))
@@ -641,28 +644,34 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB
 
         operator match {
           case o if o.children.nonEmpty && o.missingInput.nonEmpty =>
-            val missingAttributes = o.missingInput.mkString(",")
-            val input = o.inputSet.mkString(",")
-            val msgForMissingAttributes = s"Resolved attribute(s) $missingAttributes missing " +
-              s"from $input in operator ${operator.simpleString(SQLConf.get.maxToStringFields)}."
+            val missingAttributes = o.missingInput.map(attr => toSQLExpr(attr)).mkString(", ")
+            val input = o.inputSet.map(attr => toSQLExpr(attr)).mkString(", ")
 
             val resolver = plan.conf.resolver
             val attrsWithSameName = o.missingInput.filter { missing =>
               o.inputSet.exists(input => resolver(missing.name, input.name))
             }
 
-            val msg = if (attrsWithSameName.nonEmpty) {
-              val sameNames = attrsWithSameName.map(_.name).mkString(",")
-              s"$msgForMissingAttributes Attribute(s) with the same name appear in the " +
-                s"operation: $sameNames. Please check if the right attribute(s) are used."
+            if (attrsWithSameName.nonEmpty) {
+              val sameNames = attrsWithSameName.map(attr => toSQLExpr(attr)).mkString(", ")
+              o.failAnalysis(
+                errorClass = "MISSING_ATTRIBUTES.RESOLVED_ATTRIBUTE_APPEAR_IN_OPERATION",
+                messageParameters = Map(
+                  "missingAttributes" -> missingAttributes,
+                  "input" -> input,
+                  "operator" -> operator.simpleString(SQLConf.get.maxToStringFields),
+                  "operation" -> sameNames
+                ))
             } else {
-              msgForMissingAttributes
+              o.failAnalysis(
+                errorClass = "MISSING_ATTRIBUTES.RESOLVED_ATTRIBUTE_MISSING_FROM_INPUT",
+                messageParameters = Map(
+                  "missingAttributes" -> missingAttributes,
+                  "input" -> input,
+                  "operator" -> operator.simpleString(SQLConf.get.maxToStringFields)
+                ))
             }
 
-            o.failAnalysis(
-              errorClass = "_LEGACY_ERROR_TEMP_2432",
-              messageParameters = Map("msg" -> msg))
-
           case p @ Project(exprs, _) if containsMultipleGenerators(exprs) =>
             p.failAnalysis(
               errorClass = "_LEGACY_ERROR_TEMP_2433",
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
index 5f6256881a4..b657dd55eb7 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
@@ -721,7 +721,7 @@ class AnalysisErrorSuite extends AnalysisTest {
   errorClassTest(
     "the sum of num_rows in limit clause and num_rows in offset clause less than Int.MaxValue",
     testRelation.offset(Literal(2000000000, IntegerType)).limit(Literal(1000000000, IntegerType)),
-    "_LEGACY_ERROR_TEMP_2428",
+    "SUM_OF_LIMIT_AND_OFFSET_EXCEEDS_MAX_INT",
     Map("limit" -> "1000000000", "offset" -> "2000000000"))
 
   errorTest(
@@ -770,14 +770,16 @@ class AnalysisErrorSuite extends AnalysisTest {
 
     assert(plan.resolved)
 
-    val resolved = s"${attrA.toString},${attrC.toString}"
-
-    val errorMsg = s"Resolved attribute(s) $resolved missing from ${otherA.toString} " +
-                     s"in operator !Aggregate [${aliases.mkString(", ")}]. " +
-                     s"Attribute(s) with the same name appear in the operation: a. " +
-                     "Please check if the right attribute(s) are used."
-
-    assertAnalysisError(plan, errorMsg :: Nil)
+    assertAnalysisErrorClass(
+      inputPlan = plan,
+      expectedErrorClass = "MISSING_ATTRIBUTES.RESOLVED_ATTRIBUTE_APPEAR_IN_OPERATION",
+      expectedMessageParameters = Map(
+        "missingAttributes" -> "\"a\", \"c\"",
+        "input" -> "\"a\"",
+        "operator" -> s"!Aggregate [${aliases.mkString(", ")}]",
+        "operation" -> "\"a\""
+      )
+    )
   }
 
   test("error test for self-join") {
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/CreateTablePartitioningValidationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/CreateTablePartitioningValidationSuite.scala
index 67441e18b0f..ba312ddbc49 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/CreateTablePartitioningValidationSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/CreateTablePartitioningValidationSuite.scala
@@ -40,9 +40,9 @@ class CreateTablePartitioningValidationSuite extends AnalysisTest {
       ignoreIfExists = false)
 
     assert(!plan.resolved)
-    assertAnalysisError(plan, Seq(
-      "Invalid partitioning",
-      "does_not_exist is missing or is in a map or array"))
+    assertAnalysisErrorClass(plan,
+      expectedErrorClass = "UNSUPPORTED_FEATURE.PARTITION_WITH_NESTED_COLUMN_IS_UNSUPPORTED",
+      expectedMessageParameters = Map("cols" -> "`does_not_exist`"))
   }
 
   test("CreateTableAsSelect: fail missing top-level column nested reference") {
@@ -57,9 +57,9 @@ class CreateTablePartitioningValidationSuite extends AnalysisTest {
       ignoreIfExists = false)
 
     assert(!plan.resolved)
-    assertAnalysisError(plan, Seq(
-      "Invalid partitioning",
-      "does_not_exist.z is missing or is in a map or array"))
+    assertAnalysisErrorClass(plan,
+      expectedErrorClass = "UNSUPPORTED_FEATURE.PARTITION_WITH_NESTED_COLUMN_IS_UNSUPPORTED",
+      expectedMessageParameters = Map("cols" -> "`does_not_exist`.`z`"))
   }
 
   test("CreateTableAsSelect: fail missing nested column") {
@@ -74,9 +74,9 @@ class CreateTablePartitioningValidationSuite extends AnalysisTest {
       ignoreIfExists = false)
 
     assert(!plan.resolved)
-    assertAnalysisError(plan, Seq(
-      "Invalid partitioning",
-      "point.z is missing or is in a map or array"))
+    assertAnalysisErrorClass(plan,
+      expectedErrorClass = "UNSUPPORTED_FEATURE.PARTITION_WITH_NESTED_COLUMN_IS_UNSUPPORTED",
+      expectedMessageParameters = Map("cols" -> "`point`.`z`"))
   }
 
   test("CreateTableAsSelect: fail with multiple errors") {
@@ -92,8 +92,8 @@ class CreateTablePartitioningValidationSuite extends AnalysisTest {
 
     assert(!plan.resolved)
     assertAnalysisErrorClass(plan,
-      expectedErrorClass = "_LEGACY_ERROR_TEMP_2431",
-      expectedMessageParameters = Map("cols" -> "does_not_exist, point.z"))
+      expectedErrorClass = "UNSUPPORTED_FEATURE.PARTITION_WITH_NESTED_COLUMN_IS_UNSUPPORTED",
+      expectedMessageParameters = Map("cols" -> "`does_not_exist`, `point`.`z`"))
   }
 
   test("CreateTableAsSelect: success with top-level column") {
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/negative-cases/invalid-correlation.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/negative-cases/invalid-correlation.sql.out
index 08ddc2cfcd2..2992bc6c9a1 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/negative-cases/invalid-correlation.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/negative-cases/invalid-correlation.sql.out
@@ -76,9 +76,11 @@ WHERE  t1a IN (SELECT   min(t2a)
 -- !query analysis
 org.apache.spark.sql.AnalysisException
 {
-  "errorClass" : "_LEGACY_ERROR_TEMP_2432",
+  "errorClass" : "MISSING_ATTRIBUTES.RESOLVED_ATTRIBUTE_MISSING_FROM_INPUT",
   "messageParameters" : {
-    "msg" : "Resolved attribute(s) t2b#x missing from min(t2a)#x,t2c#x in operator !Filter t2c#x IN (list#x [t2b#x])."
+    "input" : "\"min(t2a)\", \"t2c\"",
+    "missingAttributes" : "\"t2b\"",
+    "operator" : "!Filter t2c#x IN (list#x [t2b#x])"
   },
   "queryContext" : [ {
     "objectType" : "",
diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out
index 39b4f87bb1b..d1eb86a2d33 100644
--- a/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out
@@ -71,9 +71,11 @@ struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
 {
-  "errorClass" : "_LEGACY_ERROR_TEMP_2432",
+  "errorClass" : "MISSING_ATTRIBUTES.RESOLVED_ATTRIBUTE_MISSING_FROM_INPUT",
   "messageParameters" : {
-    "msg" : "Resolved attribute(s) t2b#x missing from min(t2a)#x,t2c#x in operator !Filter t2c#x IN (list#x [t2b#x])."
+    "input" : "\"min(t2a)\", \"t2c\"",
+    "missingAttributes" : "\"t2b\"",
+    "operator" : "!Filter t2c#x IN (list#x [t2b#x])"
   },
   "queryContext" : [ {
     "objectType" : "",


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org