You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2022/06/22 15:52:38 UTC

[spark] branch branch-3.3 updated: [SPARK-38687][SQL][3.3] Use error classes in the compilation errors of generators

This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch branch-3.3
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.3 by this push:
     new b39ed56193e [SPARK-38687][SQL][3.3] Use error classes in the compilation errors of generators
b39ed56193e is described below

commit b39ed56193ea0ade80960cd920536426d85680f1
Author: panbingkun <pb...@gmail.com>
AuthorDate: Wed Jun 22 18:52:14 2022 +0300

    [SPARK-38687][SQL][3.3] Use error classes in the compilation errors of generators
    
    ## What changes were proposed in this pull request?
    Migrate the following errors in QueryCompilationErrors to use error classes:
    
    - nestedGeneratorError => UNSUPPORTED_GENERATOR.NESTED_IN_EXPRESSIONS
    - moreThanOneGeneratorError => UNSUPPORTED_GENERATOR.MULTI_GENERATOR
    - generatorOutsideSelectError => UNSUPPORTED_GENERATOR.OUTSIDE_SELECT
    - generatorNotExpectedError => UNSUPPORTED_GENERATOR.NOT_GENERATOR
    
    This is a backport of https://github.com/apache/spark/pull/36617.
    
    ### Why are the changes needed?
    Port the compilation errors of generators to the new error framework, improve test coverage, and document the expected error messages in tests.
    
    ### Does this PR introduce any user-facing change?
    No
    
    ### How was this patch tested?
    By running the new tests:
    ```
    $ build/sbt "sql/testOnly *QueryCompilationErrorsSuite*"
    ```
    
    Closes #36956 from panbingkun/branch-3.3-SPARK-38687.
    
    Authored-by: panbingkun <pb...@gmail.com>
    Signed-off-by: Max Gekk <ma...@gmail.com>
---
 core/src/main/resources/error/error-classes.json   | 17 ++++++++
 .../spark/sql/errors/QueryCompilationErrors.scala  | 22 +++++------
 .../sql/catalyst/analysis/AnalysisErrorSuite.scala | 23 +++++------
 .../apache/spark/sql/GeneratorFunctionSuite.scala  |  9 +++--
 .../sql/errors/QueryCompilationErrorsSuite.scala   | 46 ++++++++++++++++++++++
 5 files changed, 90 insertions(+), 27 deletions(-)

diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json
index 5eab18dfd35..31ec5aaa05e 100644
--- a/core/src/main/resources/error/error-classes.json
+++ b/core/src/main/resources/error/error-classes.json
@@ -198,6 +198,23 @@
     "message" : [ "The feature is not supported: <feature>" ],
     "sqlState" : "0A000"
   },
+  "UNSUPPORTED_GENERATOR" : {
+    "message" : [ "The generator is not supported: " ],
+    "subClass" : {
+      "MULTI_GENERATOR" : {
+        "message" : [ "only one generator allowed per <clause> clause but found <num>: <generators>" ]
+      },
+      "NESTED_IN_EXPRESSIONS" : {
+        "message" : [ "nested in expressions <expression>" ]
+      },
+      "NOT_GENERATOR" : {
+        "message" : [ "<functionName> is expected to be a generator. However, its class is <classCanonicalName>, which is not a generator." ]
+      },
+      "OUTSIDE_SELECT" : {
+        "message" : [ "outside the SELECT clause, found: <plan>" ]
+      }
+    }
+  },
   "UNSUPPORTED_GROUPING_EXPRESSION" : {
     "message" : [ "grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup" ]
   },
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
index 04b1d5f796d..6946f9dfc98 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
@@ -30,7 +30,7 @@ import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeRef
 import org.apache.spark.sql.catalyst.plans.JoinType
 import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoStatement, Join, LogicalPlan, SerdeInfo, Window}
 import org.apache.spark.sql.catalyst.trees.{Origin, TreeNode}
-import org.apache.spark.sql.catalyst.util.{toPrettySQL, FailFastMode, ParseMode, PermissiveMode}
+import org.apache.spark.sql.catalyst.util.{FailFastMode, ParseMode, PermissiveMode}
 import org.apache.spark.sql.connector.catalog._
 import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
 import org.apache.spark.sql.connector.catalog.functions.{BoundFunction, UnboundFunction}
@@ -113,21 +113,19 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase {
   }
 
   def nestedGeneratorError(trimmedNestedGenerator: Expression): Throwable = {
-    new AnalysisException(
-      "Generators are not supported when it's nested in " +
-        "expressions, but got: " + toPrettySQL(trimmedNestedGenerator))
+    new AnalysisException(errorClass = "UNSUPPORTED_GENERATOR",
+      messageParameters = Array("NESTED_IN_EXPRESSIONS", toSQLExpr(trimmedNestedGenerator)))
   }
 
   def moreThanOneGeneratorError(generators: Seq[Expression], clause: String): Throwable = {
-    new AnalysisException(
-      s"Only one generator allowed per $clause clause but found " +
-        generators.size + ": " + generators.map(toPrettySQL).mkString(", "))
+    new AnalysisException(errorClass = "UNSUPPORTED_GENERATOR",
+      messageParameters = Array("MULTI_GENERATOR",
+        clause, generators.size.toString, generators.map(toSQLExpr).mkString(", ")))
   }
 
   def generatorOutsideSelectError(plan: LogicalPlan): Throwable = {
-    new AnalysisException(
-      "Generators are not supported outside the SELECT clause, but " +
-        "got: " + plan.simpleString(SQLConf.get.maxToStringFields))
+    new AnalysisException(errorClass = "UNSUPPORTED_GENERATOR",
+      messageParameters = Array("OUTSIDE_SELECT", plan.simpleString(SQLConf.get.maxToStringFields)))
   }
 
   def legacyStoreAssignmentPolicyError(): Throwable = {
@@ -324,8 +322,8 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase {
   }
 
   def generatorNotExpectedError(name: FunctionIdentifier, classCanonicalName: String): Throwable = {
-    new AnalysisException(s"$name is expected to be a generator. However, " +
-      s"its class is $classCanonicalName, which is not a generator.")
+    new AnalysisException(errorClass = "UNSUPPORTED_GENERATOR",
+      messageParameters = Array("NOT_GENERATOR", toSQLId(name.toString), classCanonicalName))
   }
 
   def functionWithUnsupportedSyntaxError(prettyName: String, syntax: String): Throwable = {
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
index a5b8663f5e6..89034432452 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
@@ -483,7 +483,7 @@ class AnalysisErrorSuite extends AnalysisTest {
   errorTest(
     "generator nested in expressions",
     listRelation.select(Explode($"list") + 1),
-    "Generators are not supported when it's nested in expressions, but got: (explode(list) + 1)"
+    "The generator is not supported: nested in expressions \"(explode(list) + 1)\""
       :: Nil
   )
 
@@ -494,29 +494,29 @@ class AnalysisErrorSuite extends AnalysisTest {
         AttributeReference("nestedList", ArrayType(ArrayType(IntegerType)))())
       nestedListRelation.select(Explode(Explode($"nestedList")))
     },
-    "Generators are not supported when it's nested in expressions, but got: " +
-      "explode(explode(nestedList))" :: Nil
+    "The generator is not supported: nested in expressions " +
+    """"explode(explode(nestedList))"""" :: Nil
   )
 
   errorTest(
     "SPARK-30998: unsupported nested inner generators for aggregates",
     testRelation.select(Explode(Explode(
       CreateArray(CreateArray(min($"a") :: max($"a") :: Nil) :: Nil)))),
-    "Generators are not supported when it's nested in expressions, but got: " +
-      "explode(explode(array(array(min(a), max(a)))))" :: Nil
+    "The generator is not supported: nested in expressions " +
+    """"explode(explode(array(array(min(a), max(a)))))"""" :: Nil
   )
 
   errorTest(
     "generator nested in expressions for aggregates",
     testRelation.select(Explode(CreateArray(min($"a") :: max($"a") :: Nil)) + 1),
-    "Generators are not supported when it's nested in expressions, but got: " +
-      "(explode(array(min(a), max(a))) + 1)" :: Nil
+    "The generator is not supported: nested in expressions " +
+    """"(explode(array(min(a), max(a))) + 1)"""" :: Nil
   )
 
   errorTest(
     "generator appears in operator which is not Project",
     listRelation.sortBy(Explode($"list").asc),
-    "Generators are not supported outside the SELECT clause, but got: Sort" :: Nil
+    "The generator is not supported: outside the SELECT clause, found: Sort" :: Nil
   )
 
   errorTest(
@@ -534,15 +534,16 @@ class AnalysisErrorSuite extends AnalysisTest {
   errorTest(
     "more than one generators in SELECT",
     listRelation.select(Explode($"list"), Explode($"list")),
-    "Only one generator allowed per select clause but found 2: explode(list), explode(list)" :: Nil
+    "The generator is not supported: only one generator allowed per select clause but found 2: " +
+    """"explode(list)", "explode(list)"""" :: Nil
   )
 
   errorTest(
     "more than one generators for aggregates in SELECT",
     testRelation.select(Explode(CreateArray(min($"a") :: Nil)),
       Explode(CreateArray(max($"a") :: Nil))),
-    "Only one generator allowed per select clause but found 2: " +
-      "explode(array(min(a))), explode(array(max(a)))" :: Nil
+    "The generator is not supported: only one generator allowed per select clause but found 2: " +
+    """"explode(array(min(a)))", "explode(array(max(a)))"""" :: Nil
   )
 
   errorTest(
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala
index 09afedea7a5..08280c08cd2 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala
@@ -332,7 +332,7 @@ class GeneratorFunctionSuite extends QueryTest with SharedSparkSession {
       val msg1 = intercept[AnalysisException] {
         sql("select 1 + explode(array(min(c2), max(c2))) from t1 group by c1")
       }.getMessage
-      assert(msg1.contains("Generators are not supported when it's nested in expressions"))
+      assert(msg1.contains("The generator is not supported: nested in expressions"))
 
       val msg2 = intercept[AnalysisException] {
         sql(
@@ -342,7 +342,8 @@ class GeneratorFunctionSuite extends QueryTest with SharedSparkSession {
             |from t1 group by c1
           """.stripMargin)
       }.getMessage
-      assert(msg2.contains("Only one generator allowed per aggregate clause"))
+      assert(msg2.contains("The generator is not supported: " +
+        "only one generator allowed per aggregate clause"))
     }
   }
 
@@ -350,8 +351,8 @@ class GeneratorFunctionSuite extends QueryTest with SharedSparkSession {
     val errMsg = intercept[AnalysisException] {
       sql("SELECT array(array(1, 2), array(3)) v").select(explode(explode($"v"))).collect
     }.getMessage
-    assert(errMsg.contains("Generators are not supported when it's nested in expressions, " +
-      "but got: explode(explode(v))"))
+    assert(errMsg.contains("The generator is not supported: " +
+      "nested in expressions \"explode(explode(v))\""))
   }
 
   test("SPARK-30997: generators in aggregate expressions for dataframe") {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala
index 3da5202a2ad..9e18e4e6692 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala
@@ -205,4 +205,50 @@ class QueryCompilationErrorsSuite extends QueryTest with SharedSparkSession {
       "The deserializer is not supported: try to map \"STRUCT<a: STRING, b: INT>\" " +
       "to Tuple1, but failed as the number of fields does not line up.")
   }
+
+  test("UNSUPPORTED_GENERATOR: " +
+    "generators are not supported when it's nested in expressions") {
+    val e = intercept[AnalysisException](
+      sql("""select explode(Array(1, 2, 3)) + 1""").collect()
+    )
+    assert(e.errorClass === Some("UNSUPPORTED_GENERATOR"))
+    assert(e.message ===
+      """The generator is not supported: """ +
+      """nested in expressions "(explode(array(1, 2, 3)) + 1)"""")
+  }
+
+  test("UNSUPPORTED_GENERATOR: only one generator allowed") {
+    val e = intercept[AnalysisException](
+      sql("""select explode(Array(1, 2, 3)), explode(Array(1, 2, 3))""").collect()
+    )
+    assert(e.errorClass === Some("UNSUPPORTED_GENERATOR"))
+    assert(e.message ===
+      "The generator is not supported: only one generator allowed per select clause " +
+      """but found 2: "explode(array(1, 2, 3))", "explode(array(1, 2, 3))"""")
+  }
+
+  test("UNSUPPORTED_GENERATOR: generators are not supported outside the SELECT clause") {
+    val e = intercept[AnalysisException](
+      sql("""select 1 from t order by explode(Array(1, 2, 3))""").collect()
+    )
+    assert(e.errorClass === Some("UNSUPPORTED_GENERATOR"))
+    assert(e.message ===
+      "The generator is not supported: outside the SELECT clause, found: " +
+      "'Sort [explode(array(1, 2, 3)) ASC NULLS FIRST], true")
+  }
+
+  test("UNSUPPORTED_GENERATOR: not a generator") {
+    val e = intercept[AnalysisException](
+      sql(
+        """
+          |SELECT explodedvalue.*
+          |FROM VALUES array(1, 2, 3) AS (value)
+          |LATERAL VIEW array_contains(value, 1) AS explodedvalue""".stripMargin).collect()
+    )
+    assert(e.errorClass === Some("UNSUPPORTED_GENERATOR"))
+    assert(e.message ===
+      """The generator is not supported: `array_contains` is expected to be a generator. """ +
+      "However, its class is org.apache.spark.sql.catalyst.expressions.ArrayContains, " +
+      "which is not a generator.")
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org