Posted to reviews@spark.apache.org by GitBox <gi...@apache.org> on 2022/09/10 17:49:15 UTC

[GitHub] [spark] MaxGekk commented on a diff in pull request #37840: [SPARK-40394][SQL] Move subquery expression CheckAnalysis error messages to use the new error framework

MaxGekk commented on code in PR #37840:
URL: https://github.com/apache/spark/pull/37840#discussion_r967682732


##########
sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala:
##########
@@ -2005,13 +2018,17 @@ class SubquerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
           |FROM (SELECT CAST(c1 AS STRING) a FROM t1)
           |""".stripMargin),
         Row(5) :: Row(null) :: Nil)
-      assert(intercept[AnalysisException] {
+      val msg1 = intercept[AnalysisException] {

Review Comment:
   It is an exception, not a message.
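
   For illustration, a minimal sketch of the requested rename, reusing the
   checkError pattern already introduced elsewhere in this PR (the variable name
   and the specific query/error class below are borrowed from another hunk of
   this same diff, purely as an example):

       val exception = intercept[AnalysisException] {
         sql("select a, (select b from l l2 where l2.a = l1.a) sum_b from l l1")
       }
       checkError(
         exception,
         errorClass = "INVALID_SUBQUERY_EXPRESSION",
         errorSubClass = Some("MUST_AGGREGATE_CORRELATED_SCALAR_SUBQUERY"))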



##########
sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala:
##########
@@ -1983,9 +1992,13 @@ class SubquerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
     withTempView("t1", "t2") {
       Seq((0, 1)).toDF("c1", "c2").createOrReplaceTempView("t1")
       Seq((1, 2), (2, 2)).toDF("c1", "c2").createOrReplaceTempView("t2")
-      assert(intercept[AnalysisException] {
+      val msg1 = intercept[AnalysisException] {

Review Comment:
   Please rename the val. It is not a message.



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala:
##########
@@ -743,7 +743,11 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog {
         case a: AggregateExpression => a
       })
       if (aggregates.isEmpty) {
-        failAnalysis("The output of a correlated scalar subquery must be aggregated")
+        throw new AnalysisException(

Review Comment:
   The current approach is to raise exceptions from `Query*Errors`. Could you move this to `QueryCompilationErrors`, please?
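
   A rough sketch of that relocation (illustrative only: the helper name and the
   exact AnalysisException constructor arguments are assumptions here, and should
   follow the existing helpers in QueryCompilationErrors):

       // In QueryCompilationErrors (hypothetical helper name):
       def mustAggregateCorrelatedScalarSubqueryOutputError(): AnalysisException = {
         // Argument names mirror the checkError calls in this PR; use whatever
         // constructor the neighbouring Query*Errors helpers actually use.
         new AnalysisException(
           errorClass = "INVALID_SUBQUERY_EXPRESSION",
           errorSubClass = Some("MUST_AGGREGATE_CORRELATED_SCALAR_SUBQUERY_OUTPUT"),
           messageParameters = Map.empty[String, String])
       }

       // In CheckAnalysis, the call site then becomes:
       if (aggregates.isEmpty) {
         throw QueryCompilationErrors.mustAggregateCorrelatedScalarSubqueryOutputError()
       }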



##########
sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala:
##########
@@ -523,30 +523,39 @@ class SubquerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
       val errMsg = intercept[AnalysisException] {
         sql("select (select sum(-1) from t t2 where t1.c2 = t2.c1 group by t2.c2) sum from t t1")
       }
-      assert(errMsg.getMessage.contains(
-        "A GROUP BY clause in a scalar correlated subquery cannot contain non-correlated columns:"))
+      checkError(
+        errMsg,
+        errorClass = "INVALID_SUBQUERY_EXPRESSION",
+        errorSubClass = Some("NON_CORRELATED_COLUMNS_IN_GROUP_BY"),
+        parameters = Map("value" -> "c2"))
     }
   }
 
   test("non-aggregated correlated scalar subquery") {
     val msg1 = intercept[AnalysisException] {

Review Comment:
   ditto



##########
sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala:
##########
@@ -523,30 +523,39 @@ class SubquerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
       val errMsg = intercept[AnalysisException] {

Review Comment:
   ditto



##########
sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala:
##########
@@ -523,30 +523,39 @@ class SubquerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
       val errMsg = intercept[AnalysisException] {
         sql("select (select sum(-1) from t t2 where t1.c2 = t2.c1 group by t2.c2) sum from t t1")
       }
-      assert(errMsg.getMessage.contains(
-        "A GROUP BY clause in a scalar correlated subquery cannot contain non-correlated columns:"))
+      checkError(
+        errMsg,
+        errorClass = "INVALID_SUBQUERY_EXPRESSION",
+        errorSubClass = Some("NON_CORRELATED_COLUMNS_IN_GROUP_BY"),
+        parameters = Map("value" -> "c2"))
     }
   }
 
   test("non-aggregated correlated scalar subquery") {
     val msg1 = intercept[AnalysisException] {
       sql("select a, (select b from l l2 where l2.a = l1.a) sum_b from l l1")
     }
-    assert(msg1.getMessage.contains("Correlated scalar subqueries must be aggregated"))
-
+    checkError(
+      msg1,
+      errorClass = "INVALID_SUBQUERY_EXPRESSION",
+      errorSubClass = Some("MUST_AGGREGATE_CORRELATED_SCALAR_SUBQUERY"))
     val msg2 = intercept[AnalysisException] {
       sql("select a, (select b from l l2 where l2.a = l1.a group by 1) sum_b from l l1")
     }
-    assert(msg2.getMessage.contains(
-      "The output of a correlated scalar subquery must be aggregated"))
+    checkError(
+      msg2,
+      errorClass = "INVALID_SUBQUERY_EXPRESSION",
+      errorSubClass = Some("MUST_AGGREGATE_CORRELATED_SCALAR_SUBQUERY_OUTPUT"))
   }
 
   test("non-equal correlated scalar subquery") {
     val msg1 = intercept[AnalysisException] {

Review Comment:
   ditto



##########
core/src/main/resources/error/error-classes.json:
##########
@@ -327,6 +327,83 @@
     ],
     "sqlState" : "42000"
   },
+  "INVALID_SUBQUERY_EXPRESSION" : {
+    "message" : [
+      "Invalid subquery:"
+    ],
+    "subClass" : {
+      "ACCESSING_OUTER_QUERY_COLUMN_IS_NOT_ALLOWED" : {
+        "message" : [
+          "Accessing outer query column is not allowed in this location"
+        ]
+      },
+      "AGGREGATE_FUNCTION_MIXED_OUTER_LOCAL_REFERENCES" : {
+        "message" : [
+          "Found an aggregate function in a correlated predicate that has both outer and local references, which is not supported: <function>"
+        ]
+      },
+      "CORRELATED_COLUMN_IS_NOT_ALLOWED_IN_PREDICATE" : {
+        "message" : [
+          "Correlated column is not allowed in predicate"
+        ]
+      },
+      "CORRELATED_SCALAR_SUBQUERIES_IN_GROUP_BY_MUST_BE_IN_AGGREGATE_EXPRESSIONS" : {

Review Comment:
   Could you replace it with a shorter one, please? The error classes are placed in doc headers, and long error class names can cause formatting issues. cc @srielau 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org

