You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by GitBox <gi...@apache.org> on 2022/09/09 14:51:21 UTC

[GitHub] [spark] allisonwang-db commented on a diff in pull request #37840: [SPARK-40394][SQL] Move subquery expression CheckAnalysis error messages to use the new error framework

allisonwang-db commented on code in PR #37840:
URL: https://github.com/apache/spark/pull/37840#discussion_r967150145


##########
core/src/main/resources/error/error-classes.json:
##########
@@ -327,6 +327,83 @@
     ],
     "sqlState" : "42000"
   },
+  "INVALID_SUBQUERY_EXPRESSION" : {
+    "message" : [
+      "Invalid subquery expression"

Review Comment:
   ```suggestion
         "Invalid subquery: "
   ```



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala:
##########
@@ -923,7 +966,11 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog {
     // Make sure a plan's subtree does not contain outer references
     def failOnOuterReferenceInSubTree(p: LogicalPlan): Unit = {
       if (hasOuterReferences(p)) {
-        failAnalysis(s"Accessing outer query column is not allowed in:\n$p")
+        throw new AnalysisException(
+          errorClass = "INVALID_SUBQUERY_EXPRESSION",
+          errorSubClass = "AGGREGATE_FUNCTION_MIXED_OUTER_LOCAL_REFERENCES",

Review Comment:
   This should use the `ACCESSING_OUTER_QUERY_COLUMN_IS_NOT_ALLOWED` sub-class instead of `AGGREGATE_FUNCTION_MIXED_OUTER_LOCAL_REFERENCES`.



##########
core/src/main/resources/error/error-classes.json:
##########
@@ -327,6 +327,83 @@
     ],
     "sqlState" : "42000"
   },
+  "INVALID_SUBQUERY_EXPRESSION" : {
+    "message" : [
+      "Invalid subquery expression"
+    ],
+    "subClass" : {
+      "ACCESSING_OUTER_QUERY_COLUMN_IS_NOT_ALLOWED" : {
+        "message" : [
+          "Accessing outer query column is not allowed in this location"
+        ]
+      },
+      "AGGREGATE_FUNCTION_MIXED_OUTER_LOCAL_REFERENCES" : {
+        "message" : [
+          "Found an aggregate function in a correlated predicate that has both outer and local references, which is not supported: <function>"
+        ]
+      },
+      "CORRELATED_COLUMN_IS_NOT_ALLOWED_IN_PREDICATE" : {
+        "message" : [
+          "Correlated column is not allowed in predicate <predicate>"
+        ]
+      },
+      "CORRELATED_SCALAR_SUBQUERIES_IN_GROUP_BY_MUST_BE_IN_AGGREGATE_EXPRESSIONS" : {
+        "message" : [
+          "Correlated scalar subqueries in the GROUP BY clause must also be in the aggregate expressions"
+        ]
+      },
+      "CORRELATED_SCALAR_SUBQUERIES_ONLY_IN_FILTER_AGGREGATE_PROJECT" : {
+        "message" : [
+          "Correlated scalar subqueries can only be used in filters, aggregations, projections, and UPDATE/MERGE/DELETE commands"
+        ]
+      },
+      "EXPRESSIONS_REFERENCING_OUTER_QUERY_COLUMN_ONLY_ALLOWED_IN_WHERE_HAVING" : {
+        "message" : [
+          "Expressions referencing the outer query are not supported outside of WHERE/HAVING clauses"
+        ]
+      },
+      "IN_EXISTS_SUBQUERIES_ONLY_IN_FILTER_AGGREGATE_PROJECT" : {
+        "message" : [
+          "IN/EXISTS predicate subqueries can only be used in filters, joins, aggregations, window functions, projections, and UPDATE/MERGE/DELETE commands"
+        ]
+      },
+      "LATERAL_JOIN_CONDITION_NON_DETERMINISTIC" : {
+        "message" : [
+          "Lateral join condition cannot be non-deterministic: <condition>"
+        ]
+      },
+      "MORE_THAN_ONE_OUTPUT_COLUMN" : {

Review Comment:
   ```suggestion
         "SCALAR_SUBQUERY_RETURN_MORE_THAN_ONE_OUTPUT_COLUMN" : {
   ```



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala:
##########
@@ -923,7 +966,11 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog {
     // Make sure a plan's subtree does not contain outer references
     def failOnOuterReferenceInSubTree(p: LogicalPlan): Unit = {
       if (hasOuterReferences(p)) {
-        failAnalysis(s"Accessing outer query column is not allowed in:\n$p")
+        throw new AnalysisException(
+          errorClass = "INVALID_SUBQUERY_EXPRESSION",
+          errorSubClass = "AGGREGATE_FUNCTION_MIXED_OUTER_LOCAL_REFERENCES",
+          origin = p.origin,
+          messageParameters = Array.empty[String])

Review Comment:
   Should we include the query plan here? Otherwise, it's hard to tell what's wrong.



##########
core/src/main/resources/error/error-classes.json:
##########
@@ -327,6 +327,83 @@
     ],
     "sqlState" : "42000"
   },
+  "INVALID_SUBQUERY_EXPRESSION" : {
+    "message" : [
+      "Invalid subquery expression"
+    ],
+    "subClass" : {
+      "ACCESSING_OUTER_QUERY_COLUMN_IS_NOT_ALLOWED" : {
+        "message" : [
+          "Accessing outer query column is not allowed in this location"
+        ]
+      },
+      "AGGREGATE_FUNCTION_MIXED_OUTER_LOCAL_REFERENCES" : {
+        "message" : [
+          "Found an aggregate function in a correlated predicate that has both outer and local references, which is not supported: <function>"
+        ]
+      },
+      "CORRELATED_COLUMN_IS_NOT_ALLOWED_IN_PREDICATE" : {
+        "message" : [
+          "Correlated column is not allowed in predicate <predicate>"
+        ]
+      },
+      "CORRELATED_SCALAR_SUBQUERIES_IN_GROUP_BY_MUST_BE_IN_AGGREGATE_EXPRESSIONS" : {
+        "message" : [
+          "Correlated scalar subqueries in the GROUP BY clause must also be in the aggregate expressions"
+        ]
+      },
+      "CORRELATED_SCALAR_SUBQUERIES_ONLY_IN_FILTER_AGGREGATE_PROJECT" : {
+        "message" : [
+          "Correlated scalar subqueries can only be used in filters, aggregations, projections, and UPDATE/MERGE/DELETE commands"
+        ]
+      },
+      "EXPRESSIONS_REFERENCING_OUTER_QUERY_COLUMN_ONLY_ALLOWED_IN_WHERE_HAVING" : {
+        "message" : [
+          "Expressions referencing the outer query are not supported outside of WHERE/HAVING clauses"
+        ]
+      },
+      "IN_EXISTS_SUBQUERIES_ONLY_IN_FILTER_AGGREGATE_PROJECT" : {

Review Comment:
   This list might change in the future. We can make the name more generic.



##########
sql/core/src/test/resources/sql-tests/results/join-lateral.sql.out:
##########
@@ -323,14 +322,10 @@ SELECT * FROM t1, LATERAL (SELECT rand(0) FROM t2)
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Non-deterministic lateral subqueries are not supported when joining with outer relations that produce more than one row
-SubqueryAlias __auto_generated_subquery_name
-+- Project [rand(0) AS rand(0)#x]
-   +- SubqueryAlias spark_catalog.default.t2
-      +- View (`spark_catalog`.`default`.`t2`, [c1#x,c2#x])
-         +- Project [cast(col1#x as int) AS c1#x, cast(col2#x as int) AS c2#x]
-            +- LocalRelation [col1#x, col2#x]
-; line 1 pos 9

Review Comment:
   Will the problematic query plan no longer be printed out after this change?



##########
core/src/main/resources/error/error-classes.json:
##########
@@ -327,6 +327,83 @@
     ],
     "sqlState" : "42000"
   },
+  "INVALID_SUBQUERY_EXPRESSION" : {
+    "message" : [
+      "Invalid subquery expression"
+    ],
+    "subClass" : {
+      "ACCESSING_OUTER_QUERY_COLUMN_IS_NOT_ALLOWED" : {
+        "message" : [
+          "Accessing outer query column is not allowed in this location"
+        ]
+      },
+      "AGGREGATE_FUNCTION_MIXED_OUTER_LOCAL_REFERENCES" : {
+        "message" : [
+          "Found an aggregate function in a correlated predicate that has both outer and local references, which is not supported: <function>"
+        ]
+      },
+      "CORRELATED_COLUMN_IS_NOT_ALLOWED_IN_PREDICATE" : {
+        "message" : [
+          "Correlated column is not allowed in predicate <predicate>"
+        ]
+      },
+      "CORRELATED_SCALAR_SUBQUERIES_IN_GROUP_BY_MUST_BE_IN_AGGREGATE_EXPRESSIONS" : {
+        "message" : [
+          "Correlated scalar subqueries in the GROUP BY clause must also be in the aggregate expressions"
+        ]
+      },
+      "CORRELATED_SCALAR_SUBQUERIES_ONLY_IN_FILTER_AGGREGATE_PROJECT" : {
+        "message" : [
+          "Correlated scalar subqueries can only be used in filters, aggregations, projections, and UPDATE/MERGE/DELETE commands"
+        ]
+      },
+      "EXPRESSIONS_REFERENCING_OUTER_QUERY_COLUMN_ONLY_ALLOWED_IN_WHERE_HAVING" : {
+        "message" : [
+          "Expressions referencing the outer query are not supported outside of WHERE/HAVING clauses"
+        ]
+      },
+      "IN_EXISTS_SUBQUERIES_ONLY_IN_FILTER_AGGREGATE_PROJECT" : {
+        "message" : [
+          "IN/EXISTS predicate subqueries can only be used in filters, joins, aggregations, window functions, projections, and UPDATE/MERGE/DELETE commands"
+        ]
+      },
+      "LATERAL_JOIN_CONDITION_NON_DETERMINISTIC" : {
+        "message" : [
+          "Lateral join condition cannot be non-deterministic: <condition>"
+        ]
+      },
+      "MORE_THAN_ONE_OUTPUT_COLUMN" : {
+        "message" : [
+          "Scalar subquery must return only one column, but got <number>"
+        ]
+      },
+      "MUST_AGGREGATE_CORRELATED_SUBQUERY" : {
+        "message" : [
+          "Correlated scalar subqueries must be aggregated"
+        ]
+      },
+      "MUST_AGGREGATE_CORRELATED_SUBQUERY_OUTPUT" : {

Review Comment:
   This looks very similar to the one above. Maybe we can unify them later.



##########
core/src/main/resources/error/error-classes.json:
##########
@@ -327,6 +327,83 @@
     ],
     "sqlState" : "42000"
   },
+  "INVALID_SUBQUERY_EXPRESSION" : {
+    "message" : [
+      "Invalid subquery expression"
+    ],
+    "subClass" : {
+      "ACCESSING_OUTER_QUERY_COLUMN_IS_NOT_ALLOWED" : {
+        "message" : [
+          "Accessing outer query column is not allowed in this location"
+        ]
+      },
+      "AGGREGATE_FUNCTION_MIXED_OUTER_LOCAL_REFERENCES" : {
+        "message" : [
+          "Found an aggregate function in a correlated predicate that has both outer and local references, which is not supported: <function>"
+        ]
+      },
+      "CORRELATED_COLUMN_IS_NOT_ALLOWED_IN_PREDICATE" : {
+        "message" : [
+          "Correlated column is not allowed in predicate <predicate>"
+        ]
+      },
+      "CORRELATED_SCALAR_SUBQUERIES_IN_GROUP_BY_MUST_BE_IN_AGGREGATE_EXPRESSIONS" : {
+        "message" : [
+          "Correlated scalar subqueries in the GROUP BY clause must also be in the aggregate expressions"
+        ]
+      },
+      "CORRELATED_SCALAR_SUBQUERIES_ONLY_IN_FILTER_AGGREGATE_PROJECT" : {
+        "message" : [
+          "Correlated scalar subqueries can only be used in filters, aggregations, projections, and UPDATE/MERGE/DELETE commands"
+        ]
+      },
+      "EXPRESSIONS_REFERENCING_OUTER_QUERY_COLUMN_ONLY_ALLOWED_IN_WHERE_HAVING" : {
+        "message" : [
+          "Expressions referencing the outer query are not supported outside of WHERE/HAVING clauses"
+        ]
+      },
+      "IN_EXISTS_SUBQUERIES_ONLY_IN_FILTER_AGGREGATE_PROJECT" : {
+        "message" : [
+          "IN/EXISTS predicate subqueries can only be used in filters, joins, aggregations, window functions, projections, and UPDATE/MERGE/DELETE commands"
+        ]
+      },
+      "LATERAL_JOIN_CONDITION_NON_DETERMINISTIC" : {
+        "message" : [
+          "Lateral join condition cannot be non-deterministic: <condition>"
+        ]
+      },
+      "MORE_THAN_ONE_OUTPUT_COLUMN" : {
+        "message" : [
+          "Scalar subquery must return only one column, but got <number>"
+        ]
+      },
+      "MUST_AGGREGATE_CORRELATED_SUBQUERY" : {

Review Comment:
   ```suggestion
         "MUST_AGGREGATE_CORRELATED_SCALAR_SUBQUERY" : {
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org