Posted to reviews@spark.apache.org by GitBox <gi...@apache.org> on 2021/04/07 02:02:59 UTC

[GitHub] [spark] maropu commented on a change in pull request #32054: [SPARK-34946][SQL] Block unsupported correlated scalar subquery in Aggregate

maropu commented on a change in pull request #32054:
URL: https://github.com/apache/spark/pull/32054#discussion_r608289035



##########
File path: sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala
##########
@@ -1765,4 +1765,35 @@ class SubquerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
       }
     }
   }
+
+  test("SPARK-34946: correlated scalar subquery in grouping expressions only") {

Review comment:
       Could you move these tests into `AnalysisErrorSuite`?
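
For context, a minimal, self-contained sketch of the query shape this test guards: a correlated scalar subquery used only in the grouping expressions, which this PR turns into an analysis error. The `l`/`r` temp views and the driver object are illustrative assumptions, not SubquerySuite's fixtures.

```scala
import org.apache.spark.sql.{AnalysisException, SparkSession}

object Spark34946Sketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[1]").getOrCreate()
    import spark.implicits._
    Seq((1, 2)).toDF("a", "b").createOrReplaceTempView("l")
    Seq((1, 3)).toDF("c", "d").createOrReplaceTempView("r")

    try {
      // The correlated scalar subquery appears only in GROUP BY, not in the
      // aggregate expressions, so analysis should now fail.
      spark.sql(
        """SELECT count(*)
          |FROM l
          |GROUP BY (SELECT min(d) FROM r WHERE r.c = l.a)""".stripMargin).collect()
    } catch {
      case e: AnalysisException => println(s"Blocked as expected: ${e.getMessage}")
    } finally {
      spark.stop()
    }
  }
}
```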

##########
File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
##########
@@ -756,7 +767,15 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog {
           // Only certain operators are allowed to host subquery expression containing
           // outer references.
           plan match {
-            case _: Filter | _: Aggregate | _: Project | _: SupportsSubquery => // Ok
+            case _: Filter | _: Project | _: SupportsSubquery => // Ok
+            case a: Aggregate =>
+              // If the correlated scalar subquery is in the grouping expressions of an Aggregate,
+              // it must also be in the aggregate expressions to be rewritten in the optimization
+              // phase.
+              if (containsExpr(a.groupingExpressions) && !containsExpr(a.aggregateExpressions)) {
+                failAnalysis(s"Correlated scalar subqueries in the group by clause " +

Review comment:
       nit: remove `s`.
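
To illustrate the comment in this hunk: the subquery is only rewritable when it also appears in the aggregate expressions, so a query like the one below should still pass the new check. This sketch reuses the assumed `l`/`r` views from above; the rewrite itself happens later, in the optimizer's `RewriteCorrelatedScalarSubquery` rule.

```scala
// Allowed shape: the same correlated scalar subquery appears both in
// GROUP BY and in the select list, so the optimizer can pull it into
// the Aggregate during rewriting.
spark.sql(
  """SELECT (SELECT min(d) FROM r WHERE r.c = l.a) AS m, count(*)
    |FROM l
    |GROUP BY (SELECT min(d) FROM r WHERE r.c = l.a)""".stripMargin).show()
```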

##########
File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
##########
@@ -305,6 +305,12 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog {
                     s"nor is it an aggregate function. " +
                     "Add to group by or wrap in first() (or first_value) if you don't care " +
                     "which value you get.")
+              case s: ScalarSubquery
+                  if s.children.nonEmpty && !groupingExprs.exists(_.semanticEquals(s)) =>
+                failAnalysis(s"Correlated scalar subquery '${s.sql}' is neither " +
+                  s"present in the group by, nor in an aggregate function. Add it to group by " +
+                  s"using ordinal position or wrap it in first() (or first_value) if you don't " +
+                  s"care which value you get.")

Review comment:
      nit: remove `s`.
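
The new branch fires for the complementary shape: a correlated scalar subquery in the select list of an aggregate query that matches no grouping expression and sits in no aggregate function. Continuing the sketch above:

```scala
// Rejected shape: the correlated scalar subquery is in the select list but
// not in GROUP BY and not inside an aggregate function, so the new branch
// above fails analysis with the quoted message.
spark.sql(
  """SELECT (SELECT min(d) FROM r WHERE r.c = l.a), count(*)
    |FROM l
    |GROUP BY l.b""".stripMargin)
```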

##########
File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
##########
@@ -305,6 +305,12 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog {
                     s"nor is it an aggregate function. " +
                     "Add to group by or wrap in first() (or first_value) if you don't care " +
                     "which value you get.")
+              case s: ScalarSubquery
+                  if s.children.nonEmpty && !groupingExprs.exists(_.semanticEquals(s)) =>
+                failAnalysis(s"Correlated scalar subquery '${s.sql}' is neither " +
+                  s"present in the group by, nor in an aggregate function. Add it to group by " +
+                  s"using ordinal position or wrap it in first() (or first_value) if you don't " +
+                  s"care which value you get.")

Review comment:
      Could you add an explicit assert to check that the group-by clause contains the same correlated subqueries as the aggregate expressions?
   https://github.com/apache/spark/blob/aff6c0febb40d9713895ba00d8c77ba00f04bd16/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala#L618-L624
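
For reference, a standalone sketch of the invariant behind the requested assert, abstracted away from catalyst types (the helper name is hypothetical; in the linked optimizer code the comparison would use `Expression.semanticEquals` on the collected `ScalarSubquery` instances):

```scala
// Hypothetical helper: every correlated scalar subquery collected from the
// aggregate expressions must have a semantically equal counterpart among
// the grouping expressions, otherwise the later rewrite would be unsound.
def assertGroupingCoversSubqueries[E](
    subqueries: Seq[E],
    groupingExprs: Seq[E])(semanticEquals: (E, E) => Boolean): Unit = {
  subqueries.foreach { s =>
    assert(groupingExprs.exists(g => semanticEquals(s, g)),
      s"Correlated scalar subquery $s must also appear in the grouping expressions")
  }
}
```

In the linked `RewriteCorrelatedScalarSubquery` code this check would presumably sit right after the subqueries are extracted from the aggregate expressions.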



