You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by GitBox <gi...@apache.org> on 2022/12/08 21:38:54 UTC

[GitHub] [spark] ahshahid commented on a diff in pull request #38714: [WIP][SPARK-41141]. avoid introducing a new aggregate expression in the analysis phase when subquery is referencing it

ahshahid commented on code in PR #38714:
URL: https://github.com/apache/spark/pull/38714#discussion_r1043850142


##########
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveSubquerySuite.scala:
##########
@@ -270,4 +277,66 @@ class ResolveSubquerySuite extends AnalysisTest {
       ), Seq(a, b)).as("sub") :: Nil, t1)
     )
   }
+
+  test("SPARK-41141 aggregates of outer query referenced in subquery should not create" +
+    " new aggregates if possible") {
+    withSQLConf(SQLConf.OPTIMIZER_EXCLUDED_RULES.key -> s"${PropagateEmptyRelation.ruleName}") {
+      val a = 'a.int
+      val b = 'b.int
+      val c = 'c.int
+      val d = 'd.int
+
+      val t1 = LocalRelation(a, b)
+      val t2 = LocalRelation(c, d)
+      val optimizer = new SimpleTestOptimizer()
+
+      val plansToTest = Seq(
+        t1.select($"a", $"b").
+          having($"b")(Cos(sum($"a")))(Exists(t2.select($"c").
+            where($"d" === Cos(sum($"a"))))) -> 1,
+
+        t1.select($"a", $"b").
+          having($"b")(sum($"a"))(Exists(t2.select($"c").
+            where($"d" === Cos(sum($"a"))))) -> 1,
+        t1.select($"a", $"b").
+          having($"b")(Cos(sum($"a")))(Exists(t2.select($"c").
+            where($"d" === sum($"a")))) -> 2,
+        t1.select($"a", $"b").
+          having($"b")(sum($"a"), Cos(sum($"b")))(Exists(t2.select($"c").
+            where($"d" === Cos(sum($"a")) + sum($"a") + sum($"b") + Cos(sum($"b"))))) -> 3
+      )
+
+      plansToTest.foreach {
+        case (logicalPlan: LogicalPlan, numAggFunctions) =>
+          assertAnalysis(logicalPlan, numAggFunctions)
+      }
+
+      def assertAnalysis(logicalPlan: LogicalPlan, expectedAggregateFunctions: Int): Unit = {
+        val analyzedQuery = logicalPlan.analyze
+        Assert.assertTrue(analyzedQuery.analyzed)
+        val optimizedQuery = optimizer.execute(analyzedQuery)

Review Comment:
   @peter-toth I have updated the test and implemented the feedback for handling cases such as `(1 + sum(x))` being used in the inner query. Please check.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org