You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by GitBox <gi...@apache.org> on 2021/02/08 04:45:15 UTC

[GitHub] [spark] Ngone51 commented on a change in pull request #31485: [SPARK-34137][SQL] Update subquery's stats when build LogicalPlan's stats

Ngone51 commented on a change in pull request #31485:
URL: https://github.com/apache/spark/pull/31485#discussion_r571777858



##########
File path: sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala
##########
@@ -678,4 +680,50 @@ class StatisticsCollectionSuite extends StatisticsCollectionTestBase with Shared
       }
     }
   }
+
+  test("SPARK-34137: Update suquery's stats when build LogicalPlan's stats") {
+    withTable("t1", "t2") {
+      sql("create table t1 using parquet as select id as a, id as b from range(1000)")
+      sql("create table t2 using parquet as select id as c, id as d from range(2000)")
+
+      sql("ANALYZE TABLE t1 COMPUTE STATISTICS FOR ALL COLUMNS")
+      sql("ANALYZE TABLE t2 COMPUTE STATISTICS FOR ALL COLUMNS")
+      sql("set spark.sql.cbo.enabled=true")
+
+      val df = sql(
+        """
+          |WITH max_store_sales AS
+          |(
+          |  SELECT max(csales) tpcds_cmax
+          |  FROM (
+          |    SELECT sum(b) csales
+          |    FROM t1 WHERE a < 100
+          |  ) x
+          |),
+          |best_ss_customer AS
+          |(
+          |  SELECT c
+          |  FROM t2
+          |  WHERE d > (SELECT * FROM max_store_sales)
+          |)
+          |SELECT c FROM best_ss_customer
+          |""".stripMargin)
+      val optimizedPlan = df.queryExecution.optimizedPlan
+      optimizedPlan.stats
+      val subqueryExpression = ArrayBuffer.empty[SubqueryExpression]

Review comment:
       nit: use `mutable.ArrayBuffer.empty[SubqueryExpression]` here
   
   (since we have already imported the `mutable` package)




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org