Posted to commits@spark.apache.org by we...@apache.org on 2020/09/01 05:55:57 UTC

[spark] branch branch-3.0 updated: [SPARK-32659][SQL][FOLLOWUP] Improve test for pruning DPP on non-atomic type

This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 98a1247  [SPARK-32659][SQL][FOLLOWUP] Improve test for pruning DPP on non-atomic type
98a1247 is described below

commit 98a1247f379170afbd9a64a3a51d6235ff3df02e
Author: Yuming Wang <yu...@ebay.com>
AuthorDate: Tue Sep 1 05:51:04 2020 +0000

    [SPARK-32659][SQL][FOLLOWUP] Improve test for pruning DPP on non-atomic type
    
    ### What changes were proposed in this pull request?
    
    Improve the test for pruning DPP on non-atomic types:
    - Avoid creating new partitioned tables, which can take about 30 seconds.
    - Add a test for the `array` type (see the sketch below).
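    
    For reference, a condensed sketch of the reworked test body (drawn from the diff below; `fact_stats` and `dim_stats` are partitioned tables already created by the suite's shared fixtures, so no new tables have to be written):
    
    ```scala
    Seq("struct", "array").foreach { dataType =>
      // Wrap the partition column in a non-atomic type on both sides of the join,
      // so DPP has to prune on a struct/array key rather than an atomic one.
      val df = sql(
        s"""
           |SELECT f.date_id, f.product_id, f.units_sold, f.store_id FROM fact_stats f
           |JOIN dim_stats s
           |ON $dataType(f.store_id) = $dataType(s.store_id) WHERE s.country = 'DE'
         """.stripMargin)
      checkAnswer(df,
        Row(1030, 2, 10, 3) :: Row(1040, 2, 50, 3) ::
        Row(1050, 2, 50, 3) :: Row(1060, 2, 50, 3) :: Nil)
    }
    ```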
    
    ### Why are the changes needed?
    
    Improve the test: it runs faster (no new partitioned tables are created) and covers more non-atomic types (`array` in addition to `struct`).
    
    ### Does this PR introduce _any_ user-facing change?
    
    No
    
    ### How was this patch tested?
    
    N/A
    
    Closes #29595 from wangyum/SPARK-32659-test.
    
    Authored-by: Yuming Wang <yu...@ebay.com>
    Signed-off-by: Wenchen Fan <we...@databricks.com>
    (cherry picked from commit a701bc79e3bb936e8fb4a4fbe11e9e0bd0ccd8ac)
    Signed-off-by: Wenchen Fan <we...@databricks.com>
---
 .../spark/sql/DynamicPartitionPruningSuite.scala   | 68 +++++++++-------------
 1 file changed, 26 insertions(+), 42 deletions(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala
index bf91bdb7..47c5c9ea 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala
@@ -19,7 +19,8 @@ package org.apache.spark.sql
 
 import org.scalatest.GivenWhenThen
 
-import org.apache.spark.sql.catalyst.expressions.{CodegenObjectFactoryMode, DynamicPruningExpression, Expression}
+import org.apache.spark.sql.catalyst.expressions.{DynamicPruningExpression, Expression}
+import org.apache.spark.sql.catalyst.expressions.CodegenObjectFactoryMode._
 import org.apache.spark.sql.catalyst.plans.ExistenceJoin
 import org.apache.spark.sql.execution._
 import org.apache.spark.sql.execution.adaptive.{AdaptiveSparkPlanExec, AdaptiveSparkPlanHelper}
@@ -1311,48 +1312,31 @@ abstract class DynamicPartitionPruningSuiteBase
   }
 
   test("SPARK-32659: Fix the data issue when pruning DPP on non-atomic type") {
-    withSQLConf(
-      SQLConf.DYNAMIC_PARTITION_PRUNING_FALLBACK_FILTER_RATIO.key -> "2", // Make sure insert DPP
-      SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST_ONLY.key -> "false") {
-      withTable("df1", "df2") {
-        spark.range(1000)
-          .select(col("id"), col("id").as("k"))
-          .write
-          .partitionBy("k")
-          .format(tableFormat)
-          .mode("overwrite")
-          .saveAsTable("df1")
-
-        spark.range(100)
-          .select(col("id"), col("id").as("k"))
-          .write
-          .partitionBy("k")
-          .format(tableFormat)
-          .mode("overwrite")
-          .saveAsTable("df2")
-
-        Seq(CodegenObjectFactoryMode.NO_CODEGEN,
-          CodegenObjectFactoryMode.CODEGEN_ONLY).foreach { mode =>
-          Seq(true, false).foreach { pruning =>
-            withSQLConf(
-              SQLConf.CODEGEN_FACTORY_MODE.key -> mode.toString,
-              SQLConf.DYNAMIC_PARTITION_PRUNING_ENABLED.key -> s"$pruning") {
-              val df = sql(
-                """
-                  |SELECT df1.id, df2.k
-                  |FROM df1
-                  |  JOIN df2
-                  |  ON struct(df1.k) = struct(df2.k)
-                  |    AND df2.id < 2
-                  |""".stripMargin)
-              if (pruning) {
-                checkPartitionPruningPredicate(df, true, false)
-              } else {
-                checkPartitionPruningPredicate(df, false, false)
-              }
-
-              checkAnswer(df, Row(0, 0) :: Row(1, 1) :: Nil)
+    Seq(NO_CODEGEN, CODEGEN_ONLY).foreach { mode =>
+      Seq(true, false).foreach { pruning =>
+        withSQLConf(
+          SQLConf.CODEGEN_FACTORY_MODE.key -> mode.toString,
+          SQLConf.DYNAMIC_PARTITION_PRUNING_ENABLED.key -> s"$pruning") {
+          Seq("struct", "array").foreach { dataType =>
+            val df = sql(
+              s"""
+                 |SELECT f.date_id, f.product_id, f.units_sold, f.store_id FROM fact_stats f
+                 |JOIN dim_stats s
+                 |ON $dataType(f.store_id) = $dataType(s.store_id) WHERE s.country = 'DE'
+              """.stripMargin)
+
+            if (pruning) {
+              checkPartitionPruningPredicate(df, false, true)
+            } else {
+              checkPartitionPruningPredicate(df, false, false)
             }
+
+            checkAnswer(df,
+              Row(1030, 2, 10, 3) ::
+              Row(1040, 2, 50, 3) ::
+              Row(1050, 2, 50, 3) ::
+              Row(1060, 2, 50, 3) :: Nil
+            )
           }
         }
       }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org