You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by yu...@apache.org on 2022/06/01 03:21:50 UTC

[spark] branch master updated: [SPARK-39338][SQL] Remove dynamic pruning subquery if pruningKey's references is empty

This is an automated email from the ASF dual-hosted git repository.

yumwang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new cb0e4198996 [SPARK-39338][SQL] Remove dynamic pruning subquery if pruningKey's references is empty
cb0e4198996 is described below

commit cb0e41989964dbb3b287f6df8f0e70222e50c5eb
Author: Yuming Wang <yu...@ebay.com>
AuthorDate: Wed Jun 1 11:21:13 2022 +0800

    [SPARK-39338][SQL] Remove dynamic pruning subquery if pruningKey's references is empty
    
    ### What changes were proposed in this pull request?
    
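    Remove the dynamic pruning subquery if the pruningKey's references are empty, i.e. the pruning key is a constant expression such as `4 AS store_id` in the second branch of the `UNION ALL`. For example: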
    ```sql
    SELECT f.store_id,
           f.date_id,
           s.state_province
    FROM (SELECT   store_id,
                   date_id,
                   product_id
          FROM   fact_stats
          WHERE  date_id <= 1000
          UNION ALL
          SELECT 4 AS store_id,
                   date_id,
                   product_id
          FROM   fact_sk
          WHERE  date_id >= 1300) f
    JOIN dim_store s
    ON f.store_id = s.store_id
    WHERE s.country IN ('US', 'NL')
    ```
    
    Before this PR | After this PR
    -- | --
    ![image](https://user-images.githubusercontent.com/5399861/170940803-b4d4b93d-96d7-47de-ac22-d13259f3447a.png) | ![image](https://user-images.githubusercontent.com/5399861/170941010-b7eec26e-9f93-4ae6-aa10-aacdde15af0d.png)
    
    ### Why are the changes needed?
    
    Such a dynamic pruning subquery is useless: a pruning key that references no columns cannot prune any partitions, so the subquery only adds overhead.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    Unit test.
    
    Closes #36724 from wangyum/SPARK-39338.
    
    Authored-by: Yuming Wang <yu...@ebay.com>
    Signed-off-by: Yuming Wang <yu...@ebay.com>
---
 .../CleanupDynamicPruningFilters.scala             |  6 +++--
 .../spark/sql/DynamicPartitionPruningSuite.scala   | 29 ++++++++++++++++++++++
 2 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/CleanupDynamicPruningFilters.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/CleanupDynamicPruningFilters.scala
index 65621fb1860..9607ca53964 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/CleanupDynamicPruningFilters.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/CleanupDynamicPruningFilters.scala
@@ -50,11 +50,13 @@ object CleanupDynamicPruningFilters extends Rule[LogicalPlan] with PredicateHelp
   private def removeUnnecessaryDynamicPruningSubquery(plan: LogicalPlan): LogicalPlan = {
     plan.transformWithPruning(_.containsPattern(DYNAMIC_PRUNING_SUBQUERY)) {
       case f @ Filter(condition, _) =>
-        val unnecessaryPruningKeys = ExpressionSet(collectEqualityConditionExpressions(condition))
+        lazy val unnecessaryPruningKeys =
+          ExpressionSet(collectEqualityConditionExpressions(condition))
         val newCondition = condition.transformWithPruning(
           _.containsPattern(DYNAMIC_PRUNING_SUBQUERY)) {
           case dynamicPruning: DynamicPruningSubquery
-              if unnecessaryPruningKeys.contains(dynamicPruning.pruningKey) =>
+              if dynamicPruning.pruningKey.references.isEmpty ||
+                unnecessaryPruningKeys.contains(dynamicPruning.pruningKey) =>
             TrueLiteral
         }
         f.copy(condition = newCondition)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala
index 29c178d6b83..b1b9ed04568 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala
@@ -1587,6 +1587,35 @@ abstract class DynamicPartitionPruningSuiteBase
       }
     }
   }
+
+  test("SPARK-39338: Remove dynamic pruning subquery if pruningKey's references is empty") {
+    withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_ENABLED.key -> "true") {
+      val df = sql(
+        """
+          |SELECT f.store_id,
+          |       f.date_id,
+          |       s.state_province
+          |FROM (SELECT   store_id,
+          |               date_id,
+          |               product_id
+          |      FROM   fact_stats
+          |      WHERE  date_id <= 1000
+          |      UNION ALL
+          |      SELECT 4 AS store_id,
+          |               date_id,
+          |               product_id
+          |      FROM   fact_sk
+          |      WHERE  date_id >= 1300) f
+          |JOIN dim_store s
+          |ON f.store_id = s.store_id
+          |WHERE s.country IN ('US', 'NL')
+          |""".stripMargin)
+
+      checkPartitionPruningPredicate(df, withSubquery = false, withBroadcast = true)
+      checkAnswer(df, Row(4, 1300, "California") :: Row(1, 1000, "North-Holland") :: Nil)
+      assert(collectDynamicPruningExpressions(df.queryExecution.executedPlan).size === 1)
+    }
+  }
 }
 
 abstract class DynamicPartitionPruningDataSourceSuiteBase


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org