You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by yu...@apache.org on 2022/06/01 03:21:50 UTC
[spark] branch master updated: [SPARK-39338][SQL] Remove dynamic pruning subquery if pruningKey's references is empty
This is an automated email from the ASF dual-hosted git repository.
yumwang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new cb0e4198996 [SPARK-39338][SQL] Remove dynamic pruning subquery if pruningKey's references is empty
cb0e4198996 is described below
commit cb0e41989964dbb3b287f6df8f0e70222e50c5eb
Author: Yuming Wang <yu...@ebay.com>
AuthorDate: Wed Jun 1 11:21:13 2022 +0800
[SPARK-39338][SQL] Remove dynamic pruning subquery if pruningKey's references is empty
### What changes were proposed in this pull request?
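Remove the dynamic pruning subquery if the pruningKey's references are empty, i.e. the pruning key is a constant expression that does not reference any column (such as the literal `4 AS store_id` below). For example: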
```sql
SELECT f.store_id,
f.date_id,
s.state_province
FROM (SELECT store_id,
date_id,
product_id
FROM fact_stats
WHERE date_id <= 1000
UNION ALL
SELECT 4 AS store_id,
date_id,
product_id
FROM fact_sk
WHERE date_id >= 1300) f
JOIN dim_store s
ON f.store_id = s.store_id
WHERE s.country IN ('US', 'NL')
```
Before this PR | After this PR
-- | --
![image](https://user-images.githubusercontent.com/5399861/170940803-b4d4b93d-96d7-47de-ac22-d13259f3447a.png) | ![image](https://user-images.githubusercontent.com/5399861/170941010-b7eec26e-9f93-4ae6-aa10-aacdde15af0d.png)
### Why are the changes needed?
Remove the useless dynamic pruning subquery, because a pruning key that references no columns cannot prune any partitions.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Unit test.
Closes #36724 from wangyum/SPARK-39338.
Authored-by: Yuming Wang <yu...@ebay.com>
Signed-off-by: Yuming Wang <yu...@ebay.com>
---
.../CleanupDynamicPruningFilters.scala | 6 +++--
.../spark/sql/DynamicPartitionPruningSuite.scala | 29 ++++++++++++++++++++++
2 files changed, 33 insertions(+), 2 deletions(-)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/CleanupDynamicPruningFilters.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/CleanupDynamicPruningFilters.scala
index 65621fb1860..9607ca53964 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/CleanupDynamicPruningFilters.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/CleanupDynamicPruningFilters.scala
@@ -50,11 +50,13 @@ object CleanupDynamicPruningFilters extends Rule[LogicalPlan] with PredicateHelp
private def removeUnnecessaryDynamicPruningSubquery(plan: LogicalPlan): LogicalPlan = {
plan.transformWithPruning(_.containsPattern(DYNAMIC_PRUNING_SUBQUERY)) {
case f @ Filter(condition, _) =>
- val unnecessaryPruningKeys = ExpressionSet(collectEqualityConditionExpressions(condition))
+ lazy val unnecessaryPruningKeys =
+ ExpressionSet(collectEqualityConditionExpressions(condition))
val newCondition = condition.transformWithPruning(
_.containsPattern(DYNAMIC_PRUNING_SUBQUERY)) {
case dynamicPruning: DynamicPruningSubquery
- if unnecessaryPruningKeys.contains(dynamicPruning.pruningKey) =>
+ if dynamicPruning.pruningKey.references.isEmpty ||
+ unnecessaryPruningKeys.contains(dynamicPruning.pruningKey) =>
TrueLiteral
}
f.copy(condition = newCondition)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala
index 29c178d6b83..b1b9ed04568 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala
@@ -1587,6 +1587,35 @@ abstract class DynamicPartitionPruningSuiteBase
}
}
}
+
+ test("SPARK-39338: Remove dynamic pruning subquery if pruningKey's references is empty") {
+ withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_ENABLED.key -> "true") {
+ val df = sql(
+ """
+ |SELECT f.store_id,
+ | f.date_id,
+ | s.state_province
+ |FROM (SELECT store_id,
+ | date_id,
+ | product_id
+ | FROM fact_stats
+ | WHERE date_id <= 1000
+ | UNION ALL
+ | SELECT 4 AS store_id,
+ | date_id,
+ | product_id
+ | FROM fact_sk
+ | WHERE date_id >= 1300) f
+ |JOIN dim_store s
+ |ON f.store_id = s.store_id
+ |WHERE s.country IN ('US', 'NL')
+ |""".stripMargin)
+
+ checkPartitionPruningPredicate(df, withSubquery = false, withBroadcast = true)
+ checkAnswer(df, Row(4, 1300, "California") :: Row(1, 1000, "North-Holland") :: Nil)
+ assert(collectDynamicPruningExpressions(df.queryExecution.executedPlan).size === 1)
+ }
+ }
}
abstract class DynamicPartitionPruningDataSourceSuiteBase
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org