You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by yh...@apache.org on 2016/03/17 00:28:19 UTC
spark git commit: [SPARK-13869][SQL] Remove redundant conditions
while combining filters
Repository: spark
Updated Branches:
refs/heads/master f96997ba2 -> 77ba3021c
[SPARK-13869][SQL] Remove redundant conditions while combining filters
## What changes were proposed in this pull request?
**[I'll link it to the JIRA once ASF JIRA is back online]**
This PR modifies the existing `CombineFilters` rule to remove redundant conditions while combining individual filter predicates. For instance, queries of the form `table.where('a === 1 && 'b === 1).where('a === 1 && 'c === 1)` will now be optimized to `table.where('a === 1 && 'b === 1 && 'c === 1)` (instead of `table.where('a === 1 && 'a === 1 && 'b === 1 && 'c === 1)`)
## How was this patch tested?
Unit test in `FilterPushdownSuite`
Author: Sameer Agarwal <sa...@databricks.com>
Closes #11670 from sameeragarwal/combine-filters.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/77ba3021
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/77ba3021
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/77ba3021
Branch: refs/heads/master
Commit: 77ba3021c12dc63cb7d831f964f901e0474acd96
Parents: f96997b
Author: Sameer Agarwal <sa...@databricks.com>
Authored: Wed Mar 16 16:27:46 2016 -0700
Committer: Yin Huai <yh...@databricks.com>
Committed: Wed Mar 16 16:27:46 2016 -0700
----------------------------------------------------------------------
.../spark/sql/catalyst/optimizer/Optimizer.scala | 15 +++++++++++----
.../sql/catalyst/optimizer/FilterPushdownSuite.scala | 15 +++++++++++++++
2 files changed, 26 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/77ba3021/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index 76f50a3..3f57b07 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -818,12 +818,19 @@ object CombineUnions extends Rule[LogicalPlan] {
}
/**
- * Combines two adjacent [[Filter]] operators into one, merging the
- * conditions into one conjunctive predicate.
+ * Combines two adjacent [[Filter]] operators into one, merging the non-redundant conditions into
+ * one conjunctive predicate.
*/
-object CombineFilters extends Rule[LogicalPlan] {
+object CombineFilters extends Rule[LogicalPlan] with PredicateHelper {
def apply(plan: LogicalPlan): LogicalPlan = plan transform {
- case ff @ Filter(fc, nf @ Filter(nc, grandChild)) => Filter(And(nc, fc), grandChild)
+ case ff @ Filter(fc, nf @ Filter(nc, grandChild)) =>
+ (ExpressionSet(splitConjunctivePredicates(fc)) --
+ ExpressionSet(splitConjunctivePredicates(nc))).reduceOption(And) match {
+ case Some(ac) =>
+ Filter(And(ac, nc), grandChild)
+ case None =>
+ nf
+ }
}
}
http://git-wip-us.apache.org/repos/asf/spark/blob/77ba3021/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala
index a636d63..b84ae7c 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala
@@ -81,6 +81,21 @@ class FilterPushdownSuite extends PlanTest {
comparePlans(optimized, correctAnswer)
}
+ test("combine redundant filters") {
+ val originalQuery =
+ testRelation
+ .where('a === 1 && 'b === 1)
+ .where('a === 1 && 'c === 1)
+
+ val optimized = Optimize.execute(originalQuery.analyze)
+ val correctAnswer =
+ testRelation
+ .where('a === 1 && 'b === 1 && 'c === 1)
+ .analyze
+
+ comparePlans(optimized, correctAnswer)
+ }
+
test("can't push without rewrite") {
val originalQuery =
testRelation
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org