You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by rx...@apache.org on 2015/01/20 20:20:18 UTC
spark git commit: [SQL][Minor] Refactors deeply nested FP style code
in BooleanSimplification
Repository: spark
Updated Branches:
refs/heads/master 9d9294aeb -> 814080278
[SQL][Minor] Refactors deeply nested FP style code in BooleanSimplification
This is a follow-up of #4090. The original deeply nested `reduceOption` code is hard to grasp.
<!-- Reviewable:start -->
[<img src="https://reviewable.io/review_button.png" height=40 alt="Review on Reviewable"/>](https://reviewable.io/reviews/apache/spark/4091)
<!-- Reviewable:end -->
Author: Cheng Lian <li...@databricks.com>
Closes #4091 from liancheng/refactor-boolean-simplification and squashes the following commits:
cd8860b [Cheng Lian] Improves `compareConditions` to handle more subtle cases
1bf3258 [Cheng Lian] Avoids converting predicate sets to lists
e833ca4 [Cheng Lian] Refactors deeply nested FP style code
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/81408027
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/81408027
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/81408027
Branch: refs/heads/master
Commit: 814080278605148e7a75651283253d09a04e5a74
Parents: 9d9294a
Author: Cheng Lian <li...@databricks.com>
Authored: Tue Jan 20 11:20:14 2015 -0800
Committer: Reynold Xin <rx...@databricks.com>
Committed: Tue Jan 20 11:20:14 2015 -0800
----------------------------------------------------------------------
.../sql/catalyst/optimizer/Optimizer.scala | 54 ++++++++++----------
.../optimizer/BooleanSimplificationSuite.scala | 40 +++++++++++----
2 files changed, 57 insertions(+), 37 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/81408027/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index 81bb012..376a9f3 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -314,7 +314,7 @@ object BooleanSimplification extends Rule[LogicalPlan] with PredicateHelper {
// a && a => a
case (l, r) if l fastEquals r => l
// (a || b) && (a || c) => a || (b && c)
- case (_, _) =>
+ case _ =>
// 1. Split left and right to get the disjunctive predicates,
// i.e. lhsSet = (a, b), rhsSet = (a, c)
// 2. Find the common predict between lhsSet and rhsSet, i.e. common = (a)
@@ -323,19 +323,20 @@ object BooleanSimplification extends Rule[LogicalPlan] with PredicateHelper {
val lhsSet = splitDisjunctivePredicates(left).toSet
val rhsSet = splitDisjunctivePredicates(right).toSet
val common = lhsSet.intersect(rhsSet)
- val ldiff = lhsSet.diff(common)
- val rdiff = rhsSet.diff(common)
- if (ldiff.size == 0 || rdiff.size == 0) {
- // a && (a || b) => a
- common.reduce(Or)
+ if (common.isEmpty) {
+ // No common factors, return the original predicate
+ and
} else {
- // (a || b || c || ...) && (a || b || d || ...) && (a || b || e || ...) ... =>
- // (a || b) || ((c || ...) && (f || ...) && (e || ...) && ...)
- (ldiff.reduceOption(Or) ++ rdiff.reduceOption(Or))
- .reduceOption(And)
- .map(_ :: common.toList)
- .getOrElse(common.toList)
- .reduce(Or)
+ val ldiff = lhsSet.diff(common)
+ val rdiff = rhsSet.diff(common)
+ if (ldiff.isEmpty || rdiff.isEmpty) {
+ // (a || b || c || ...) && (a || b) => (a || b)
+ common.reduce(Or)
+ } else {
+ // (a || b || c || ...) && (a || b || d || ...) =>
+ // ((c || ...) && (d || ...)) || a || b
+ (common + And(ldiff.reduce(Or), rdiff.reduce(Or))).reduce(Or)
+ }
}
} // end of And(left, right)
@@ -351,7 +352,7 @@ object BooleanSimplification extends Rule[LogicalPlan] with PredicateHelper {
// a || a => a
case (l, r) if l fastEquals r => l
// (a && b) || (a && c) => a && (b || c)
- case (_, _) =>
+ case _ =>
// 1. Split left and right to get the conjunctive predicates,
// i.e. lhsSet = (a, b), rhsSet = (a, c)
// 2. Find the common predict between lhsSet and rhsSet, i.e. common = (a)
@@ -360,19 +361,20 @@ object BooleanSimplification extends Rule[LogicalPlan] with PredicateHelper {
val lhsSet = splitConjunctivePredicates(left).toSet
val rhsSet = splitConjunctivePredicates(right).toSet
val common = lhsSet.intersect(rhsSet)
- val ldiff = lhsSet.diff(common)
- val rdiff = rhsSet.diff(common)
- if ( ldiff.size == 0 || rdiff.size == 0) {
- // a || (b && a) => a
- common.reduce(And)
+ if (common.isEmpty) {
+ // No common factors, return the original predicate
+ or
} else {
- // (a && b && c && ...) || (a && b && d && ...) || (a && b && e && ...) ... =>
- // a && b && ((c && ...) || (d && ...) || (e && ...) || ...)
- (ldiff.reduceOption(And) ++ rdiff.reduceOption(And))
- .reduceOption(Or)
- .map(_ :: common.toList)
- .getOrElse(common.toList)
- .reduce(And)
+ val ldiff = lhsSet.diff(common)
+ val rdiff = rhsSet.diff(common)
+ if (ldiff.isEmpty || rdiff.isEmpty) {
+ // (a && b) || (a && b && c && ...) => a && b
+ common.reduce(And)
+ } else {
+ // (a && b && c && ...) || (a && b && d && ...) =>
+ // ((c && ...) || (d && ...)) && a && b
+ (common + Or(ldiff.reduce(And), rdiff.reduce(And))).reduce(And)
+ }
}
} // end of Or(left, right)
http://git-wip-us.apache.org/repos/asf/spark/blob/81408027/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala
index a0863da..264a0ef 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala
@@ -18,14 +18,14 @@
package org.apache.spark.sql.catalyst.optimizer
import org.apache.spark.sql.catalyst.analysis.EliminateAnalysisOperators
-import org.apache.spark.sql.catalyst.expressions.{Literal, Expression}
+import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.rules._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.dsl.expressions._
-class BooleanSimplificationSuite extends PlanTest {
+class BooleanSimplificationSuite extends PlanTest with PredicateHelper {
object Optimize extends RuleExecutor[LogicalPlan] {
val batches =
@@ -40,11 +40,29 @@ class BooleanSimplificationSuite extends PlanTest {
val testRelation = LocalRelation('a.int, 'b.int, 'c.int, 'd.string)
- def checkCondition(originCondition: Expression, optimizedCondition: Expression): Unit = {
- val originQuery = testRelation.where(originCondition).analyze
- val optimized = Optimize(originQuery)
- val expected = testRelation.where(optimizedCondition).analyze
- comparePlans(optimized, expected)
+ // The `foldLeft` is required to handle cases like comparing `a && (b && c)` and `(a && b) && c`
+ def compareConditions(e1: Expression, e2: Expression): Boolean = (e1, e2) match {
+ case (lhs: And, rhs: And) =>
+ val lhsSet = splitConjunctivePredicates(lhs).toSet
+ val rhsSet = splitConjunctivePredicates(rhs).toSet
+ lhsSet.foldLeft(rhsSet) { (set, e) =>
+ set.find(compareConditions(_, e)).map(set - _).getOrElse(set)
+ }.isEmpty
+
+ case (lhs: Or, rhs: Or) =>
+ val lhsSet = splitDisjunctivePredicates(lhs).toSet
+ val rhsSet = splitDisjunctivePredicates(rhs).toSet
+ lhsSet.foldLeft(rhsSet) { (set, e) =>
+ set.find(compareConditions(_, e)).map(set - _).getOrElse(set)
+ }.isEmpty
+
+ case (l, r) => l == r
+ }
+
+ def checkCondition(input: Expression, expected: Expression): Unit = {
+ val plan = testRelation.where(input).analyze
+ val actual = Optimize(plan).expressions.head
+ compareConditions(actual, expected)
}
test("a && a => a") {
@@ -72,8 +90,8 @@ class BooleanSimplificationSuite extends PlanTest {
(((('b > 3) && ('c > 2)) ||
(('c < 1) && ('a === 5))) ||
(('b < 5) && ('a > 1))) && ('a === 'b)
- checkCondition(input, expected)
+ checkCondition(input, expected)
}
test("(a || b || c || ...) && (a || b || d || ...) && (a || b || e || ...) ...") {
@@ -85,8 +103,8 @@ class BooleanSimplificationSuite extends PlanTest {
checkCondition(('a < 2 || 'b > 3) && ('a < 2 || 'c > 5), ('b > 3 && 'c > 5) || 'a < 2)
- var input: Expression = ('a === 'b || 'b > 3) && ('a === 'b || 'a > 3) && ('a === 'b || 'a < 5)
- var expected: Expression = ('b > 3 && 'a > 3 && 'a < 5) || 'a === 'b
- checkCondition(input, expected)
+ checkCondition(
+ ('a === 'b || 'b > 3) && ('a === 'b || 'a > 3) && ('a === 'b || 'a < 5),
+ ('b > 3 && 'a > 3 && 'a < 5) || 'a === 'b)
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org