You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2016/03/22 18:17:14 UTC

spark git commit: [SPARK-14029][SQL] Improve BooleanSimplification optimization by implementing `Not` canonicalization.

Repository: spark
Updated Branches:
  refs/heads/master 0ce01635c -> c632bdc01


[SPARK-14029][SQL] Improve BooleanSimplification optimization by implementing `Not` canonicalization.

## What changes were proposed in this pull request?

Currently, **BooleanSimplification** optimization can handle the following cases.
* a && (!a || b ) ==> a && b
* a && (b || !a ) ==> a && b

However, it can not handle the followings cases since those equations fail at the comparisons between their canonicalized forms.
* a < 1 && (!(a < 1) || b)     ==> (a < 1) && b
* a <= 1 && (!(a <= 1) || b) ==> (a <= 1) && b
* a > 1 && (!(a > 1) || b)     ==> (a > 1) && b
* a >= 1 && (!(a >= 1) || b) ==> (a >= 1) && b

This PR implements the above cases and also the followings, too.
* a < 1 && ((a >= 1) || b )   ==> (a < 1) && b
* a <= 1 && ((a > 1) || b )   ==> (a <= 1) && b
* a > 1 && ((a <= 1) || b)  ==> (a > 1) && b
* a >= 1 && ((a < 1) || b)  ==> (a >= 1) && b

## How was this patch tested?

Pass the Jenkins tests including new test cases in BooleanSimplicationSuite.

Author: Dongjoon Hyun <do...@apache.org>

Closes #11851 from dongjoon-hyun/SPARK-14029.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c632bdc0
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c632bdc0
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c632bdc0

Branch: refs/heads/master
Commit: c632bdc01f51bb253fa3dc258ffa7fdecf814d35
Parents: 0ce0163
Author: Dongjoon Hyun <do...@apache.org>
Authored: Tue Mar 22 10:17:08 2016 -0700
Committer: Michael Armbrust <mi...@databricks.com>
Committed: Tue Mar 22 10:17:08 2016 -0700

----------------------------------------------------------------------
 .../sql/catalyst/expressions/Canonicalize.scala |  9 +++++++
 .../expressions/ExpressionSetSuite.scala        |  6 +++++
 .../optimizer/BooleanSimplificationSuite.scala  | 28 ++++++++++++++++++++
 3 files changed, 43 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/c632bdc0/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala
index ae1f600..07ba7d5 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala
@@ -71,6 +71,15 @@ object Canonicalize extends {
     case GreaterThanOrEqual(l, r) if l.hashCode() > r.hashCode() => LessThanOrEqual(r, l)
     case LessThanOrEqual(l, r) if l.hashCode() > r.hashCode() => GreaterThanOrEqual(r, l)
 
+    case Not(GreaterThan(l, r)) if l.hashCode() > r.hashCode() => GreaterThan(r, l)
+    case Not(GreaterThan(l, r)) => LessThanOrEqual(l, r)
+    case Not(LessThan(l, r)) if l.hashCode() > r.hashCode() => LessThan(r, l)
+    case Not(LessThan(l, r)) => GreaterThanOrEqual(l, r)
+    case Not(GreaterThanOrEqual(l, r)) if l.hashCode() > r.hashCode() => GreaterThanOrEqual(r, l)
+    case Not(GreaterThanOrEqual(l, r)) => LessThan(l, r)
+    case Not(LessThanOrEqual(l, r)) if l.hashCode() > r.hashCode() => LessThanOrEqual(r, l)
+    case Not(LessThanOrEqual(l, r)) => GreaterThan(l, r)
+
     case _ => e
   }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/c632bdc0/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSetSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSetSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSetSuite.scala
index 0b350c6..60939ee 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSetSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSetSuite.scala
@@ -74,6 +74,12 @@ class ExpressionSetSuite extends SparkFunSuite {
   setTest(1, aUpper > bUpper, bUpper < aUpper)
   setTest(1, aUpper >= bUpper, bUpper <= aUpper)
 
+  // `Not` canonicalization
+  setTest(1, Not(aUpper > 1), aUpper <= 1, Not(Literal(1) < aUpper), Literal(1) >= aUpper)
+  setTest(1, Not(aUpper < 1), aUpper >= 1, Not(Literal(1) > aUpper), Literal(1) <= aUpper)
+  setTest(1, Not(aUpper >= 1), aUpper < 1, Not(Literal(1) <= aUpper), Literal(1) > aUpper)
+  setTest(1, Not(aUpper <= 1), aUpper > 1, Not(Literal(1) >= aUpper), Literal(1) < aUpper)
+
   test("add to / remove from set") {
     val initialSet = ExpressionSet(aUpper + 1 :: Nil)
 

http://git-wip-us.apache.org/repos/asf/spark/blob/c632bdc0/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala
index 47b79fe..2ab31ee 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala
@@ -99,6 +99,34 @@ class BooleanSimplificationSuite extends PlanTest with PredicateHelper {
     checkCondition(('b || !'a ) && 'a, 'b && 'a)
   }
 
+  test("a < 1 && (!(a < 1) || b)") {
+    checkCondition('a < 1 && (!('a < 1) || 'b), ('a < 1) && 'b)
+    checkCondition('a < 1 && ('b || !('a < 1)), ('a < 1) && 'b)
+
+    checkCondition('a <= 1 && (!('a <= 1) || 'b), ('a <= 1) && 'b)
+    checkCondition('a <= 1 && ('b || !('a <= 1)), ('a <= 1) && 'b)
+
+    checkCondition('a > 1 && (!('a > 1) || 'b), ('a > 1) && 'b)
+    checkCondition('a > 1 && ('b || !('a > 1)), ('a > 1) && 'b)
+
+    checkCondition('a >= 1 && (!('a >= 1) || 'b), ('a >= 1) && 'b)
+    checkCondition('a >= 1 && ('b || !('a >= 1)), ('a >= 1) && 'b)
+  }
+
+  test("a < 1 && ((a >= 1) || b)") {
+    checkCondition('a < 1 && ('a >= 1 || 'b ), ('a < 1) && 'b)
+    checkCondition('a < 1 && ('b || 'a >= 1), ('a < 1) && 'b)
+
+    checkCondition('a <= 1 && ('a > 1 || 'b ), ('a <= 1) && 'b)
+    checkCondition('a <= 1 && ('b || 'a > 1), ('a <= 1) && 'b)
+
+    checkCondition('a > 1 && (('a <= 1) || 'b), ('a > 1) && 'b)
+    checkCondition('a > 1 && ('b || ('a <= 1)), ('a > 1) && 'b)
+
+    checkCondition('a >= 1 && (('a < 1) || 'b), ('a >= 1) && 'b)
+    checkCondition('a >= 1 && ('b || ('a < 1)), ('a >= 1) && 'b)
+  }
+
   test("DeMorgan's law") {
     checkCondition(!('a && 'b), !'a || !'b)
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org