Posted to commits@spark.apache.org by we...@apache.org on 2018/09/12 13:12:13 UTC

spark git commit: [SPARK-25402][SQL] Null handling in BooleanSimplification

Repository: spark
Updated Branches:
  refs/heads/master 9f5c5b4cc -> 79cc59718


[SPARK-25402][SQL] Null handling in BooleanSimplification

## What changes were proposed in this pull request?
This PR fixes null handling in the `BooleanSimplification` rule. Two of its rewrites, `a && !a => false` and `a || !a => true`, are incorrect when the operands are nullable: under SQL three-valued logic both expressions evaluate to null, not false/true, when `a` is null. This PR replaces those rewrites with null-preserving equivalents.
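
A minimal repro sketch (assuming a local `SparkSession` named `spark`; it mirrors the regression test added to `DataFrameSuite` below): before this fix, the predicate was folded to `TRUE`, so the row whose `a` is null was incorrectly returned.

```scala
import org.apache.spark.sql.Row
import org.apache.spark.sql.types.StructType

// Single row where the boolean column `a` is NULL.
val schema = StructType.fromDDL("a boolean, b int")
val rows = Seq(Row(null, 1))
val df = spark.createDataFrame(spark.sparkContext.parallelize(rows), schema)

// Under three-valued logic `(NOT a) OR a` is NULL when `a` is NULL, and WHERE
// drops rows whose predicate is NULL, so the result must be empty.
df.where("(NOT a) OR a").show()   // expected: no rows
```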

## How was this patch tested?
Added test cases

Closes #22390 from gatorsmile/fixBooleanSimplification.

Authored-by: gatorsmile <ga...@gmail.com>
Signed-off-by: Wenchen Fan <we...@databricks.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/79cc5971
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/79cc5971
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/79cc5971

Branch: refs/heads/master
Commit: 79cc59718fdf7785bdc37a26bb8df4c6151114a6
Parents: 9f5c5b4
Author: gatorsmile <ga...@gmail.com>
Authored: Wed Sep 12 21:11:22 2018 +0800
Committer: Wenchen Fan <we...@databricks.com>
Committed: Wed Sep 12 21:11:22 2018 +0800

----------------------------------------------------------------------
 .../sql/catalyst/optimizer/expressions.scala    | 13 ++++--
 .../optimizer/BooleanSimplificationSuite.scala  | 45 ++++++++++++++++++--
 .../org/apache/spark/sql/DataFrameSuite.scala   | 10 +++++
 3 files changed, 60 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/79cc5971/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala
index 5629b72..f803758 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala
@@ -263,10 +263,15 @@ object BooleanSimplification extends Rule[LogicalPlan] with PredicateHelper {
       case TrueLiteral Or _ => TrueLiteral
       case _ Or TrueLiteral => TrueLiteral
 
-      case a And b if Not(a).semanticEquals(b) => FalseLiteral
-      case a Or b if Not(a).semanticEquals(b) => TrueLiteral
-      case a And b if a.semanticEquals(Not(b)) => FalseLiteral
-      case a Or b if a.semanticEquals(Not(b)) => TrueLiteral
+      case a And b if Not(a).semanticEquals(b) =>
+        If(IsNull(a), Literal.create(null, a.dataType), FalseLiteral)
+      case a And b if a.semanticEquals(Not(b)) =>
+        If(IsNull(b), Literal.create(null, b.dataType), FalseLiteral)
+
+      case a Or b if Not(a).semanticEquals(b) =>
+        If(IsNull(a), Literal.create(null, a.dataType), TrueLiteral)
+      case a Or b if a.semanticEquals(Not(b)) =>
+        If(IsNull(b), Literal.create(null, b.dataType), TrueLiteral)
 
       case a And b if a.semanticEquals(b) => a
       case a Or b if a.semanticEquals(b) => a

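Side note (not part of the patch): a quick Catalyst evaluation sketch, assuming a spark-shell with the catalyst classes on the classpath, showing that the new `If(IsNull(a), null, FalseLiteral)` form agrees with `a AND NOT(a)` under three-valued logic, whereas the old rewrite to `FalseLiteral` did not. The `Or` cases are symmetric, producing TRUE for non-null inputs and NULL otherwise.

```scala
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.types.BooleanType

// `a` stands in for a nullable boolean input; here it is literally NULL.
val a = Literal.create(null, BooleanType)

// The original predicate and the null-preserving form the rule now produces.
val original  = And(a, Not(a))
val rewritten = If(IsNull(a), Literal.create(null, BooleanType), Literal(false))

println(original.eval())   // null
println(rewritten.eval())  // null -- the old rewrite to FalseLiteral returned false here
```
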
http://git-wip-us.apache.org/repos/asf/spark/blob/79cc5971/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala
index 653c07f..6cd1108 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala
@@ -27,6 +27,7 @@ import org.apache.spark.sql.catalyst.plans.PlanTest
 import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.catalyst.rules._
 import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.types.BooleanType
 
 class BooleanSimplificationSuite extends PlanTest with PredicateHelper {
 
@@ -37,6 +38,7 @@ class BooleanSimplificationSuite extends PlanTest with PredicateHelper {
       Batch("Constant Folding", FixedPoint(50),
         NullPropagation,
         ConstantFolding,
+        SimplifyConditionals,
         BooleanSimplification,
         PruneFilters) :: Nil
   }
@@ -48,6 +50,14 @@ class BooleanSimplificationSuite extends PlanTest with PredicateHelper {
     testRelation.output, Seq(Row(1, 2, 3, "abc"))
   )
 
+  val testNotNullableRelation = LocalRelation('a.int.notNull, 'b.int.notNull, 'c.int.notNull,
+    'd.string.notNull, 'e.boolean.notNull, 'f.boolean.notNull, 'g.boolean.notNull,
+    'h.boolean.notNull)
+
+  val testNotNullableRelationWithData = LocalRelation.fromExternalRows(
+    testNotNullableRelation.output, Seq(Row(1, 2, 3, "abc"))
+  )
+
   private def checkCondition(input: Expression, expected: LogicalPlan): Unit = {
     val plan = testRelationWithData.where(input).analyze
     val actual = Optimize.execute(plan)
@@ -61,6 +71,13 @@ class BooleanSimplificationSuite extends PlanTest with PredicateHelper {
     comparePlans(actual, correctAnswer)
   }
 
+  private def checkConditionInNotNullableRelation(
+      input: Expression, expected: LogicalPlan): Unit = {
+    val plan = testNotNullableRelationWithData.where(input).analyze
+    val actual = Optimize.execute(plan)
+    comparePlans(actual, expected)
+  }
+
   test("a && a => a") {
     checkCondition(Literal(1) < 'a && Literal(1) < 'a, Literal(1) < 'a)
     checkCondition(Literal(1) < 'a && Literal(1) < 'a && Literal(1) < 'a, Literal(1) < 'a)
@@ -174,10 +191,30 @@ class BooleanSimplificationSuite extends PlanTest with PredicateHelper {
   }
 
   test("Complementation Laws") {
-    checkCondition('a && !'a, testRelation)
-    checkCondition(!'a && 'a, testRelation)
+    checkConditionInNotNullableRelation('e && !'e, testNotNullableRelation)
+    checkConditionInNotNullableRelation(!'e && 'e, testNotNullableRelation)
+
+    checkConditionInNotNullableRelation('e || !'e, testNotNullableRelationWithData)
+    checkConditionInNotNullableRelation(!'e || 'e, testNotNullableRelationWithData)
+  }
+
+  test("Complementation Laws - null handling") {
+    checkCondition('e && !'e,
+      testRelationWithData.where(If('e.isNull, Literal.create(null, BooleanType), false)).analyze)
+    checkCondition(!'e && 'e,
+      testRelationWithData.where(If('e.isNull, Literal.create(null, BooleanType), false)).analyze)
+
+    checkCondition('e || !'e,
+      testRelationWithData.where(If('e.isNull, Literal.create(null, BooleanType), true)).analyze)
+    checkCondition(!'e || 'e,
+      testRelationWithData.where(If('e.isNull, Literal.create(null, BooleanType), true)).analyze)
+  }
+
+  test("Complementation Laws - negative case") {
+    checkCondition('e && !'f, testRelationWithData.where('e && !'f).analyze)
+    checkCondition(!'f && 'e, testRelationWithData.where(!'f && 'e).analyze)
 
-    checkCondition('a || !'a, testRelationWithData)
-    checkCondition(!'a || 'a, testRelationWithData)
+    checkCondition('e || !'f, testRelationWithData.where('e || !'f).analyze)
+    checkCondition(!'f || 'e, testRelationWithData.where(!'f || 'e).analyze)
   }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/79cc5971/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
index 435b887..279b7b8 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
@@ -2569,4 +2569,14 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
     check(lit(2).cast("int"), $"c" === 2, Seq(Row(1, 1, 2, 0), Row(1, 1, 2, 1)))
     check(lit(2).cast("int"), $"c" =!= 2, Seq())
   }
+
+  test("SPARK-25402 Null handling in BooleanSimplification") {
+    val schema = StructType.fromDDL("a boolean, b int")
+    val rows = Seq(Row(null, 1))
+
+    val rdd = sparkContext.parallelize(rows)
+    val df = spark.createDataFrame(rdd, schema)
+
+    checkAnswer(df.where("(NOT a) OR a"), Seq.empty)
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org