You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2014/07/18 23:24:06 UTC

git commit: [SPARK-2535][SQL] Add StringComparison case to NullPropagation.

Repository: spark
Updated Branches:
  refs/heads/master d88f6be44 -> 3a1709fa5


[SPARK-2535][SQL] Add StringComparison case to NullPropagation.

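`NullPropagation` now also handles `StringComparison` expressions (such as `Contains`, `StartsWith` and `EndsWith`): when either operand is a `null` literal, the whole expression is replaced with a `null` literal of the expression's data type.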

Author: Takuya UESHIN <ue...@happy-camper.st>

Closes #1451 from ueshin/issues/SPARK-2535 and squashes the following commits:

e99c237 [Takuya UESHIN] Add some tests.
8f9b984 [Takuya UESHIN] Add StringComparison case to NullPropagation.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3a1709fa
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3a1709fa
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3a1709fa

Branch: refs/heads/master
Commit: 3a1709fa557f2bd6d101bc67a9e773882078c527
Parents: d88f6be
Author: Takuya UESHIN <ue...@happy-camper.st>
Authored: Fri Jul 18 16:24:00 2014 -0500
Committer: Michael Armbrust <mi...@databricks.com>
Committed: Fri Jul 18 16:24:00 2014 -0500

----------------------------------------------------------------------
 .../sql/catalyst/optimizer/Optimizer.scala      |  5 ++
 .../expressions/ExpressionEvaluationSuite.scala | 23 ++++-
 .../optimizer/ConstantFoldingSuite.scala        | 10 ++-
 .../optimizer/LikeSimplificationSuite.scala     | 90 ++++++++++++++++++++
 4 files changed, 125 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/3a1709fa/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index 7f32f6b..c65987b 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -188,6 +188,11 @@ object NullPropagation extends Rule[LogicalPlan] {
         case left :: Literal(null, _) :: Nil => Literal(null, e.dataType)
         case _ => e
       }
+      case e: StringComparison => e.children match {
+        case Literal(null, _) :: right :: Nil => Literal(null, e.dataType)
+        case left :: Literal(null, _) :: Nil => Literal(null, e.dataType)
+        case _ => e
+      }
     }
   }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/3a1709fa/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala
index 143330b..73f5464 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala
@@ -466,7 +466,28 @@ class ExpressionEvaluationSuite extends FunSuite {
     checkEvaluation(c1 === c2, false, row)
     checkEvaluation(c1 !== c2, true, row)
   }
-  
+
+  test("StringComparison") {
+    val row = new GenericRow(Array[Any]("abc", null))
+    val c1 = 'a.string.at(0)
+    val c2 = 'a.string.at(1)
+
+    checkEvaluation(Contains(c1, "b"), true, row)
+    checkEvaluation(Contains(c1, "x"), false, row)
+    checkEvaluation(Contains(c2, "b"), null, row)
+    checkEvaluation(Contains(c1, Literal(null, StringType)), null, row)
+
+    checkEvaluation(StartsWith(c1, "a"), true, row)
+    checkEvaluation(StartsWith(c1, "b"), false, row)
+    checkEvaluation(StartsWith(c2, "a"), null, row)
+    checkEvaluation(StartsWith(c1, Literal(null, StringType)), null, row)
+
+    checkEvaluation(EndsWith(c1, "c"), true, row)
+    checkEvaluation(EndsWith(c1, "b"), false, row)
+    checkEvaluation(EndsWith(c2, "b"), null, row)
+    checkEvaluation(EndsWith(c1, Literal(null, StringType)), null, row)
+  }
+
   test("Substring") {
     val row = new GenericRow(Array[Any]("example", "example".toArray.map(_.toByte)))
 

http://git-wip-us.apache.org/repos/asf/spark/blob/3a1709fa/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala
index ff8d0d0..d607eed 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala
@@ -205,7 +205,10 @@ class ConstantFoldingSuite extends PlanTest {
 
           Substring(Literal(null, StringType), 0, 1) as 'c16,
           Substring("abc", Literal(null, IntegerType), 1) as 'c17,
-          Substring("abc", 0, Literal(null, IntegerType)) as 'c18
+          Substring("abc", 0, Literal(null, IntegerType)) as 'c18,
+
+          Contains(Literal(null, StringType), "abc") as 'c19,
+          Contains("abc", Literal(null, StringType)) as 'c20
         )
 
     val optimized = Optimize(originalQuery.analyze)
@@ -237,7 +240,10 @@ class ConstantFoldingSuite extends PlanTest {
 
           Literal(null, StringType) as 'c16,
           Literal(null, StringType) as 'c17,
-          Literal(null, StringType) as 'c18
+          Literal(null, StringType) as 'c18,
+
+          Literal(null, BooleanType) as 'c19,
+          Literal(null, BooleanType) as 'c20
         ).analyze
 
     comparePlans(optimized, correctAnswer)

http://git-wip-us.apache.org/repos/asf/spark/blob/3a1709fa/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LikeSimplificationSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LikeSimplificationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LikeSimplificationSuite.scala
new file mode 100644
index 0000000..b10577c
--- /dev/null
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LikeSimplificationSuite.scala
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.optimizer
+
+import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.plans.logical._
+import org.apache.spark.sql.catalyst.plans.PlanTest
+import org.apache.spark.sql.catalyst.rules._
+
+/* Implicit conversions */
+import org.apache.spark.sql.catalyst.dsl.expressions._
+import org.apache.spark.sql.catalyst.dsl.plans._
+
+class LikeSimplificationSuite extends PlanTest {
+
+  object Optimize extends RuleExecutor[LogicalPlan] {
+    val batches =
+      Batch("Like Simplification", Once,
+        LikeSimplification) :: Nil
+  }
+
+  val testRelation = LocalRelation('a.string)
+
+  test("simplify Like into StartsWith") {
+    val originalQuery =
+      testRelation
+        .where(('a like "abc%") || ('a like "abc\\%"))
+
+    val optimized = Optimize(originalQuery.analyze)
+    val correctAnswer = testRelation
+      .where(StartsWith('a, "abc") || ('a like "abc\\%"))
+      .analyze
+
+    comparePlans(optimized, correctAnswer)
+  }
+
+  test("simplify Like into EndsWith") {
+    val originalQuery =
+      testRelation
+        .where('a like "%xyz")
+
+    val optimized = Optimize(originalQuery.analyze)
+    val correctAnswer = testRelation
+      .where(EndsWith('a, "xyz"))
+      .analyze
+
+    comparePlans(optimized, correctAnswer)
+  }
+
+  test("simplify Like into Contains") {
+    val originalQuery =
+      testRelation
+        .where(('a like "%mn%") || ('a like "%mn\\%"))
+
+    val optimized = Optimize(originalQuery.analyze)
+    val correctAnswer = testRelation
+      .where(Contains('a, "mn") || ('a like "%mn\\%"))
+      .analyze
+
+    comparePlans(optimized, correctAnswer)
+  }
+
+  test("simplify Like into EqualTo") {
+    val originalQuery =
+      testRelation
+        .where(('a like "") || ('a like "abc"))
+
+    val optimized = Optimize(originalQuery.analyze)
+    val correctAnswer = testRelation
+      .where(('a === "") || ('a === "abc"))
+      .analyze
+
+    comparePlans(optimized, correctAnswer)
+  }
+}