You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by rx...@apache.org on 2016/04/14 22:34:34 UTC

spark git commit: [SPARK-14545][SQL] Improve `LikeSimplification` by adding `a%b` rule

Repository: spark
Updated Branches:
  refs/heads/master bc748b7b8 -> d7e124edf


[SPARK-14545][SQL] Improve `LikeSimplification` by adding `a%b` rule

## What changes were proposed in this pull request?

Currently, `LikeSimplification` handles the following four rules.
- 'a%' => expr.StartsWith("a")
- '%b' => expr.EndsWith("b")
- '%a%' => expr.Contains("a")
- 'a' => EqualTo("a")

This PR adds the following rule.
- 'a%b' => expr.Length() >= 2 && expr.StartsWith("a") && expr.EndsWith("b")

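Here, 2 is computed statically at optimization time as "a".size + "b".size. The length check is needed so that a string shorter than prefix + suffix is not matched through overlap: without it, the single-character string 'a' would incorrectly match the pattern 'a%a'.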

**Before**
```
scala> sql("select a from (select explode(array('abc','adc')) a) T where a like 'a%c'").explain()
== Physical Plan ==
WholeStageCodegen
:  +- Filter a#5 LIKE a%c
:     +- INPUT
+- Generate explode([abc,adc]), false, false, [a#5]
   +- Scan OneRowRelation[]
```

**After**
```
scala> sql("select a from (select explode(array('abc','adc')) a) T where a like 'a%c'").explain()
== Physical Plan ==
WholeStageCodegen
:  +- Filter ((length(a#5) >= 2) && (StartsWith(a#5, a) && EndsWith(a#5, c)))
:     +- INPUT
+- Generate explode([abc,adc]), false, false, [a#5]
   +- Scan OneRowRelation[]
```

## How was this patch tested?

Passes the Jenkins tests (including a new test case).

Author: Dongjoon Hyun <do...@apache.org>

Closes #12312 from dongjoon-hyun/SPARK-14545.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d7e124ed
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d7e124ed
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d7e124ed

Branch: refs/heads/master
Commit: d7e124edfe2578ecdf8e816a4dda3ce430a09172
Parents: bc748b7
Author: Dongjoon Hyun <do...@apache.org>
Authored: Thu Apr 14 13:34:29 2016 -0700
Committer: Reynold Xin <rx...@databricks.com>
Committed: Thu Apr 14 13:34:29 2016 -0700

----------------------------------------------------------------------
 .../sql/catalyst/optimizer/Optimizer.scala      | 28 ++++++++++++--------
 .../optimizer/LikeSimplificationSuite.scala     | 14 ++++++++++
 2 files changed, 31 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/d7e124ed/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index aeb1842..f5172b2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -517,22 +517,28 @@ object LikeSimplification extends Rule[LogicalPlan] {
   // Cases like "something\%" are not optimized, but this does not affect correctness.
   private val startsWith = "([^_%]+)%".r
   private val endsWith = "%([^_%]+)".r
+  private val startsAndEndsWith = "([^_%]+)%([^_%]+)".r
   private val contains = "%([^_%]+)%".r
   private val equalTo = "([^_%]*)".r
 
   def apply(plan: LogicalPlan): LogicalPlan = plan transformAllExpressions {
-    case Like(l, Literal(utf, StringType)) =>
-      utf.toString match {
-        case startsWith(pattern) if !pattern.endsWith("\\") =>
-          StartsWith(l, Literal(pattern))
-        case endsWith(pattern) =>
-          EndsWith(l, Literal(pattern))
-        case contains(pattern) if !pattern.endsWith("\\") =>
-          Contains(l, Literal(pattern))
-        case equalTo(pattern) =>
-          EqualTo(l, Literal(pattern))
+    case Like(input, Literal(pattern, StringType)) =>
+      pattern.toString match {
+        case startsWith(prefix) if !prefix.endsWith("\\") =>
+          StartsWith(input, Literal(prefix))
+        case endsWith(postfix) =>
+          EndsWith(input, Literal(postfix))
+        // The 'a%a' pattern is basically the same as 'a%' && '%a'.
+        // However, the additional `Length` check is required to prevent 'a' from matching 'a%a'.
+        case startsAndEndsWith(prefix, postfix) if !prefix.endsWith("\\") =>
+          And(GreaterThanOrEqual(Length(input), Literal(prefix.size + postfix.size)),
+            And(StartsWith(input, Literal(prefix)), EndsWith(input, Literal(postfix))))
+        case contains(infix) if !infix.endsWith("\\") =>
+          Contains(input, Literal(infix))
+        case equalTo(str) =>
+          EqualTo(input, Literal(str))
         case _ =>
-          Like(l, Literal.create(utf, StringType))
+          Like(input, Literal.create(pattern, StringType))
       }
   }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/d7e124ed/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LikeSimplificationSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LikeSimplificationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LikeSimplificationSuite.scala
index 741bc11..fdde89d 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LikeSimplificationSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LikeSimplificationSuite.scala
@@ -61,6 +61,20 @@ class LikeSimplificationSuite extends PlanTest {
     comparePlans(optimized, correctAnswer)
   }
 
+  test("simplify Like into startsWith and EndsWith") {
+    val originalQuery =
+      testRelation
+        .where(('a like "abc\\%def") || ('a like "abc%def"))
+
+    val optimized = Optimize.execute(originalQuery.analyze)
+    val correctAnswer = testRelation
+      .where(('a like "abc\\%def") ||
+        (Length('a) >= 6 && (StartsWith('a, "abc") && EndsWith('a, "def"))))
+      .analyze
+
+    comparePlans(optimized, correctAnswer)
+  }
+
   test("simplify Like into Contains") {
     val originalQuery =
       testRelation


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org