You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2015/04/03 21:35:30 UTC

spark git commit: [SPARK-6647][SQL] Make trait StringComparison as BinaryPredicate and fix unit tests of string data source Filter

Repository: spark
Updated Branches:
  refs/heads/master 14632b794 -> 26b415e15


[SPARK-6647][SQL] Make trait StringComparison as BinaryPredicate and fix unit tests of string data source Filter

Now trait `StringComparison` is a `BinaryExpression`. In fact, it should be a `BinaryPredicate`.

By making `StringComparison` as `BinaryPredicate`, we can throw error when a `expressions.Predicate` can't translate to a data source `Filter` in function `selectFilters`.

Without this modification, because we will wrap a `Filter` outside the scanned results in `pruneFilterProjectRaw`, we can't detect about something is wrong in translating predicates to filters in `selectFilters`.

The unit test of #5285 demonstrates such problem. In that pr, even `expressions.Contains` is not properly translated to `sources.StringContains`, the filtering is still performed by the `Filter` and so the test passes.

Of course, by doing this modification, all `expressions.Predicate` classes need to have its data source `Filter` correspondingly.

There is a small bug in `FilteredScanSuite` for doing `StringEndsWith` filter. This pr also fixes it.

Author: Liang-Chi Hsieh <vi...@gmail.com>

Closes #5309 from viirya/translate_predicate and squashes the following commits:

b176385 [Liang-Chi Hsieh] Address comment.
275a493 [Liang-Chi Hsieh] More properly test for StringStartsWith, StringEndsWith and StringContains.
caf2347 [Liang-Chi Hsieh] Make trait StringComparison as BinaryPredicate and throw error when Predicate can't translate to data source Filter.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/26b415e1
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/26b415e1
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/26b415e1

Branch: refs/heads/master
Commit: 26b415e15970d02523f0df459557b09ffda0c8c1
Parents: 14632b7
Author: Liang-Chi Hsieh <vi...@gmail.com>
Authored: Fri Apr 3 12:35:00 2015 -0700
Committer: Michael Armbrust <mi...@databricks.com>
Committed: Fri Apr 3 12:35:00 2015 -0700

----------------------------------------------------------------------
 .../catalyst/expressions/stringOperations.scala | 11 ++++----
 .../spark/sql/sources/FilteredScanSuite.scala   | 28 ++++++++++++++------
 2 files changed, 25 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/26b415e1/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
index 3cdca4e..acfbbac 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
@@ -156,12 +156,11 @@ case class Lower(child: Expression) extends UnaryExpression with CaseConversionE
 
 /** A base trait for functions that compare two strings, returning a boolean. */
 trait StringComparison {
-  self: BinaryExpression =>
+  self: BinaryPredicate =>
 
-  type EvaluatedType = Any
+  override type EvaluatedType = Any
 
   override def nullable: Boolean = left.nullable || right.nullable
-  override def dataType: DataType = BooleanType
 
   def compare(l: String, r: String): Boolean
 
@@ -184,7 +183,7 @@ trait StringComparison {
  * A function that returns true if the string `left` contains the string `right`.
  */
 case class Contains(left: Expression, right: Expression)
-    extends BinaryExpression with StringComparison {
+    extends BinaryPredicate with StringComparison {
   override def compare(l: String, r: String): Boolean = l.contains(r)
 }
 
@@ -192,7 +191,7 @@ case class Contains(left: Expression, right: Expression)
  * A function that returns true if the string `left` starts with the string `right`.
  */
 case class StartsWith(left: Expression, right: Expression)
-    extends BinaryExpression with StringComparison {
+    extends BinaryPredicate with StringComparison {
   override def compare(l: String, r: String): Boolean = l.startsWith(r)
 }
 
@@ -200,7 +199,7 @@ case class StartsWith(left: Expression, right: Expression)
  * A function that returns true if the string `left` ends with the string `right`.
  */
 case class EndsWith(left: Expression, right: Expression)
-    extends BinaryExpression with StringComparison {
+    extends BinaryPredicate with StringComparison {
   override def compare(l: String, r: String): Boolean = l.endsWith(r)
 }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/26b415e1/sql/core/src/test/scala/org/apache/spark/sql/sources/FilteredScanSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/FilteredScanSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/FilteredScanSuite.scala
index 72ddc0e..773bd16 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/sources/FilteredScanSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/FilteredScanSuite.scala
@@ -45,7 +45,9 @@ case class SimpleFilteredScan(from: Int, to: Int)(@transient val sqlContext: SQL
     val rowBuilders = requiredColumns.map {
       case "a" => (i: Int) => Seq(i)
       case "b" => (i: Int) => Seq(i * 2)
-      case "c" => (i: Int) => Seq((i - 1 + 'a').toChar.toString * 10)
+      case "c" => (i: Int) =>
+        val c = (i - 1 + 'a').toChar.toString
+        Seq(c * 5 + c.toUpperCase() * 5)
     }
 
     FiltersPushed.list = filters
@@ -77,7 +79,7 @@ case class SimpleFilteredScan(from: Int, to: Int)(@transient val sqlContext: SQL
     }
 
     def eval(a: Int) = {
-      val c = (a - 1 + 'a').toChar.toString * 10
+      val c = (a - 1 + 'a').toChar.toString * 5 + (a - 1 + 'a').toChar.toString.toUpperCase() * 5
       !filters.map(translateFilterOnA(_)(a)).contains(false) &&
         !filters.map(translateFilterOnC(_)(c)).contains(false)
     }
@@ -110,7 +112,8 @@ class FilteredScanSuite extends DataSourceTest {
 
   sqlTest(
     "SELECT * FROM oneToTenFiltered",
-    (1 to 10).map(i => Row(i, i * 2, (i - 1 + 'a').toChar.toString * 10)).toSeq)
+    (1 to 10).map(i => Row(i, i * 2, (i - 1 + 'a').toChar.toString * 5
+      + (i - 1 + 'a').toChar.toString.toUpperCase() * 5)).toSeq)
 
   sqlTest(
     "SELECT a, b FROM oneToTenFiltered",
@@ -182,15 +185,15 @@ class FilteredScanSuite extends DataSourceTest {
 
   sqlTest(
     "SELECT a, b, c FROM oneToTenFiltered WHERE c like 'c%'",
-    Seq(Row(3, 3 * 2, "c" * 10)))
+    Seq(Row(3, 3 * 2, "c" * 5 + "C" * 5)))
 
   sqlTest(
-    "SELECT a, b, c FROM oneToTenFiltered WHERE c like 'd%'",
-    Seq(Row(4, 4 * 2, "d" * 10)))
+    "SELECT a, b, c FROM oneToTenFiltered WHERE c like '%D'",
+    Seq(Row(4, 4 * 2, "d" * 5 + "D" * 5)))
 
   sqlTest(
-    "SELECT a, b, c FROM oneToTenFiltered WHERE c like '%e%'",
-    Seq(Row(5, 5 * 2, "e" * 10)))
+    "SELECT a, b, c FROM oneToTenFiltered WHERE c like '%eE%'",
+    Seq(Row(5, 5 * 2, "e" * 5 + "E" * 5)))
 
   testPushDown("SELECT * FROM oneToTenFiltered WHERE A = 1", 1)
   testPushDown("SELECT a FROM oneToTenFiltered WHERE A = 1", 1)
@@ -222,6 +225,15 @@ class FilteredScanSuite extends DataSourceTest {
   testPushDown("SELECT * FROM oneToTenFiltered WHERE a < 3 OR a > 8", 4)
   testPushDown("SELECT * FROM oneToTenFiltered WHERE NOT (a < 6)", 5)
 
+  testPushDown("SELECT a, b, c FROM oneToTenFiltered WHERE c like 'c%'", 1)
+  testPushDown("SELECT a, b, c FROM oneToTenFiltered WHERE c like 'C%'", 0)
+
+  testPushDown("SELECT a, b, c FROM oneToTenFiltered WHERE c like '%D'", 1)
+  testPushDown("SELECT a, b, c FROM oneToTenFiltered WHERE c like '%d'", 0)
+
+  testPushDown("SELECT a, b, c FROM oneToTenFiltered WHERE c like '%eE%'", 1)
+  testPushDown("SELECT a, b, c FROM oneToTenFiltered WHERE c like '%Ee%'", 0)
+
   def testPushDown(sqlString: String, expectedCount: Int): Unit = {
     test(s"PushDown Returns $expectedCount: $sqlString") {
       val queryExecution = sql(sqlString).queryExecution


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org