You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2014/07/11 04:20:06 UTC
git commit: [SPARK-2431][SQL] Refine StringComparison and related codes.
Repository: spark
Updated Branches:
refs/heads/master ae8ca4dfb -> f62c42728
[SPARK-2431][SQL] Refine StringComparison and related codes.
Refine `StringComparison` and related code as follows:
- Turn `StringComparison` into a trait mixed into `BinaryExpression`, in the same way as `StringRegexExpression` and `CaseConversionExpression`.
- Make the nullability of `StringRegexExpression` depend on its children's nullability instead of always being `true`.
- Add a case to `LikeSimplification` for LIKE patterns that contain no wildcard, rewriting them to `EqualTo`.
Author: Takuya UESHIN <ue...@happy-camper.st>
Closes #1357 from ueshin/issues/SPARK-2431 and squashes the following commits:
77766f5 [Takuya UESHIN] Add a case that the like condition includes no wildcard to LikeSimplification.
b9da9d2 [Takuya UESHIN] Fix nullability of StringRegexExpression.
680bb72 [Takuya UESHIN] Refine StringComparison.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f62c4272
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f62c4272
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f62c4272
Branch: refs/heads/master
Commit: f62c42728990266d5d5099abe241f699189ba025
Parents: ae8ca4d
Author: Takuya UESHIN <ue...@happy-camper.st>
Authored: Thu Jul 10 19:20:00 2014 -0700
Committer: Michael Armbrust <mi...@databricks.com>
Committed: Thu Jul 10 19:20:00 2014 -0700
----------------------------------------------------------------------
.../catalyst/expressions/stringOperations.scala | 28 +++++++++-----------
.../sql/catalyst/optimizer/Optimizer.scala | 3 +++
2 files changed, 16 insertions(+), 15 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/f62c4272/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
index 347471c..b385053 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
@@ -23,7 +23,6 @@ import org.apache.spark.sql.catalyst.types.DataType
import org.apache.spark.sql.catalyst.types.StringType
import org.apache.spark.sql.catalyst.types.BooleanType
-
trait StringRegexExpression {
self: BinaryExpression =>
@@ -32,7 +31,7 @@ trait StringRegexExpression {
def escape(v: String): String
def matches(regex: Pattern, str: String): Boolean
- def nullable: Boolean = true
+ def nullable: Boolean = left.nullable || right.nullable
def dataType: DataType = BooleanType
// try cache the pattern for Literal
@@ -157,19 +156,13 @@ case class Lower(child: Expression) extends UnaryExpression with CaseConversionE
override def toString() = s"Lower($child)"
}
-/** A base class for functions that compare two strings, returning a boolean. */
-abstract class StringComparison extends Expression {
- self: Product =>
+/** A base trait for functions that compare two strings, returning a boolean. */
+trait StringComparison {
+ self: BinaryExpression =>
type EvaluatedType = Any
- def left: Expression
- def right: Expression
-
- override def references = children.flatMap(_.references).toSet
- override def children = left :: right :: Nil
-
- override def nullable: Boolean = true
+ def nullable: Boolean = left.nullable || right.nullable
override def dataType: DataType = BooleanType
def compare(l: String, r: String): Boolean
@@ -184,26 +177,31 @@ abstract class StringComparison extends Expression {
}
}
+ def symbol: String = nodeName
+
override def toString() = s"$nodeName($left, $right)"
}
/**
* A function that returns true if the string `left` contains the string `right`.
*/
-case class Contains(left: Expression, right: Expression) extends StringComparison {
+case class Contains(left: Expression, right: Expression)
+ extends BinaryExpression with StringComparison {
override def compare(l: String, r: String) = l.contains(r)
}
/**
* A function that returns true if the string `left` starts with the string `right`.
*/
-case class StartsWith(left: Expression, right: Expression) extends StringComparison {
+case class StartsWith(left: Expression, right: Expression)
+ extends BinaryExpression with StringComparison {
def compare(l: String, r: String) = l.startsWith(r)
}
/**
* A function that returns true if the string `left` ends with the string `right`.
*/
-case class EndsWith(left: Expression, right: Expression) extends StringComparison {
+case class EndsWith(left: Expression, right: Expression)
+ extends BinaryExpression with StringComparison {
def compare(l: String, r: String) = l.endsWith(r)
}
http://git-wip-us.apache.org/repos/asf/spark/blob/f62c4272/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index f0904f5..a142310 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -123,6 +123,7 @@ object LikeSimplification extends Rule[LogicalPlan] {
val startsWith = "([^_%]+)%".r
val endsWith = "%([^_%]+)".r
val contains = "%([^_%]+)%".r
+ val equalTo = "([^_%]*)".r
def apply(plan: LogicalPlan): LogicalPlan = plan transformAllExpressions {
case Like(l, Literal(startsWith(pattern), StringType)) if !pattern.endsWith("\\") =>
@@ -131,6 +132,8 @@ object LikeSimplification extends Rule[LogicalPlan] {
EndsWith(l, Literal(pattern))
case Like(l, Literal(contains(pattern), StringType)) if !pattern.endsWith("\\") =>
Contains(l, Literal(pattern))
+ case Like(l, Literal(equalTo(pattern), StringType)) =>
+ EqualTo(l, Literal(pattern))
}
}