You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by rx...@apache.org on 2014/07/16 07:43:51 UTC

git commit: [SPARK-2504][SQL] Fix nullability of Substring expression.

Repository: spark
Updated Branches:
  refs/heads/master 9b38b7c71 -> 632fb3d9a


[SPARK-2504][SQL] Fix nullability of Substring expression.

This is a follow-up of #1359 with nullability narrowing.

Author: Takuya UESHIN <ue...@happy-camper.st>

Closes #1426 from ueshin/issues/SPARK-2504 and squashes the following commits:

5157832 [Takuya UESHIN] Remove unnecessary white spaces.
80958ac [Takuya UESHIN] Fix nullability of Substring expression.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/632fb3d9
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/632fb3d9
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/632fb3d9

Branch: refs/heads/master
Commit: 632fb3d9a9ebb3d2218385403145d5b89c41c025
Parents: 9b38b7c
Author: Takuya UESHIN <ue...@happy-camper.st>
Authored: Tue Jul 15 22:43:48 2014 -0700
Committer: Reynold Xin <rx...@apache.org>
Committed: Tue Jul 15 22:43:48 2014 -0700

----------------------------------------------------------------------
 .../catalyst/expressions/stringOperations.scala | 24 ++++++++++----------
 .../expressions/ExpressionEvaluationSuite.scala | 14 ++++++++----
 2 files changed, 22 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/632fb3d9/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
index 4bd7bf5..f1b27c3 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
@@ -215,19 +215,19 @@ case class EndsWith(left: Expression, right: Expression)
 case class Substring(str: Expression, pos: Expression, len: Expression) extends Expression {
   
   type EvaluatedType = Any
-  
-  def nullable: Boolean = true
+
+  def nullable: Boolean = str.nullable || pos.nullable || len.nullable
   def dataType: DataType = {
     if (!resolved) {
       throw new UnresolvedException(this, s"Cannot resolve since $children are not resolved")
     }
     if (str.dataType == BinaryType) str.dataType else StringType
   }
-  
+
   def references = children.flatMap(_.references).toSet
-  
+
   override def children = str :: pos :: len :: Nil
-  
+
   @inline
   def slice[T, C <: Any](str: C, startPos: Int, sliceLen: Int)
       (implicit ev: (C=>IndexedSeqOptimized[T,_])): Any = {
@@ -237,40 +237,40 @@ case class Substring(str: Expression, pos: Expression, len: Expression) extends
     // refers to element i-1 in the sequence. If a start index i is less than 0, it refers
     // to the -ith element before the end of the sequence. If a start index i is 0, it
     // refers to the first element.
-    
+
     val start = startPos match {
       case pos if pos > 0 => pos - 1
       case neg if neg < 0 => len + neg
       case _ => 0
     }
-    
+
     val end = sliceLen match {
       case max if max == Integer.MAX_VALUE => max
       case x => start + x
     }
-      
+
     str.slice(start, end)    
   }
-  
+
   override def eval(input: Row): Any = {
     val string = str.eval(input)
 
     val po = pos.eval(input)
     val ln = len.eval(input)
-    
+
     if ((string == null) || (po == null) || (ln == null)) {
       null
     } else {
       val start = po.asInstanceOf[Int]
       val length = ln.asInstanceOf[Int] 
-      
+
       string match {
         case ba: Array[Byte] => slice(ba, start, length)
         case other => slice(other.toString, start, length)
       }
     }
   }
-  
+
   override def toString = len match {
     case max if max == Integer.MAX_VALUE => s"SUBSTR($str, $pos)"
     case _ => s"SUBSTR($str, $pos, $len)"

http://git-wip-us.apache.org/repos/asf/spark/blob/632fb3d9/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala
index f1d7aed..143330b 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala
@@ -469,9 +469,9 @@ class ExpressionEvaluationSuite extends FunSuite {
   
   test("Substring") {
     val row = new GenericRow(Array[Any]("example", "example".toArray.map(_.toByte)))
-    
+
     val s = 'a.string.at(0)
-    
+
     // substring from zero position with less-than-full length
     checkEvaluation(Substring(s, Literal(0, IntegerType), Literal(2, IntegerType)), "ex", row)
     checkEvaluation(Substring(s, Literal(1, IntegerType), Literal(2, IntegerType)), "ex", row)
@@ -501,7 +501,7 @@ class ExpressionEvaluationSuite extends FunSuite {
 
     // substring(null, _, _) -> null
     checkEvaluation(Substring(s, Literal(100, IntegerType), Literal(4, IntegerType)), null, new GenericRow(Array[Any](null)))
-    
+
     // substring(_, null, _) -> null
     checkEvaluation(Substring(s, Literal(null, IntegerType), Literal(4, IntegerType)), null, row)
 
@@ -514,6 +514,12 @@ class ExpressionEvaluationSuite extends FunSuite {
 
     // 2-arg substring from nonzero position
     checkEvaluation(Substring(s, Literal(2, IntegerType), Literal(Integer.MAX_VALUE, IntegerType)), "xample", row)
+
+    val s_notNull = 'a.string.notNull.at(0)
+
+    assert(Substring(s, Literal(0, IntegerType), Literal(2, IntegerType)).nullable === true)
+    assert(Substring(s_notNull, Literal(0, IntegerType), Literal(2, IntegerType)).nullable === false)
+    assert(Substring(s_notNull, Literal(null, IntegerType), Literal(2, IntegerType)).nullable === true)
+    assert(Substring(s_notNull, Literal(0, IntegerType), Literal(null, IntegerType)).nullable === true)
   }
 }
-