You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2020/06/28 19:25:41 UTC

[spark] branch master updated: [SPARK-32115][SQL] Fix SUBSTRING to handle integer overflows

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 6484c14  [SPARK-32115][SQL] Fix SUBSTRING to handle integer overflows
6484c14 is described below

commit 6484c14c57434dd6961cf9e9e73bbe8aa04cda15
Author: Yuanjian Li <xy...@gmail.com>
AuthorDate: Sun Jun 28 12:22:44 2020 -0700

    [SPARK-32115][SQL] Fix SUBSTRING to handle integer overflows
    
    ### What changes were proposed in this pull request?
    Bug fix for overflow case in `UTF8String.substringSQL`.
    
    ### Why are the changes needed?
    SQL query `SELECT SUBSTRING("abc", -1207959552, -1207959552)` incorrectly returns` "abc"` against expected output of `""`. For query `SUBSTRING("abc", -100, -100)`, we'll get the right output of `""`.
    
    ### Does this PR introduce _any_ user-facing change?
    Yes, bug fix for the overflow case.
    
    ### How was this patch tested?
    New UT.
    
    Closes #28937 from xuanyuanking/SPARK-32115.
    
    Authored-by: Yuanjian Li <xy...@gmail.com>
    Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
 .../main/java/org/apache/spark/unsafe/types/UTF8String.java   | 11 ++++++++++-
 .../java/org/apache/spark/unsafe/types/UTF8StringSuite.java   |  4 ++++
 .../sql/catalyst/expressions/StringExpressionsSuite.scala     |  4 ++++
 3 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
index 186597f..7205293 100644
--- a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
@@ -341,8 +341,17 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable,
     // to the -ith element before the end of the sequence. If a start index i is 0, it
     // refers to the first element.
     int len = numChars();
+    // `len + pos` does not overflow as `len >= 0`.
     int start = (pos > 0) ? pos -1 : ((pos < 0) ? len + pos : 0);
-    int end = (length == Integer.MAX_VALUE) ? len : start + length;
+
+    int end;
+    if ((long) start + length > Integer.MAX_VALUE) {
+      end = Integer.MAX_VALUE;
+    } else if ((long) start + length < Integer.MIN_VALUE) {
+      end = Integer.MIN_VALUE;
+    } else {
+      end = start + length;
+    }
     return substring(start, end);
   }
 
diff --git a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
index 8f93387..70e276f 100644
--- a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
+++ b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
@@ -390,6 +390,10 @@ public class UTF8StringSuite {
     assertEquals(fromString("example"), e.substringSQL(0, Integer.MAX_VALUE));
     assertEquals(fromString("example"), e.substringSQL(1, Integer.MAX_VALUE));
     assertEquals(fromString("xample"), e.substringSQL(2, Integer.MAX_VALUE));
+    assertEquals(EMPTY_UTF8, e.substringSQL(-100, -100));
+    assertEquals(EMPTY_UTF8, e.substringSQL(-1207959552, -1207959552));
+    assertEquals(fromString("pl"), e.substringSQL(-3, 2));
+    assertEquals(EMPTY_UTF8, e.substringSQL(Integer.MIN_VALUE, 6));
   }
 
   @Test
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
index f18364d..967ccc4 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
@@ -236,6 +236,10 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
       "xample",
       row)
 
+    // Substring with from negative position with negative length
+    checkEvaluation(Substring(s, Literal.create(-1207959552, IntegerType),
+      Literal.create(-1207959552, IntegerType)), "", row)
+
     val s_notNull = 'a.string.notNull.at(0)
 
     assert(Substring(s, Literal.create(0, IntegerType), Literal.create(2, IntegerType)).nullable)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org