You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2020/06/28 19:25:41 UTC
[spark] branch master updated: [SPARK-32115][SQL] Fix SUBSTRING to handle integer overflows
This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 6484c14 [SPARK-32115][SQL] Fix SUBSTRING to handle integer overflows
6484c14 is described below
commit 6484c14c57434dd6961cf9e9e73bbe8aa04cda15
Author: Yuanjian Li <xy...@gmail.com>
AuthorDate: Sun Jun 28 12:22:44 2020 -0700
[SPARK-32115][SQL] Fix SUBSTRING to handle integer overflows
### What changes were proposed in this pull request?
Bug fix for overflow case in `UTF8String.substringSQL`.
### Why are the changes needed?
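The SQL query `SELECT SUBSTRING("abc", -1207959552, -1207959552)` incorrectly returns `"abc"` instead of the expected output `""`, because `start + length` overflows `int` and wraps around to a large positive end index. For a query that does not overflow, such as `SUBSTRING("abc", -100, -100)`, we already get the correct output `""`.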
### Does this PR introduce _any_ user-facing change?
Yes, bug fix for the overflow case.
### How was this patch tested?
Added new unit tests.
Closes #28937 from xuanyuanking/SPARK-32115.
Authored-by: Yuanjian Li <xy...@gmail.com>
Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
.../main/java/org/apache/spark/unsafe/types/UTF8String.java | 11 ++++++++++-
.../java/org/apache/spark/unsafe/types/UTF8StringSuite.java | 4 ++++
.../sql/catalyst/expressions/StringExpressionsSuite.scala | 4 ++++
3 files changed, 18 insertions(+), 1 deletion(-)
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
index 186597f..7205293 100644
--- a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
@@ -341,8 +341,17 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable,
// to the -ith element before the end of the sequence. If a start index i is 0, it
// refers to the first element.
int len = numChars();
+ // `len + pos` does not overflow as `len >= 0`.
int start = (pos > 0) ? pos -1 : ((pos < 0) ? len + pos : 0);
- int end = (length == Integer.MAX_VALUE) ? len : start + length;
+
+ int end;
+ if ((long) start + length > Integer.MAX_VALUE) {
+ end = Integer.MAX_VALUE;
+ } else if ((long) start + length < Integer.MIN_VALUE) {
+ end = Integer.MIN_VALUE;
+ } else {
+ end = start + length;
+ }
return substring(start, end);
}
diff --git a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
index 8f93387..70e276f 100644
--- a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
+++ b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
@@ -390,6 +390,10 @@ public class UTF8StringSuite {
assertEquals(fromString("example"), e.substringSQL(0, Integer.MAX_VALUE));
assertEquals(fromString("example"), e.substringSQL(1, Integer.MAX_VALUE));
assertEquals(fromString("xample"), e.substringSQL(2, Integer.MAX_VALUE));
+ assertEquals(EMPTY_UTF8, e.substringSQL(-100, -100));
+ assertEquals(EMPTY_UTF8, e.substringSQL(-1207959552, -1207959552));
+ assertEquals(fromString("pl"), e.substringSQL(-3, 2));
+ assertEquals(EMPTY_UTF8, e.substringSQL(Integer.MIN_VALUE, 6));
}
@Test
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
index f18364d..967ccc4 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
@@ -236,6 +236,10 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
"xample",
row)
+ // Substring from a negative position with a negative length
+ checkEvaluation(Substring(s, Literal.create(-1207959552, IntegerType),
+ Literal.create(-1207959552, IntegerType)), "", row)
+
val s_notNull = 'a.string.notNull.at(0)
assert(Substring(s, Literal.create(0, IntegerType), Literal.create(2, IntegerType)).nullable)
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org