You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2020/06/28 19:30:46 UTC
[spark] branch branch-2.4 updated: [SPARK-32115][SQL] Fix SUBSTRING
to handle integer overflows
This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-2.4
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-2.4 by this push:
new 1eda585 [SPARK-32115][SQL] Fix SUBSTRING to handle integer overflows
1eda585 is described below
commit 1eda58549a54e6514bf3822b830c81eee617f3ec
Author: Yuanjian Li <xy...@gmail.com>
AuthorDate: Sun Jun 28 12:22:44 2020 -0700
[SPARK-32115][SQL] Fix SUBSTRING to handle integer overflows
Bug fix for overflow case in `UTF8String.substringSQL`.
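The SQL query `SELECT SUBSTRING("abc", -1207959552, -1207959552)` incorrectly returns `"abc"` instead of the expected output `""`, because the sum of the start index and the length overflows a 32-bit integer. For the query `SUBSTRING("abc", -100, -100)`, which does not overflow, we already get the right output of `""`.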
User-facing change: yes, this is a bug fix for the overflow case.
Tested with new unit tests (UT).
Closes #28937 from xuanyuanking/SPARK-32115.
Authored-by: Yuanjian Li <xy...@gmail.com>
Signed-off-by: Dongjoon Hyun <do...@apache.org>
(cherry picked from commit 6484c14c57434dd6961cf9e9e73bbe8aa04cda15)
Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
.../main/java/org/apache/spark/unsafe/types/UTF8String.java | 11 ++++++++++-
.../java/org/apache/spark/unsafe/types/UTF8StringSuite.java | 4 ++++
.../sql/catalyst/expressions/StringExpressionsSuite.scala | 4 ++++
3 files changed, 18 insertions(+), 1 deletion(-)
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
index dff4a73..101ac23 100644
--- a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
@@ -340,8 +340,17 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable,
// to the -ith element before the end of the sequence. If a start index i is 0, it
// refers to the first element.
int len = numChars();
+ // `len + pos` does not overflow as `len >= 0`.
int start = (pos > 0) ? pos -1 : ((pos < 0) ? len + pos : 0);
- int end = (length == Integer.MAX_VALUE) ? len : start + length;
+
+ int end;
+ if ((long) start + length > Integer.MAX_VALUE) {
+ end = Integer.MAX_VALUE;
+ } else if ((long) start + length < Integer.MIN_VALUE) {
+ end = Integer.MIN_VALUE;
+ } else {
+ end = start + length;
+ }
return substring(start, end);
}
diff --git a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
index dae13f0..a124f0e 100644
--- a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
+++ b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
@@ -389,6 +389,10 @@ public class UTF8StringSuite {
assertEquals(e.substringSQL(0, Integer.MAX_VALUE), fromString("example"));
assertEquals(e.substringSQL(1, Integer.MAX_VALUE), fromString("example"));
assertEquals(e.substringSQL(2, Integer.MAX_VALUE), fromString("xample"));
+ assertEquals(EMPTY_UTF8, e.substringSQL(-100, -100));
+ assertEquals(EMPTY_UTF8, e.substringSQL(-1207959552, -1207959552));
+ assertEquals(fromString("pl"), e.substringSQL(-3, 2));
+ assertEquals(EMPTY_UTF8, e.substringSQL(Integer.MIN_VALUE, 6));
}
@Test
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
index aa334e0..37bf9e5 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
@@ -229,6 +229,10 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
"xample",
row)
+ // Substring with from negative position with negative length
+ checkEvaluation(Substring(s, Literal.create(-1207959552, IntegerType),
+ Literal.create(-1207959552, IntegerType)), "", row)
+
val s_notNull = 'a.string.notNull.at(0)
assert(Substring(s, Literal.create(0, IntegerType), Literal.create(2, IntegerType)).nullable
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org