Posted to commits@spark.apache.org by ya...@apache.org on 2020/06/12 00:22:41 UTC

[spark] branch branch-3.0 updated: [SPARK-31916][SQL] StringConcat can lead to StringIndexOutOfBoundsException

This is an automated email from the ASF dual-hosted git repository.

yamamuro pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new f61b31a  [SPARK-31916][SQL] StringConcat can lead to StringIndexOutOfBoundsException
f61b31a is described below

commit f61b31a5a484c7e90920ec36c456594ce92cdf73
Author: Dilip Biswal <dk...@gmail.com>
AuthorDate: Fri Jun 12 09:19:29 2020 +0900

    [SPARK-31916][SQL] StringConcat can lead to StringIndexOutOfBoundsException
    
    ### What changes were proposed in this pull request?
    A minor fix to the append method of StringConcat: cap the length at MAX_ROUNDED_ARRAY_LENGTH so that it does not overflow and cause a StringIndexOutOfBoundsException.
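    
    A minimal sketch of the capping technique, assuming a simplified stand-in for the real
    StringConcat in org.apache.spark.sql.catalyst.util.StringUtils (the CappedLengthCounter
    class below is hypothetical and only illustrates the idea): do the addition in Long space
    so it cannot wrap, then clamp to ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH before
    narrowing back to Int.
    
        import org.apache.spark.unsafe.array.ByteArrayMethods
    
        // Hypothetical, simplified counter used only for illustration.
        class CappedLengthCounter {
          private var length: Int = 0
    
          def add(sLen: Int): Unit = {
            // Add in Long space so the sum cannot overflow, then cap at the
            // maximum rounded array length before converting back to Int.
            length = Math.min(length.toLong + sLen,
              ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH).toInt
          }
    
          def current: Int = length
        }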
    
    Thanks to **Jeffrey Stokes** for reporting the issue and explaining the underlying problem in detail in the JIRA.
    
    ### Why are the changes needed?
    This fixes a StringIndexOutOfBoundsException caused by an overflow of the internal length counter.
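    
    A hedged illustration of the failure mode, with made-up numbers rather than the exact
    code path inside StringConcat: once an Int length counter wraps past Integer.MAX_VALUE
    it turns negative, and a later `maxLength - length` subtraction can overflow to a
    negative value that then reaches substring.
    
        // Hypothetical values chosen only to show the wrap-around.
        var length: Int = Integer.MAX_VALUE - 10    // counter close to the Int limit
        val sLen = 76                               // length of the next appended string
        length += sLen                              // wraps to a large negative Int
    
        val maxLength = 100
        val available = maxLength - length          // overflows again: negative result
        // "some string".substring(0, available)    // would throw StringIndexOutOfBoundsException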
    
    ### Does this PR introduce any user-facing change?
    No.
    
    ### How was this patch tested?
    Added a test in StringUtilsSuite.
    
    Closes #28750 from dilipbiswal/SPARK-31916.
    
    Authored-by: Dilip Biswal <dk...@gmail.com>
    Signed-off-by: Takeshi Yamamuro <ya...@apache.org>
    (cherry picked from commit b87a342c7dd51046fcbe323db640c825646fb8d4)
    Signed-off-by: Takeshi Yamamuro <ya...@apache.org>
---
 .../spark/sql/catalyst/util/StringUtils.scala      |  6 +++-
 .../spark/sql/catalyst/util/StringUtilsSuite.scala | 32 +++++++++++++++++++++-
 2 files changed, 36 insertions(+), 2 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala
index b42ae4e..2a416d6 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala
@@ -123,7 +123,11 @@ object StringUtils extends Logging {
           val stringToAppend = if (available >= sLen) s else s.substring(0, available)
           strings.append(stringToAppend)
         }
-        length += sLen
+
+        // Keeps the total length of appended strings. Note that we need to cap the length at
+        // `ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH`; otherwise, we will overflow
+        // length causing StringIndexOutOfBoundsException in the substring call above.
+        length = Math.min(length.toLong + sLen, ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH).toInt
       }
     }
 
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/StringUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/StringUtilsSuite.scala
index 67bc4bc..c68e89fc 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/StringUtilsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/StringUtilsSuite.scala
@@ -18,9 +18,11 @@
 package org.apache.spark.sql.catalyst.util
 
 import org.apache.spark.SparkFunSuite
+import org.apache.spark.sql.catalyst.plans.SQLHelper
 import org.apache.spark.sql.catalyst.util.StringUtils._
+import org.apache.spark.sql.internal.SQLConf
 
-class StringUtilsSuite extends SparkFunSuite {
+class StringUtilsSuite extends SparkFunSuite with SQLHelper {
 
   test("escapeLikeRegex") {
     val expectedEscapedStrOne = "(?s)\\Qa\\E\\Qb\\E\\Qd\\E\\Qe\\E\\Qf\\E"
@@ -98,4 +100,32 @@ class StringUtilsSuite extends SparkFunSuite {
     assert(checkLimit("1234567"))
     assert(checkLimit("1234567890"))
   }
+
+  test("SPARK-31916: StringConcat doesn't overflow on many inputs") {
+    val concat = new StringConcat(maxLength = 100)
+    val stringToAppend = "Test internal index of StringConcat does not overflow with many " +
+      "append calls"
+    0.to((Integer.MAX_VALUE / stringToAppend.length) + 1).foreach { _ =>
+      concat.append(stringToAppend)
+    }
+    assert(concat.toString.length === 100)
+  }
+
+  test("SPARK-31916: verify that PlanStringConcat's output shows the actual length of the plan") {
+    withSQLConf(SQLConf.MAX_PLAN_STRING_LENGTH.key -> "0") {
+      val concat = new PlanStringConcat()
+      0.to(3).foreach { i =>
+        concat.append(s"plan fragment $i")
+      }
+      assert(concat.toString === "Truncated plan of 60 characters")
+    }
+
+    withSQLConf(SQLConf.MAX_PLAN_STRING_LENGTH.key -> "60") {
+      val concat = new PlanStringConcat()
+      0.to(2).foreach { i =>
+        concat.append(s"plan fragment $i")
+      }
+      assert(concat.toString === "plan fragment 0plan fragment 1... 15 more characters")
+    }
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org