You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by hv...@apache.org on 2018/05/02 08:41:39 UTC
spark git commit: [SPARK-23976][CORE] Detect length overflow in
UTF8String.concat()/ByteArray.concat()
Repository: spark
Updated Branches:
refs/heads/master e15850be6 -> 9215ee7a1
[SPARK-23976][CORE] Detect length overflow in UTF8String.concat()/ByteArray.concat()
## What changes were proposed in this pull request?
This PR detects length overflow when the total number of bytes across all inputs exceeds the maximum length of a `byte[]`.
For example, when the three inputs have lengths `0x7FFF_FF00`, `0x7FFF_FF00`, and `0xE00`, we should detect a length overflow, since a `byte[]` of that total size cannot be allocated.
The current algorithm, however, silently allocates a result of only `0xC00` bytes, because the `int` sum wraps around (`0x1_0000_0C00` truncated to 32 bits).
## How was this patch tested?
Existing UTs.
Creating UTs for this case would require a large heap (6-8GB), which may make the test environment unstable.
If UTs are considered necessary, I will add them.
Author: Kazuaki Ishizaki <is...@jp.ibm.com>
Closes #21064 from kiszk/SPARK-23976.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9215ee7a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9215ee7a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9215ee7a
Branch: refs/heads/master
Commit: 9215ee7a16b57c56ae927d65e024cf7afe542cbb
Parents: e15850b
Author: Kazuaki Ishizaki <is...@jp.ibm.com>
Authored: Wed May 2 10:41:34 2018 +0200
Committer: Herman van Hovell <hv...@databricks.com>
Committed: Wed May 2 10:41:34 2018 +0200
----------------------------------------------------------------------
.../java/org/apache/spark/unsafe/types/ByteArray.java | 12 +++++++-----
.../java/org/apache/spark/unsafe/types/UTF8String.java | 8 ++++----
2 files changed, 11 insertions(+), 9 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/9215ee7a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/ByteArray.java
----------------------------------------------------------------------
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/ByteArray.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/ByteArray.java
index c03caf0..ecd7c19 100644
--- a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/ByteArray.java
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/ByteArray.java
@@ -17,10 +17,12 @@
package org.apache.spark.unsafe.types;
-import org.apache.spark.unsafe.Platform;
-
import java.util.Arrays;
+import com.google.common.primitives.Ints;
+
+import org.apache.spark.unsafe.Platform;
+
public final class ByteArray {
public static final byte[] EMPTY_BYTE = new byte[0];
@@ -77,17 +79,17 @@ public final class ByteArray {
public static byte[] concat(byte[]... inputs) {
// Compute the total length of the result
- int totalLength = 0;
+ long totalLength = 0;
for (int i = 0; i < inputs.length; i++) {
if (inputs[i] != null) {
- totalLength += inputs[i].length;
+ totalLength += (long)inputs[i].length;
} else {
return null;
}
}
// Allocate a new byte array, and copy the inputs one by one into it
- final byte[] result = new byte[totalLength];
+ final byte[] result = new byte[Ints.checkedCast(totalLength)];
int offset = 0;
for (int i = 0; i < inputs.length; i++) {
int len = inputs[i].length;
http://git-wip-us.apache.org/repos/asf/spark/blob/9215ee7a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
----------------------------------------------------------------------
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
index e9b3d9b..e91fc43 100644
--- a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
@@ -29,8 +29,8 @@ import com.esotericsoftware.kryo.Kryo;
import com.esotericsoftware.kryo.KryoSerializable;
import com.esotericsoftware.kryo.io.Input;
import com.esotericsoftware.kryo.io.Output;
-
import com.google.common.primitives.Ints;
+
import org.apache.spark.unsafe.Platform;
import org.apache.spark.unsafe.array.ByteArrayMethods;
import org.apache.spark.unsafe.hash.Murmur3_x86_32;
@@ -877,17 +877,17 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable,
*/
public static UTF8String concat(UTF8String... inputs) {
// Compute the total length of the result.
- int totalLength = 0;
+ long totalLength = 0;
for (int i = 0; i < inputs.length; i++) {
if (inputs[i] != null) {
- totalLength += inputs[i].numBytes;
+ totalLength += (long)inputs[i].numBytes;
} else {
return null;
}
}
// Allocate a new byte array, and copy the inputs one by one into it.
- final byte[] result = new byte[totalLength];
+ final byte[] result = new byte[Ints.checkedCast(totalLength)];
int offset = 0;
for (int i = 0; i < inputs.length; i++) {
int len = inputs[i].numBytes;
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org