You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by li...@apache.org on 2020/11/27 02:11:04 UTC
[arrow] branch master updated: ARROW-10662: [Java] Avoid integer overflow for Json file reader
This is an automated email from the ASF dual-hosted git repository.
liyafan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new d2b3cc4 ARROW-10662: [Java] Avoid integer overflow for Json file reader
d2b3cc4 is described below
commit d2b3cc47d5c5b2e17d3ef09f3c4f16b9952f181a
Author: liyafan82 <fa...@foxmail.com>
AuthorDate: Fri Nov 27 10:09:55 2020 +0800
ARROW-10662: [Java] Avoid integer overflow for Json file reader
The current implementation uses int to represent the buffer size. However, a buffer can be larger than Integer.MAX_VALUE, in which case the int computation overflows and leads to unexpected behavior.
Closes #8721 from liyafan82/fly_1119_js
Authored-by: liyafan82 <fa...@foxmail.com>
Signed-off-by: liyafan82 <fa...@foxmail.com>
---
.../org/apache/arrow/vector/DecimalVector.java | 6 +-
.../apache/arrow/vector/ipc/JsonFileReader.java | 150 ++++++++-------------
2 files changed, 60 insertions(+), 96 deletions(-)
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/DecimalVector.java b/java/vector/src/main/java/org/apache/arrow/vector/DecimalVector.java
index 472c2db..f988f4f 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/DecimalVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/DecimalVector.java
@@ -147,7 +147,7 @@ public final class DecimalVector extends BaseFixedWidthVector {
holder.buffer = valueBuffer;
holder.precision = precision;
holder.scale = scale;
- holder.start = index * TYPE_WIDTH;
+ holder.start = (long) index * TYPE_WIDTH;
}
/**
@@ -421,7 +421,7 @@ public final class DecimalVector extends BaseFixedWidthVector {
}
/**
- * Same as {@link #set(int, int, ArrowBuf)} except that it handles the
+ * Same as {@link #set(int, long, ArrowBuf)} except that it handles the
* case when index is greater than or equal to existing
* value capacity {@link #getValueCapacity()}.
*
@@ -504,7 +504,7 @@ public final class DecimalVector extends BaseFixedWidthVector {
}
/**
- * Same as {@link #setSafe(int, int, int, ArrowBuf)} except that it handles
+ * Same as {@link #set(int, int, long, ArrowBuf)} except that it handles
* the case when the position of new value is beyond the current value
* capacity of the vector.
*
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileReader.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileReader.java
index 40f83c0..50e8b33 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileReader.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileReader.java
@@ -264,7 +264,7 @@ public class JsonFileReader implements AutoCloseable, DictionaryProvider {
BufferReader DAY_MILLIS = new BufferReader() {
@Override
protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
- final int size = count * IntervalDayVector.TYPE_WIDTH;
+ final long size = (long) count * IntervalDayVector.TYPE_WIDTH;
ArrowBuf buf = allocator.buffer(size);
for (int i = 0; i < count; i++) {
@@ -281,7 +281,7 @@ public class JsonFileReader implements AutoCloseable, DictionaryProvider {
BufferReader INT1 = new BufferReader() {
@Override
protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
- final int size = count * TinyIntVector.TYPE_WIDTH;
+ final long size = (long) count * TinyIntVector.TYPE_WIDTH;
ArrowBuf buf = allocator.buffer(size);
for (int i = 0; i < count; i++) {
@@ -296,7 +296,7 @@ public class JsonFileReader implements AutoCloseable, DictionaryProvider {
BufferReader INT2 = new BufferReader() {
@Override
protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
- final int size = count * SmallIntVector.TYPE_WIDTH;
+ final long size = (long) count * SmallIntVector.TYPE_WIDTH;
ArrowBuf buf = allocator.buffer(size);
for (int i = 0; i < count; i++) {
@@ -311,7 +311,7 @@ public class JsonFileReader implements AutoCloseable, DictionaryProvider {
BufferReader INT4 = new BufferReader() {
@Override
protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
- final int size = count * IntVector.TYPE_WIDTH;
+ final long size = (long) count * IntVector.TYPE_WIDTH;
ArrowBuf buf = allocator.buffer(size);
for (int i = 0; i < count; i++) {
@@ -326,7 +326,7 @@ public class JsonFileReader implements AutoCloseable, DictionaryProvider {
BufferReader INT8 = new BufferReader() {
@Override
protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
- final int size = count * BigIntVector.TYPE_WIDTH;
+ final long size = (long) count * BigIntVector.TYPE_WIDTH;
ArrowBuf buf = allocator.buffer(size);
for (int i = 0; i < count; i++) {
@@ -342,7 +342,7 @@ public class JsonFileReader implements AutoCloseable, DictionaryProvider {
BufferReader UINT1 = new BufferReader() {
@Override
protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
- final int size = count * TinyIntVector.TYPE_WIDTH;
+ final long size = (long) count * TinyIntVector.TYPE_WIDTH;
ArrowBuf buf = allocator.buffer(size);
for (int i = 0; i < count; i++) {
@@ -357,7 +357,7 @@ public class JsonFileReader implements AutoCloseable, DictionaryProvider {
BufferReader UINT2 = new BufferReader() {
@Override
protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
- final int size = count * SmallIntVector.TYPE_WIDTH;
+ final long size = (long) count * SmallIntVector.TYPE_WIDTH;
ArrowBuf buf = allocator.buffer(size);
for (int i = 0; i < count; i++) {
@@ -372,7 +372,7 @@ public class JsonFileReader implements AutoCloseable, DictionaryProvider {
BufferReader UINT4 = new BufferReader() {
@Override
protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
- final int size = count * IntVector.TYPE_WIDTH;
+ final long size = (long) count * IntVector.TYPE_WIDTH;
ArrowBuf buf = allocator.buffer(size);
for (int i = 0; i < count; i++) {
@@ -387,7 +387,7 @@ public class JsonFileReader implements AutoCloseable, DictionaryProvider {
BufferReader UINT8 = new BufferReader() {
@Override
protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
- final int size = count * BigIntVector.TYPE_WIDTH;
+ final long size = (long) count * BigIntVector.TYPE_WIDTH;
ArrowBuf buf = allocator.buffer(size);
for (int i = 0; i < count; i++) {
@@ -403,7 +403,7 @@ public class JsonFileReader implements AutoCloseable, DictionaryProvider {
BufferReader FLOAT4 = new BufferReader() {
@Override
protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
- final int size = count * Float4Vector.TYPE_WIDTH;
+ final long size = (long) count * Float4Vector.TYPE_WIDTH;
ArrowBuf buf = allocator.buffer(size);
for (int i = 0; i < count; i++) {
@@ -418,7 +418,7 @@ public class JsonFileReader implements AutoCloseable, DictionaryProvider {
BufferReader FLOAT8 = new BufferReader() {
@Override
protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
- final int size = count * Float8Vector.TYPE_WIDTH;
+ final long size = (long) count * Float8Vector.TYPE_WIDTH;
ArrowBuf buf = allocator.buffer(size);
for (int i = 0; i < count; i++) {
@@ -433,7 +433,7 @@ public class JsonFileReader implements AutoCloseable, DictionaryProvider {
BufferReader DECIMAL = new BufferReader() {
@Override
protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
- final int size = count * DecimalVector.TYPE_WIDTH;
+ final long size = (long) count * DecimalVector.TYPE_WIDTH;
ArrowBuf buf = allocator.buffer(size);
for (int i = 0; i < count; i++) {
@@ -450,7 +450,7 @@ public class JsonFileReader implements AutoCloseable, DictionaryProvider {
BufferReader DECIMAL256 = new BufferReader() {
@Override
protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
- final int size = count * Decimal256Vector.TYPE_WIDTH;
+ final long size = (long) count * Decimal256Vector.TYPE_WIDTH;
ArrowBuf buf = allocator.buffer(size);
for (int i = 0; i < count; i++) {
@@ -464,114 +464,78 @@ public class JsonFileReader implements AutoCloseable, DictionaryProvider {
}
};
+ ArrowBuf readBinaryValues(
+ BufferAllocator allocator, int count) throws IOException {
+ ArrayList<byte[]> values = new ArrayList<>(count);
+ long bufferSize = 0L;
+ for (int i = 0; i < count; i++) {
+ parser.nextToken();
+ final byte[] value = decodeHexSafe(parser.readValueAs(String.class));
+ values.add(value);
+ bufferSize += value.length;
+ }
+
+ ArrowBuf buf = allocator.buffer(bufferSize);
+
+ for (byte[] value : values) {
+ buf.writeBytes(value);
+ }
+
+ return buf;
+ }
+
+ ArrowBuf readStringValues(
+ BufferAllocator allocator, int count) throws IOException {
+ ArrayList<byte[]> values = new ArrayList<>(count);
+ long bufferSize = 0L;
+ for (int i = 0; i < count; i++) {
+ parser.nextToken();
+ final byte[] value = parser.getValueAsString().getBytes(UTF_8);
+ values.add(value);
+ bufferSize += value.length;
+ }
+
+ ArrowBuf buf = allocator.buffer(bufferSize);
+
+ for (byte[] value : values) {
+ buf.writeBytes(value);
+ }
+
+ return buf;
+ }
BufferReader FIXEDSIZEBINARY = new BufferReader() {
@Override
protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
- ArrayList<byte[]> values = new ArrayList<>();
- for (int i = 0; i < count; i++) {
- parser.nextToken();
- final byte[] value = decodeHexSafe(parser.readValueAs(String.class));
- values.add(value);
- }
-
- int byteWidth = count > 0 ? values.get(0).length : 0;
- ArrowBuf buf = allocator.buffer(byteWidth * count);
- for (byte[] value : values) {
- buf.writeBytes(value);
- }
-
- return buf;
+ return readBinaryValues(allocator, count);
}
};
BufferReader VARCHAR = new BufferReader() {
@Override
protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
- ArrayList<byte[]> values = new ArrayList<>();
- int bufferSize = 0;
- for (int i = 0; i < count; i++) {
- parser.nextToken();
- final byte[] value = parser.getValueAsString().getBytes(UTF_8);
- values.add(value);
- bufferSize += value.length;
-
- }
-
- ArrowBuf buf = allocator.buffer(bufferSize);
-
- for (byte[] value : values) {
- buf.writeBytes(value);
- }
-
- return buf;
+ return readStringValues(allocator, count);
}
};
BufferReader LARGEVARCHAR = new BufferReader() {
@Override
protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
- ArrayList<byte[]> values = new ArrayList<>();
- long bufferSize = 0L;
- for (int i = 0; i < count; i++) {
- parser.nextToken();
- final byte[] value = parser.getValueAsString().getBytes(UTF_8);
- values.add(value);
- bufferSize += value.length;
- }
-
- ArrowBuf buf = allocator.buffer(bufferSize);
-
- for (byte[] value : values) {
- buf.writeBytes(value);
- }
-
- return buf;
+ return readStringValues(allocator, count);
}
};
BufferReader VARBINARY = new BufferReader() {
@Override
protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
- ArrayList<byte[]> values = new ArrayList<>();
- int bufferSize = 0;
- for (int i = 0; i < count; i++) {
- parser.nextToken();
- final byte[] value = decodeHexSafe(parser.readValueAs(String.class));
- values.add(value);
- bufferSize += value.length;
-
- }
-
- ArrowBuf buf = allocator.buffer(bufferSize);
-
- for (byte[] value : values) {
- buf.writeBytes(value);
- }
-
- return buf;
+ return readBinaryValues(allocator, count);
}
};
BufferReader LARGEVARBINARY = new BufferReader() {
@Override
protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
- ArrayList<byte[]> values = new ArrayList<>();
- long bufferSize = 0L;
- for (int i = 0; i < count; i++) {
- parser.nextToken();
- final byte[] value = decodeHexSafe(parser.readValueAs(String.class));
- values.add(value);
- bufferSize += value.length;
- }
-
- ArrowBuf buf = allocator.buffer(bufferSize);
-
- for (byte[] value : values) {
- buf.writeBytes(value);
- }
-
- return buf;
+ return readBinaryValues(allocator, count);
}
};
}