You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by li...@apache.org on 2020/11/27 02:11:04 UTC

[arrow] branch master updated: ARROW-10662: [Java] Avoid integer overflow for Json file reader

This is an automated email from the ASF dual-hosted git repository.

liyafan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new d2b3cc4  ARROW-10662: [Java] Avoid integer overflow for Json file reader
d2b3cc4 is described below

commit d2b3cc47d5c5b2e17d3ef09f3c4f16b9952f181a
Author: liyafan82 <fa...@foxmail.com>
AuthorDate: Fri Nov 27 10:09:55 2020 +0800

    ARROW-10662: [Java] Avoid integer overflow for Json file reader
    
    The current implementation uses int to represent the buffer size. However, the buffer can be larger than Integer.MAX_VALUE, which leads to integer overflow and unexpected behavior.
    
    Closes #8721 from liyafan82/fly_1119_js
    
    Authored-by: liyafan82 <fa...@foxmail.com>
    Signed-off-by: liyafan82 <fa...@foxmail.com>
---
 .../org/apache/arrow/vector/DecimalVector.java     |   6 +-
 .../apache/arrow/vector/ipc/JsonFileReader.java    | 150 ++++++++-------------
 2 files changed, 60 insertions(+), 96 deletions(-)

diff --git a/java/vector/src/main/java/org/apache/arrow/vector/DecimalVector.java b/java/vector/src/main/java/org/apache/arrow/vector/DecimalVector.java
index 472c2db..f988f4f 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/DecimalVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/DecimalVector.java
@@ -147,7 +147,7 @@ public final class DecimalVector extends BaseFixedWidthVector {
     holder.buffer = valueBuffer;
     holder.precision = precision;
     holder.scale = scale;
-    holder.start = index * TYPE_WIDTH;
+    holder.start = (long) index * TYPE_WIDTH;
   }
 
   /**
@@ -421,7 +421,7 @@ public final class DecimalVector extends BaseFixedWidthVector {
   }
 
   /**
-   * Same as {@link #set(int, int, ArrowBuf)} except that it handles the
+   * Same as {@link #set(int, long, ArrowBuf)} except that it handles the
    * case when index is greater than or equal to existing
    * value capacity {@link #getValueCapacity()}.
    *
@@ -504,7 +504,7 @@ public final class DecimalVector extends BaseFixedWidthVector {
   }
 
   /**
-   * Same as {@link #setSafe(int, int, int, ArrowBuf)} except that it handles
+   * Same as {@link #set(int, int, long, ArrowBuf)} except that it handles
    * the case when the position of new value is beyond the current value
    * capacity of the vector.
    *
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileReader.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileReader.java
index 40f83c0..50e8b33 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileReader.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileReader.java
@@ -264,7 +264,7 @@ public class JsonFileReader implements AutoCloseable, DictionaryProvider {
     BufferReader DAY_MILLIS = new BufferReader() {
       @Override
       protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
-        final int size = count * IntervalDayVector.TYPE_WIDTH;
+        final long size = (long) count * IntervalDayVector.TYPE_WIDTH;
         ArrowBuf buf = allocator.buffer(size);
 
         for (int i = 0; i < count; i++) {
@@ -281,7 +281,7 @@ public class JsonFileReader implements AutoCloseable, DictionaryProvider {
     BufferReader INT1 = new BufferReader() {
       @Override
       protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
-        final int size = count * TinyIntVector.TYPE_WIDTH;
+        final long size = (long) count * TinyIntVector.TYPE_WIDTH;
         ArrowBuf buf = allocator.buffer(size);
 
         for (int i = 0; i < count; i++) {
@@ -296,7 +296,7 @@ public class JsonFileReader implements AutoCloseable, DictionaryProvider {
     BufferReader INT2 = new BufferReader() {
       @Override
       protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
-        final int size = count * SmallIntVector.TYPE_WIDTH;
+        final long size = (long) count * SmallIntVector.TYPE_WIDTH;
         ArrowBuf buf = allocator.buffer(size);
 
         for (int i = 0; i < count; i++) {
@@ -311,7 +311,7 @@ public class JsonFileReader implements AutoCloseable, DictionaryProvider {
     BufferReader INT4 = new BufferReader() {
       @Override
       protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
-        final int size = count * IntVector.TYPE_WIDTH;
+        final long size = (long) count * IntVector.TYPE_WIDTH;
         ArrowBuf buf = allocator.buffer(size);
 
         for (int i = 0; i < count; i++) {
@@ -326,7 +326,7 @@ public class JsonFileReader implements AutoCloseable, DictionaryProvider {
     BufferReader INT8 = new BufferReader() {
       @Override
       protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
-        final int size = count * BigIntVector.TYPE_WIDTH;
+        final long size = (long) count * BigIntVector.TYPE_WIDTH;
         ArrowBuf buf = allocator.buffer(size);
 
         for (int i = 0; i < count; i++) {
@@ -342,7 +342,7 @@ public class JsonFileReader implements AutoCloseable, DictionaryProvider {
     BufferReader UINT1 = new BufferReader() {
       @Override
       protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
-        final int size = count * TinyIntVector.TYPE_WIDTH;
+        final long size = (long) count * TinyIntVector.TYPE_WIDTH;
         ArrowBuf buf = allocator.buffer(size);
 
         for (int i = 0; i < count; i++) {
@@ -357,7 +357,7 @@ public class JsonFileReader implements AutoCloseable, DictionaryProvider {
     BufferReader UINT2 = new BufferReader() {
       @Override
       protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
-        final int size = count * SmallIntVector.TYPE_WIDTH;
+        final long size = (long) count * SmallIntVector.TYPE_WIDTH;
         ArrowBuf buf = allocator.buffer(size);
 
         for (int i = 0; i < count; i++) {
@@ -372,7 +372,7 @@ public class JsonFileReader implements AutoCloseable, DictionaryProvider {
     BufferReader UINT4 = new BufferReader() {
       @Override
       protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
-        final int size = count * IntVector.TYPE_WIDTH;
+        final long size = (long) count * IntVector.TYPE_WIDTH;
         ArrowBuf buf = allocator.buffer(size);
 
         for (int i = 0; i < count; i++) {
@@ -387,7 +387,7 @@ public class JsonFileReader implements AutoCloseable, DictionaryProvider {
     BufferReader UINT8 = new BufferReader() {
       @Override
       protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
-        final int size = count * BigIntVector.TYPE_WIDTH;
+        final long size = (long) count * BigIntVector.TYPE_WIDTH;
         ArrowBuf buf = allocator.buffer(size);
 
         for (int i = 0; i < count; i++) {
@@ -403,7 +403,7 @@ public class JsonFileReader implements AutoCloseable, DictionaryProvider {
     BufferReader FLOAT4 = new BufferReader() {
       @Override
       protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
-        final int size = count * Float4Vector.TYPE_WIDTH;
+        final long size = (long) count * Float4Vector.TYPE_WIDTH;
         ArrowBuf buf = allocator.buffer(size);
 
         for (int i = 0; i < count; i++) {
@@ -418,7 +418,7 @@ public class JsonFileReader implements AutoCloseable, DictionaryProvider {
     BufferReader FLOAT8 = new BufferReader() {
       @Override
       protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
-        final int size = count * Float8Vector.TYPE_WIDTH;
+        final long size = (long) count * Float8Vector.TYPE_WIDTH;
         ArrowBuf buf = allocator.buffer(size);
 
         for (int i = 0; i < count; i++) {
@@ -433,7 +433,7 @@ public class JsonFileReader implements AutoCloseable, DictionaryProvider {
     BufferReader DECIMAL = new BufferReader() {
       @Override
       protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
-        final int size = count * DecimalVector.TYPE_WIDTH;
+        final long size = (long) count * DecimalVector.TYPE_WIDTH;
         ArrowBuf buf = allocator.buffer(size);
 
         for (int i = 0; i < count; i++) {
@@ -450,7 +450,7 @@ public class JsonFileReader implements AutoCloseable, DictionaryProvider {
     BufferReader DECIMAL256 = new BufferReader() {
       @Override
       protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
-        final int size = count * Decimal256Vector.TYPE_WIDTH;
+        final long size = (long) count * Decimal256Vector.TYPE_WIDTH;
         ArrowBuf buf = allocator.buffer(size);
 
         for (int i = 0; i < count; i++) {
@@ -464,114 +464,78 @@ public class JsonFileReader implements AutoCloseable, DictionaryProvider {
       }
     };
 
+    ArrowBuf readBinaryValues(
+        BufferAllocator allocator, int count) throws IOException {
+      ArrayList<byte[]> values = new ArrayList<>(count);
+      long bufferSize = 0L;
+      for (int i = 0; i < count; i++) {
+        parser.nextToken();
+        final byte[] value = decodeHexSafe(parser.readValueAs(String.class));
+        values.add(value);
+        bufferSize += value.length;
+      }
+
+      ArrowBuf buf = allocator.buffer(bufferSize);
+
+      for (byte[] value : values) {
+        buf.writeBytes(value);
+      }
+
+      return buf;
+    }
+
+    ArrowBuf readStringValues(
+        BufferAllocator allocator, int count) throws IOException {
+      ArrayList<byte[]> values = new ArrayList<>(count);
+      long bufferSize = 0L;
+      for (int i = 0; i < count; i++) {
+        parser.nextToken();
+        final byte[] value = parser.getValueAsString().getBytes(UTF_8);
+        values.add(value);
+        bufferSize += value.length;
+      }
+
+      ArrowBuf buf = allocator.buffer(bufferSize);
+
+      for (byte[] value : values) {
+        buf.writeBytes(value);
+      }
+
+      return buf;
+    }
 
     BufferReader FIXEDSIZEBINARY = new BufferReader() {
       @Override
       protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
-        ArrayList<byte[]> values = new ArrayList<>();
-        for (int i = 0; i < count; i++) {
-          parser.nextToken();
-          final byte[] value = decodeHexSafe(parser.readValueAs(String.class));
-          values.add(value);
-        }
-
-        int byteWidth = count > 0 ? values.get(0).length : 0;
-        ArrowBuf buf = allocator.buffer(byteWidth * count);
-        for (byte[] value : values) {
-          buf.writeBytes(value);
-        }
-
-        return buf;
+        return readBinaryValues(allocator, count);
       }
     };
 
     BufferReader VARCHAR = new BufferReader() {
       @Override
       protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
-        ArrayList<byte[]> values = new ArrayList<>();
-        int bufferSize = 0;
-        for (int i = 0; i < count; i++) {
-          parser.nextToken();
-          final byte[] value = parser.getValueAsString().getBytes(UTF_8);
-          values.add(value);
-          bufferSize += value.length;
-
-        }
-
-        ArrowBuf buf = allocator.buffer(bufferSize);
-
-        for (byte[] value : values) {
-          buf.writeBytes(value);
-        }
-
-        return buf;
+        return readStringValues(allocator, count);
       }
     };
 
     BufferReader LARGEVARCHAR = new BufferReader() {
       @Override
       protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
-        ArrayList<byte[]> values = new ArrayList<>();
-        long bufferSize = 0L;
-        for (int i = 0; i < count; i++) {
-          parser.nextToken();
-          final byte[] value = parser.getValueAsString().getBytes(UTF_8);
-          values.add(value);
-          bufferSize += value.length;
-        }
-
-        ArrowBuf buf = allocator.buffer(bufferSize);
-
-        for (byte[] value : values) {
-          buf.writeBytes(value);
-        }
-
-        return buf;
+        return readStringValues(allocator, count);
       }
     };
 
     BufferReader VARBINARY = new BufferReader() {
       @Override
       protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
-        ArrayList<byte[]> values = new ArrayList<>();
-        int bufferSize = 0;
-        for (int i = 0; i < count; i++) {
-          parser.nextToken();
-          final byte[] value = decodeHexSafe(parser.readValueAs(String.class));
-          values.add(value);
-          bufferSize += value.length;
-
-        }
-
-        ArrowBuf buf = allocator.buffer(bufferSize);
-
-        for (byte[] value : values) {
-          buf.writeBytes(value);
-        }
-
-        return buf;
+        return readBinaryValues(allocator, count);
       }
     };
 
     BufferReader LARGEVARBINARY = new BufferReader() {
       @Override
       protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
-        ArrayList<byte[]> values = new ArrayList<>();
-        long bufferSize = 0L;
-        for (int i = 0; i < count; i++) {
-          parser.nextToken();
-          final byte[] value = decodeHexSafe(parser.readValueAs(String.class));
-          values.add(value);
-          bufferSize += value.length;
-        }
-
-        ArrowBuf buf = allocator.buffer(bufferSize);
-
-        for (byte[] value : values) {
-          buf.writeBytes(value);
-        }
-
-        return buf;
+        return readBinaryValues(allocator, count);
       }
     };
   }