You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2017/04/26 19:14:54 UTC

arrow git commit: ARROW-697: JAVA Throw exception for record batches > 2GB

Repository: arrow
Updated Branches:
  refs/heads/master 02c32ff93 -> 8bf61d168


ARROW-697: JAVA Throw exception for record batches > 2GB

Add a test to verify that we throw a clear error message for record batches over 2GB. This entry point is the easiest to test without adding some magic bytes to the test suite, since it is explicit on the input, and the other public entry points for deserialization have the same checks (just extracted from the metadata).

Author: Holden Karau <ho...@us.ibm.com>

Closes #597 from holdenk/ARROW-697-java-raise-exception-for-large-batch-size and squashes the following commits:

d2d6b3d [Holden Karau] Merge branch 'master' into ARROW-697-java-raise-exception-for-large-batch-size
d56daab [Holden Karau] Throw IOException if record batch length, node length, or null count are larger than Int.MAX_VALUE
0a96b74 [Holden Karau] Add a test to verify that we throw a clear error message for record batches over 2GB in size


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/8bf61d16
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/8bf61d16
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/8bf61d16

Branch: refs/heads/master
Commit: 8bf61d1682b883a7a538678f7f3c68dc06bb758d
Parents: 02c32ff
Author: Holden Karau <ho...@us.ibm.com>
Authored: Wed Apr 26 15:14:49 2017 -0400
Committer: Wes McKinney <we...@twosigma.com>
Committed: Wed Apr 26 15:14:49 2017 -0400

----------------------------------------------------------------------
 .../arrow/vector/stream/MessageSerializer.java    | 10 +++++++++-
 .../vector/stream/MessageSerializerTest.java      | 18 ++++++++++++++++++
 2 files changed, 27 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/8bf61d16/java/vector/src/main/java/org/apache/arrow/vector/stream/MessageSerializer.java
----------------------------------------------------------------------
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/stream/MessageSerializer.java b/java/vector/src/main/java/org/apache/arrow/vector/stream/MessageSerializer.java
index ec7e0f2..228ab61 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/stream/MessageSerializer.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/stream/MessageSerializer.java
@@ -201,12 +201,17 @@ public class MessageSerializer {
 
   // Deserializes a record batch given the Flatbuffer metadata and in-memory body
   private static ArrowRecordBatch deserializeRecordBatch(RecordBatch recordBatchFB,
-      ArrowBuf body) {
+      ArrowBuf body) throws IOException {
     // Now read the body
     int nodesLength = recordBatchFB.nodesLength();
     List<ArrowFieldNode> nodes = new ArrayList<>();
     for (int i = 0; i < nodesLength; ++i) {
       FieldNode node = recordBatchFB.nodes(i);
+      if ((int)node.length() != node.length() ||
+          (int)node.nullCount() != node.nullCount()) {
+        throw new IOException("Cannot currently deserialize record batches with " +
+                              "node length larger than Int.MAX_VALUE");
+      }
       nodes.add(new ArrowFieldNode((int)node.length(), (int)node.nullCount()));
     }
     List<ArrowBuf> buffers = new ArrayList<>();
@@ -215,6 +220,9 @@ public class MessageSerializer {
       ArrowBuf vectorBuffer = body.slice((int)bufferFB.offset(), (int)bufferFB.length());
       buffers.add(vectorBuffer);
     }
+    if ((int)recordBatchFB.length() != recordBatchFB.length()) {
+      throw new IOException("Cannot currently deserialize record batches over 2GB");
+    }
     ArrowRecordBatch arrowRecordBatch =
         new ArrowRecordBatch((int)recordBatchFB.length(), nodes, buffers);
     body.release();

http://git-wip-us.apache.org/repos/asf/arrow/blob/8bf61d16/java/vector/src/test/java/org/apache/arrow/vector/stream/MessageSerializerTest.java
----------------------------------------------------------------------
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/stream/MessageSerializerTest.java b/java/vector/src/test/java/org/apache/arrow/vector/stream/MessageSerializerTest.java
index d3d49d5..27879ef 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/stream/MessageSerializerTest.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/stream/MessageSerializerTest.java
@@ -31,6 +31,7 @@ import java.util.List;
 
 import org.apache.arrow.memory.BufferAllocator;
 import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.file.ArrowBlock;
 import org.apache.arrow.vector.file.ReadChannel;
 import org.apache.arrow.vector.file.WriteChannel;
 import org.apache.arrow.vector.schema.ArrowFieldNode;
@@ -41,6 +42,8 @@ import org.apache.arrow.vector.types.pojo.DictionaryEncoding;
 import org.apache.arrow.vector.types.pojo.Field;
 import org.apache.arrow.vector.types.pojo.Schema;
 import org.junit.Test;
+import org.junit.Rule;
+import org.junit.rules.ExpectedException;
 
 import io.netty.buffer.ArrowBuf;
 
@@ -87,6 +90,21 @@ public class MessageSerializerTest {
     assertEquals(schema, deserialized);
   }
 
+  @Rule
+  public ExpectedException expectedEx = ExpectedException.none();
+
+  @Test
+  public void testdeSerializeRecordBatchLongMetaData() throws IOException {
+    expectedEx.expect(IOException.class);
+    expectedEx.expectMessage("Cannot currently deserialize record batches over 2GB");
+    int offset = 0;
+    int metadataLength = 1;
+    long bodyLength = Integer.MAX_VALUE + 10L;
+    ArrowBlock block = new ArrowBlock(offset, metadataLength, bodyLength);
+    long totalLen = block.getMetadataLength() + block.getBodyLength();
+    MessageSerializer.deserializeRecordBatch(null, block, null);
+  }
+
   @Test
   public void testSerializeRecordBatch() throws IOException {
     byte[] validity = new byte[] { (byte)255, 0};