You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@drill.apache.org by GitBox <gi...@apache.org> on 2018/07/06 17:44:31 UTC

[GitHub] Ben-Zvi closed pull request #1354: DRILL-6570: Fixed IndexOutofBoundException in Parquet Reader

Ben-Zvi closed pull request #1354: DRILL-6570: Fixed IndexOutofBoundException in Parquet Reader
URL: https://github.com/apache/drill/pull/1354
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

Because this pull request comes from a forked repository, the diff is
supplied below (GitHub does not otherwise display it after the merge):

diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenColumnBulkEntry.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenColumnBulkEntry.java
index bc7741553d..e37700a3f1 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenColumnBulkEntry.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLenColumnBulkEntry.java
@@ -18,7 +18,7 @@
 package org.apache.drill.exec.store.parquet.columnreaders;
 
 import org.apache.drill.exec.store.parquet.columnreaders.VarLenColumnBulkInput.ColumnPrecisionInfo;
-import org.apache.drill.exec.store.parquet.columnreaders.VarLenColumnBulkInput.ColumnPrecisionType;
+import org.apache.drill.exec.vector.UInt4Vector;
 import org.apache.drill.exec.vector.VarLenBulkEntry;
 
 import io.netty.buffer.DrillBuf;
@@ -55,25 +55,17 @@
   }
 
   VarLenColumnBulkEntry(ColumnPrecisionInfo columnPrecInfo, int buffSz) {
-    int lengthSz = -1;
-    int dataSz = -1;
-
-    if (ColumnPrecisionType.isPrecTypeFixed(columnPrecInfo.columnPrecisionType)) {
-      final int expectedDataLen = columnPrecInfo.precision;
-      final int maxNumValues = buffSz / (4 + expectedDataLen);
-      lengthSz = maxNumValues;
-      dataSz = maxNumValues * expectedDataLen + PADDING;
-
-    } else {
-      // For variable length data, we need to handle a) maximum number of entries and b) max entry length
-      final int smallestDataLen = 1;
-      final int largestDataLen = buffSz - 4;
-      final int maxNumValues = buffSz / (4 + smallestDataLen);
-      lengthSz = maxNumValues;
-      dataSz = largestDataLen + PADDING;
-    }
-
-    this.lengths       = new int[lengthSz];
+
+    // For variable length data, we need to handle a) maximum number of entries
+    // and b) max entry length. Note that we don't optimize for fixed length
+    // columns as the reader can notice a false-positive (that is, the first
+    // values were fixed but not the rest).
+    final int largestDataLen = buffSz - UInt4Vector.VALUE_WIDTH;
+    final int maxNumValues = buffSz / UInt4Vector.VALUE_WIDTH;
+    final int lengthSz = maxNumValues;
+    final int dataSz = largestDataLen + PADDING;
+
+    this.lengths = new int[lengthSz];
     this.internalArray = new byte[dataSz];
   }
 


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services