You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ek...@apache.org on 2018/10/05 16:06:22 UTC

hive git commit: HIVE-20664: Potential ArrayIndexOutOfBoundsException in VectorizedOrcAcidRowBatchReader.findMinMaxKeys (Saurabh Seth via Eugene Koifman)

Repository: hive
Updated Branches:
  refs/heads/master f0434c5b5 -> 9593e46e6


HIVE-20664: Potential ArrayIndexOutOfBoundsException in VectorizedOrcAcidRowBatchReader.findMinMaxKeys (Saurabh Seth via Eugene Koifman)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/9593e46e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/9593e46e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/9593e46e

Branch: refs/heads/master
Commit: 9593e46e63aa8f41217a5eeafd557cc6c95e473e
Parents: f0434c5
Author: Saurabh Seth <sa...@gmail.com>
Authored: Fri Oct 5 08:35:47 2018 -0700
Committer: Eugene Koifman <ek...@apache.org>
Committed: Fri Oct 5 08:35:47 2018 -0700

----------------------------------------------------------------------
 .../io/orc/VectorizedOrcAcidRowBatchReader.java | 33 +++++++++++++++++---
 1 file changed, 29 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/9593e46e/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java
index 1509bba..0cefeee 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java
@@ -422,10 +422,7 @@ public class VectorizedOrcAcidRowBatchReader
       if(firstStripeIndex == -1 && stripe.getOffset() >= splitStart) {
         firstStripeIndex = i;
       }
-      if(lastStripeIndex == -1 && splitEnd <= stripeEnd &&
-          stripes.get(firstStripeIndex).getOffset() <= stripe.getOffset() ) {
-        //the last condition is for when both splitStart and splitEnd are in
-        // the same stripe
+      if(lastStripeIndex == -1 && splitEnd <= stripeEnd) {
         lastStripeIndex = i;
       }
     }
@@ -435,6 +432,34 @@ public class VectorizedOrcAcidRowBatchReader
           stripes.get(stripes.size() - 1).getLength() < splitEnd;
       lastStripeIndex = stripes.size() - 1;
     }
+
+    if (firstStripeIndex > lastStripeIndex || firstStripeIndex == -1) {
+      /**
+       * If the firstStripeIndex was set after the lastStripeIndex the split lies entirely within a single stripe.
+       * In case the split lies entirely within the last stripe, the firstStripeIndex will never be found, hence the
+       * second condition.
+       * In this case, the reader for this split will not read any data.
+       * See {@link org.apache.orc.impl.RecordReaderImpl#RecordReaderImpl}.
+       * Create a KeyInterval such that no delete delta records are loaded into memory in the deleteEventRegistry.
+       */
+
+      long minRowId = 1;
+      long maxRowId = 0;
+      int minBucketProp = 1;
+      int maxBucketProp = 0;
+
+      OrcRawRecordMerger.KeyInterval keyIntervalTmp =
+          new OrcRawRecordMerger.KeyInterval(new RecordIdentifier(1, minBucketProp, minRowId),
+          new RecordIdentifier(0, maxBucketProp, maxRowId));
+
+      setSARG(keyIntervalTmp, deleteEventReaderOptions, minBucketProp, maxBucketProp,
+          minRowId, maxRowId);
+      LOG.info("findMinMaxKeys(): " + keyIntervalTmp +
+          " stripes(" + firstStripeIndex + "," + lastStripeIndex + ")");
+
+      return keyIntervalTmp;
+    }
+
     if(firstStripeIndex == -1 || lastStripeIndex == -1) {
       //this should not happen but... if we don't know which stripe(s) are
       //involved we can't figure out min/max bounds