You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ek...@apache.org on 2018/10/05 16:06:22 UTC
hive git commit: HIVE-20664: Potential ArrayIndexOutOfBoundsException in VectorizedOrcAcidRowBatchReader.findMinMaxKeys (Saurabh Seth via Eugene Koifman)
Repository: hive
Updated Branches:
refs/heads/master f0434c5b5 -> 9593e46e6
HIVE-20664: Potential ArrayIndexOutOfBoundsException in VectorizedOrcAcidRowBatchReader.findMinMaxKeys (Saurabh Seth via Eugene Koifman)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/9593e46e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/9593e46e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/9593e46e
Branch: refs/heads/master
Commit: 9593e46e63aa8f41217a5eeafd557cc6c95e473e
Parents: f0434c5
Author: Saurabh Seth <sa...@gmail.com>
Authored: Fri Oct 5 08:35:47 2018 -0700
Committer: Eugene Koifman <ek...@apache.org>
Committed: Fri Oct 5 08:35:47 2018 -0700
----------------------------------------------------------------------
.../io/orc/VectorizedOrcAcidRowBatchReader.java | 33 +++++++++++++++++---
1 file changed, 29 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/9593e46e/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java
index 1509bba..0cefeee 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java
@@ -422,10 +422,7 @@ public class VectorizedOrcAcidRowBatchReader
if(firstStripeIndex == -1 && stripe.getOffset() >= splitStart) {
firstStripeIndex = i;
}
- if(lastStripeIndex == -1 && splitEnd <= stripeEnd &&
- stripes.get(firstStripeIndex).getOffset() <= stripe.getOffset() ) {
- //the last condition is for when both splitStart and splitEnd are in
- // the same stripe
+ if(lastStripeIndex == -1 && splitEnd <= stripeEnd) {
lastStripeIndex = i;
}
}
@@ -435,6 +432,34 @@ public class VectorizedOrcAcidRowBatchReader
stripes.get(stripes.size() - 1).getLength() < splitEnd;
lastStripeIndex = stripes.size() - 1;
}
+
+ if (firstStripeIndex > lastStripeIndex || firstStripeIndex == -1) {
+ /**
+ * If the firstStripeIndex was set after the lastStripeIndex the split lies entirely within a single stripe.
+ * In case the split lies entirely within the last stripe, the firstStripeIndex will never be found, hence the
+ * second condition.
+ * In this case, the reader for this split will not read any data.
+ * See {@link org.apache.orc.impl.RecordReaderImpl#RecordReaderImpl}.
+ * Create a KeyInterval such that no delete delta records are loaded into memory in the deleteEventRegistry.
+ */
+
+ long minRowId = 1;
+ long maxRowId = 0;
+ int minBucketProp = 1;
+ int maxBucketProp = 0;
+
+ OrcRawRecordMerger.KeyInterval keyIntervalTmp =
+ new OrcRawRecordMerger.KeyInterval(new RecordIdentifier(1, minBucketProp, minRowId),
+ new RecordIdentifier(0, maxBucketProp, maxRowId));
+
+ setSARG(keyIntervalTmp, deleteEventReaderOptions, minBucketProp, maxBucketProp,
+ minRowId, maxRowId);
+ LOG.info("findMinMaxKeys(): " + keyIntervalTmp +
+ " stripes(" + firstStripeIndex + "," + lastStripeIndex + ")");
+
+ return keyIntervalTmp;
+ }
+
if(firstStripeIndex == -1 || lastStripeIndex == -1) {
//this should not happen but... if we don't know which stripe(s) are
//involved we can't figure out min/max bounds