You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by tc...@apache.org on 2018/07/11 21:14:04 UTC
hive git commit: HIVE-20076: ACID: Fix Synthetic ROW__ID generation for vectorized orc readers (Teddy Choi, reviewed by Eugene Koifman)
Repository: hive
Updated Branches:
refs/heads/master 733c4f316 -> 4a0814b7f
HIVE-20076: ACID: Fix Synthetic ROW__ID generation for vectorized orc readers (Teddy Choi, reviewed by Eugene Koifman)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/4a0814b7
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/4a0814b7
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/4a0814b7
Branch: refs/heads/master
Commit: 4a0814b7f2edccb98f028a1528fc45c31a0d286f
Parents: 733c4f3
Author: Teddy Choi <pu...@gmail.com>
Authored: Thu Jul 12 06:13:40 2018 +0900
Committer: Teddy Choi <pu...@gmail.com>
Committed: Thu Jul 12 06:13:40 2018 +0900
----------------------------------------------------------------------
.../org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java | 6 ++++++
.../apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java | 4 ++++
2 files changed, 10 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/4a0814b7/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
index d177e3f..889bd58 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
@@ -147,6 +147,12 @@ public class RecordReaderImpl extends org.apache.orc.impl.RecordReaderImpl
public boolean nextBatch(VectorizedRowBatch theirBatch) throws IOException {
// If the user hasn't been reading by row, use the fast path.
if (rowInBatch >= batch.size) {
+ if (batch.size > 0) {
+ // the local batch has been consumed entirely, reset it
+ batch.reset();
+ }
+ baseRow = super.getRowNumber();
+ rowInBatch = 0;
return super.nextBatch(theirBatch);
}
copyIntoBatch(theirBatch, batch, rowInBatch);
http://git-wip-us.apache.org/repos/asf/hive/blob/4a0814b7/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java
index 2071d13..aa99e57 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java
@@ -154,9 +154,13 @@ public class TestVectorizedORCReader {
VectorizedRowBatch batch = reader.getSchema().createRowBatchV2();
OrcStruct row = null;
+ long lastRowNumber = -1;
// Check Vectorized ORC reader against ORC row reader
while (vrr.nextBatch(batch)) {
+ Assert.assertEquals(lastRowNumber + 1, vrr.getRowNumber());
for (int i = 0; i < batch.size; i++) {
+ Assert.assertEquals(rr.getRowNumber(), vrr.getRowNumber()+i);
+ lastRowNumber = rr.getRowNumber();
row = (OrcStruct) rr.next(row);
for (int j = 0; j < batch.cols.length; j++) {
Object a = (row.getFieldValue(j));