You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by tc...@apache.org on 2018/07/11 21:14:04 UTC

hive git commit: HIVE-20076: ACID: Fix Synthetic ROW__ID generation for vectorized orc readers (Teddy Choi, reviewed by Eugene Koifman)

Repository: hive
Updated Branches:
  refs/heads/master 733c4f316 -> 4a0814b7f


HIVE-20076: ACID: Fix Synthetic ROW__ID generation for vectorized orc readers (Teddy Choi, reviewed by Eugene Koifman)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/4a0814b7
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/4a0814b7
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/4a0814b7

Branch: refs/heads/master
Commit: 4a0814b7f2edccb98f028a1528fc45c31a0d286f
Parents: 733c4f3
Author: Teddy Choi <pu...@gmail.com>
Authored: Thu Jul 12 06:13:40 2018 +0900
Committer: Teddy Choi <pu...@gmail.com>
Committed: Thu Jul 12 06:13:40 2018 +0900

----------------------------------------------------------------------
 .../org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java     | 6 ++++++
 .../apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java  | 4 ++++
 2 files changed, 10 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/4a0814b7/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
index d177e3f..889bd58 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
@@ -147,6 +147,12 @@ public class RecordReaderImpl extends org.apache.orc.impl.RecordReaderImpl
   public boolean nextBatch(VectorizedRowBatch theirBatch) throws IOException {
     // If the user hasn't been reading by row, use the fast path.
     if (rowInBatch >= batch.size) {
+      if (batch.size > 0) {
+        // the local batch has been consumed entirely, reset it
+        batch.reset();
+      }
+      baseRow = super.getRowNumber();
+      rowInBatch = 0;
       return super.nextBatch(theirBatch);
     }
     copyIntoBatch(theirBatch, batch, rowInBatch);

http://git-wip-us.apache.org/repos/asf/hive/blob/4a0814b7/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java
index 2071d13..aa99e57 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java
@@ -154,9 +154,13 @@ public class TestVectorizedORCReader {
     VectorizedRowBatch batch = reader.getSchema().createRowBatchV2();
     OrcStruct row = null;
 
+    long lastRowNumber = -1;
     // Check Vectorized ORC reader against ORC row reader
     while (vrr.nextBatch(batch)) {
+      Assert.assertEquals(lastRowNumber + 1, vrr.getRowNumber());
       for (int i = 0; i < batch.size; i++) {
+        Assert.assertEquals(rr.getRowNumber(), vrr.getRowNumber()+i);
+        lastRowNumber = rr.getRowNumber();
         row = (OrcStruct) rr.next(row);
         for (int j = 0; j < batch.cols.length; j++) {
           Object a = (row.getFieldValue(j));