Posted to commits@hive.apache.org by mm...@apache.org on 2016/04/16 01:04:59 UTC
[1/4] hive git commit: Revert "HIVE-12159: Create vectorized readers for the complex types (Owen O'Malley, reviewed by Matt McCline)"
Repository: hive
Updated Branches:
refs/heads/master 40e0c3807 -> d559b3475
http://git-wip-us.apache.org/repos/asf/hive/blob/d559b347/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java
index a52b3ef..adb52f0 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java
@@ -151,11 +151,12 @@ public class TestVectorizedORCReader {
OrcFile.readerOptions(conf));
RecordReaderImpl vrr = (RecordReaderImpl) vreader.rows();
RecordReaderImpl rr = (RecordReaderImpl) reader.rows();
- VectorizedRowBatch batch = reader.getSchema().createRowBatch();
+ VectorizedRowBatch batch = null;
OrcStruct row = null;
// Check Vectorized ORC reader against ORC row reader
- while (vrr.nextBatch(batch)) {
+ while (vrr.hasNext()) {
+ batch = vrr.nextBatch(batch);
for (int i = 0; i < batch.size; i++) {
row = (OrcStruct) rr.next(row);
for (int j = 0; j < batch.cols.length; j++) {
@@ -238,6 +239,6 @@ public class TestVectorizedORCReader {
Assert.assertEquals(false, batch.cols[8].noNulls);
Assert.assertEquals(false, batch.cols[9].noNulls);
}
- Assert.assertEquals(false, rr.nextBatch(batch));
+ Assert.assertEquals(false, rr.hasNext());
}
}
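For orientation, the two read-loop shapes this test hunk switches between look roughly as follows. This is a sketch only, not code from the patch: the two signatures live on opposite sides of the revert, so no single tree compiles both, and the class name is invented.

import java.io.IOException;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.io.orc.RecordReaderImpl;

// Sketch only: contrasts the two RecordReader idioms on either side of this
// revert. ReadLoopSketch is an invented name, not a class in the patch.
class ReadLoopSketch {

  // Shape being reverted away from (HIVE-12159): the caller allocates the
  // batch once (reader.getSchema().createRowBatch()) and nextBatch(batch)
  // returns true while rows were read into it.
  static void booleanStyle(RecordReaderImpl rr, VectorizedRowBatch batch)
      throws IOException {
    while (rr.nextBatch(batch)) {
      for (int r = 0; r < batch.size; ++r) {
        // consume row r of the current batch
      }
    }
  }

  // Shape being restored: the reader owns allocation. Pass null on the
  // first call, feed the previous batch back for reuse, and loop on
  // hasNext() as the test above does.
  static void hasNextStyle(RecordReaderImpl rr) throws IOException {
    VectorizedRowBatch batch = null;
    while (rr.hasNext()) {
      batch = rr.nextBatch(batch);
      for (int r = 0; r < batch.size; ++r) {
        // consume row r of the current batch
      }
    }
  }
}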
http://git-wip-us.apache.org/repos/asf/hive/blob/d559b347/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
index f915a7e..99744cd 100644
--- a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
@@ -338,17 +338,6 @@ public class BytesColumnVector extends ColumnVector {
initBuffer(0);
}
- public String toString(int row) {
- if (isRepeating) {
- row = 0;
- }
- if (noNulls || !isNull[row]) {
- return new String(vector[row], start[row], length[row]);
- } else {
- return null;
- }
- }
-
@Override
public void stringifyValue(StringBuilder buffer, int row) {
if (isRepeating) {
http://git-wip-us.apache.org/repos/asf/hive/blob/d559b347/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java
index d971339..c0dd5ed 100644
--- a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java
@@ -392,4 +392,4 @@ public class TimestampColumnVector extends ColumnVector {
buffer.append("null");
}
}
-}
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/d559b347/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/UnionColumnVector.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/UnionColumnVector.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/UnionColumnVector.java
index 0c61243..298d588 100644
--- a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/UnionColumnVector.java
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/UnionColumnVector.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector;
+import java.util.Arrays;
+
/**
* The representation of a vectorized column of struct objects.
*
[4/4] hive git commit: Revert "HIVE-12159: Create vectorized readers for the complex types (Owen O'Malley, reviewed by Matt McCline)"
Revert "HIVE-12159: Create vectorized readers for the complex types (Owen O'Malley, reviewed by Matt McCline)"
This reverts commit 0dd4621f34f6043071474220a082268cda124b9d.
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/d559b347
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/d559b347
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/d559b347
Branch: refs/heads/master
Commit: d559b34755010b5ed3ecc31fa423d01788e5e875
Parents: 40e0c38
Author: Matt McCline <mm...@hortonworks.com>
Authored: Fri Apr 15 16:00:18 2016 -0700
Committer: Matt McCline <mm...@hortonworks.com>
Committed: Fri Apr 15 16:00:18 2016 -0700
----------------------------------------------------------------------
.../llap/io/decode/OrcEncodedDataConsumer.java | 45 +-
orc/src/java/org/apache/orc/OrcUtils.java | 75 -
orc/src/java/org/apache/orc/Reader.java | 6 -
orc/src/java/org/apache/orc/RecordReader.java | 8 +-
.../java/org/apache/orc/TypeDescription.java | 62 +-
.../org/apache/orc/impl/BitFieldReader.java | 5 +-
.../java/org/apache/orc/impl/IntegerReader.java | 26 +-
.../apache/orc/impl/RunLengthByteReader.java | 36 +-
.../apache/orc/impl/RunLengthIntegerReader.java | 31 +-
.../orc/impl/RunLengthIntegerReaderV2.java | 33 +-
.../java/org/apache/orc/impl/WriterImpl.java | 47 +-
.../ql/exec/vector/VectorizedRowBatchCtx.java | 13 +-
.../hadoop/hive/ql/io/orc/OrcInputFormat.java | 43 +-
.../hive/ql/io/orc/OrcRawRecordMerger.java | 3 +-
.../hadoop/hive/ql/io/orc/ReaderImpl.java | 12 +-
.../hadoop/hive/ql/io/orc/RecordReaderImpl.java | 50 +-
.../hadoop/hive/ql/io/orc/SchemaEvolution.java | 234 ++-
.../hive/ql/io/orc/TreeReaderFactory.java | 838 ++++-----
.../ql/io/orc/VectorizedOrcInputFormat.java | 32 +-
.../hadoop/hive/ql/io/orc/WriterImpl.java | 2 +
.../hive/ql/io/orc/TestTypeDescription.java | 4 +-
.../hive/ql/io/orc/TestVectorOrcFile.java | 1634 +++++++++---------
.../hive/ql/io/orc/TestVectorizedORCReader.java | 7 +-
.../hive/ql/exec/vector/BytesColumnVector.java | 11 -
.../ql/exec/vector/TimestampColumnVector.java | 2 +-
.../hive/ql/exec/vector/UnionColumnVector.java | 2 +
26 files changed, 1476 insertions(+), 1785 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/d559b347/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java
----------------------------------------------------------------------
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java
index baaa4d7..7ee263d 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java
@@ -18,7 +18,6 @@
package org.apache.hadoop.hive.llap.io.decode;
import java.io.IOException;
-import java.util.List;
import org.apache.hadoop.hive.common.io.encoded.EncodedColumnBatch;
import org.apache.hadoop.hive.common.io.encoded.EncodedColumnBatch.ColumnStreamData;
@@ -28,12 +27,7 @@ import org.apache.hadoop.hive.llap.io.api.impl.ColumnVectorBatch;
import org.apache.hadoop.hive.llap.io.metadata.OrcFileMetadata;
import org.apache.hadoop.hive.llap.io.metadata.OrcStripeMetadata;
import org.apache.hadoop.hive.llap.metrics.LlapDaemonQueueMetrics;
-import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.orc.CompressionCodec;
import org.apache.hadoop.hive.ql.io.orc.encoded.Consumer;
@@ -77,35 +71,6 @@ public class OrcEncodedDataConsumer
stripes[m.getStripeIx()] = m;
}
- private static ColumnVector createColumn(OrcProto.Type type,
- int batchSize) {
- switch (type.getKind()) {
- case BOOLEAN:
- case BYTE:
- case SHORT:
- case INT:
- case LONG:
- case DATE:
- return new LongColumnVector(batchSize);
- case FLOAT:
- case DOUBLE:
- return new DoubleColumnVector(batchSize);
- case BINARY:
- case STRING:
- case CHAR:
- case VARCHAR:
- return new BytesColumnVector(batchSize);
- case TIMESTAMP:
- return new TimestampColumnVector(batchSize);
- case DECIMAL:
- return new DecimalColumnVector(batchSize, type.getPrecision(),
- type.getScale());
- default:
- throw new IllegalArgumentException("LLAP does not support " +
- type.getKind());
- }
- }
-
@Override
protected void decodeBatch(OrcEncodedColumnBatch batch,
Consumer<ColumnVectorBatch> downstreamConsumer) {
@@ -147,15 +112,9 @@ public class OrcEncodedDataConsumer
ColumnVectorBatch cvb = cvbPool.take();
assert cvb.cols.length == batch.getColumnIxs().length; // Must be constant per split.
cvb.size = batchSize;
- List<OrcProto.Type> types = fileMetadata.getTypes();
- int[] columnMapping = batch.getColumnIxs();
+
for (int idx = 0; idx < batch.getColumnIxs().length; idx++) {
- if (cvb.cols[idx] == null) {
- // skip over the top level struct, but otherwise assume no complex
- // types
- cvb.cols[idx] = createColumn(types.get(columnMapping[idx]), batchSize);
- }
- columnReaders[idx].nextVector(cvb.cols[idx], null, batchSize);
+ cvb.cols[idx] = (ColumnVector)columnReaders[idx].nextVector(cvb.cols[idx], batchSize);
}
// we are done reading a batch, send it to consumer for processing
http://git-wip-us.apache.org/repos/asf/hive/blob/d559b347/orc/src/java/org/apache/orc/OrcUtils.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/OrcUtils.java b/orc/src/java/org/apache/orc/OrcUtils.java
index 2ebe9a7..2e93254 100644
--- a/orc/src/java/org/apache/orc/OrcUtils.java
+++ b/orc/src/java/org/apache/orc/OrcUtils.java
@@ -449,79 +449,4 @@ public class OrcUtils {
return columnId;
}
- /**
- * Translate the given rootColumn from the list of types to a TypeDescription.
- * @param types all of the types
- * @param rootColumn translate this type
- * @return a new TypeDescription that matches the given rootColumn
- */
- public static
- TypeDescription convertTypeFromProtobuf(List<OrcProto.Type> types,
- int rootColumn) {
- OrcProto.Type type = types.get(rootColumn);
- switch (type.getKind()) {
- case BOOLEAN:
- return TypeDescription.createBoolean();
- case BYTE:
- return TypeDescription.createByte();
- case SHORT:
- return TypeDescription.createShort();
- case INT:
- return TypeDescription.createInt();
- case LONG:
- return TypeDescription.createLong();
- case FLOAT:
- return TypeDescription.createFloat();
- case DOUBLE:
- return TypeDescription.createDouble();
- case STRING:
- return TypeDescription.createString();
- case CHAR:
- return TypeDescription.createChar()
- .withMaxLength(type.getMaximumLength());
- case VARCHAR:
- return TypeDescription.createVarchar()
- .withMaxLength(type.getMaximumLength());
- case BINARY:
- return TypeDescription.createBinary();
- case TIMESTAMP:
- return TypeDescription.createTimestamp();
- case DATE:
- return TypeDescription.createDate();
- case DECIMAL: {
- TypeDescription result = TypeDescription.createDecimal();
- if (type.hasScale()) {
- result.withScale(type.getScale());
- }
- if (type.hasPrecision()) {
- result.withPrecision(type.getPrecision());
- }
- return result;
- }
- case LIST:
- return TypeDescription.createList(
- convertTypeFromProtobuf(types, type.getSubtypes(0)));
- case MAP:
- return TypeDescription.createMap(
- convertTypeFromProtobuf(types, type.getSubtypes(0)),
- convertTypeFromProtobuf(types, type.getSubtypes(1)));
- case STRUCT: {
- TypeDescription result = TypeDescription.createStruct();
- for(int f=0; f < type.getSubtypesCount(); ++f) {
- result.addField(type.getFieldNames(f),
- convertTypeFromProtobuf(types, type.getSubtypes(f)));
- }
- return result;
- }
- case UNION: {
- TypeDescription result = TypeDescription.createUnion();
- for(int f=0; f < type.getSubtypesCount(); ++f) {
- result.addUnionChild(
- convertTypeFromProtobuf(types, type.getSubtypes(f)));
- }
- return result;
- }
- }
- throw new IllegalArgumentException("Unknown ORC type " + type.getKind());
- }
}
http://git-wip-us.apache.org/repos/asf/hive/blob/d559b347/orc/src/java/org/apache/orc/Reader.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/Reader.java b/orc/src/java/org/apache/orc/Reader.java
index 62a05e9..be722b5 100644
--- a/orc/src/java/org/apache/orc/Reader.java
+++ b/orc/src/java/org/apache/orc/Reader.java
@@ -116,15 +116,9 @@ public interface Reader {
ColumnStatistics[] getStatistics();
/**
- * Get the type of rows in this ORC file.
- */
- TypeDescription getSchema();
-
- /**
* Get the list of types contained in the file. The root type is the first
* type in the list.
* @return the list of flattened types
- * @deprecated use getSchema instead
*/
List<OrcProto.Type> getTypes();
http://git-wip-us.apache.org/repos/asf/hive/blob/d559b347/orc/src/java/org/apache/orc/RecordReader.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/RecordReader.java b/orc/src/java/org/apache/orc/RecordReader.java
index 09ba0f0..7229dda 100644
--- a/orc/src/java/org/apache/orc/RecordReader.java
+++ b/orc/src/java/org/apache/orc/RecordReader.java
@@ -30,11 +30,13 @@ public interface RecordReader {
* controlled by the callers. Callers need to look at
* VectorizedRowBatch.size of the returned object to know the batch
* size read.
- * @param batch a row batch object to read into
- * @return were more rows available to read?
+ * @param previousBatch a row batch object that can be reused by the reader
+ * @return the row batch that was read. The batch will have a non-zero row
+ * count if the pointer isn't at the end of the file
* @throws java.io.IOException
*/
- boolean nextBatch(VectorizedRowBatch batch) throws IOException;
+ VectorizedRowBatch nextBatch(VectorizedRowBatch previousBatch
+ ) throws IOException;
/**
* Get the row number of the row that will be returned by the following
http://git-wip-us.apache.org/repos/asf/hive/blob/d559b347/orc/src/java/org/apache/orc/TypeDescription.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/TypeDescription.java b/orc/src/java/org/apache/orc/TypeDescription.java
index b8e057e..bd900ac 100644
--- a/orc/src/java/org/apache/orc/TypeDescription.java
+++ b/orc/src/java/org/apache/orc/TypeDescription.java
@@ -61,7 +61,7 @@ public class TypeDescription {
LIST("array", false),
MAP("map", false),
STRUCT("struct", false),
- UNION("uniontype", false);
+ UNION("union", false);
Category(String name, boolean isPrimitive) {
this.name = name;
@@ -258,66 +258,6 @@ public class TypeDescription {
return id;
}
- public TypeDescription clone() {
- TypeDescription result = new TypeDescription(category);
- result.maxLength = maxLength;
- result.precision = precision;
- result.scale = scale;
- if (fieldNames != null) {
- result.fieldNames.addAll(fieldNames);
- }
- if (children != null) {
- for(TypeDescription child: children) {
- TypeDescription clone = child.clone();
- clone.parent = result;
- result.children.add(clone);
- }
- }
- return result;
- }
-
- @Override
- public int hashCode() {
- return getId();
- }
-
- @Override
- public boolean equals(Object other) {
- if (other == null || other.getClass() != TypeDescription.class) {
- return false;
- }
- if (other == this) {
- return true;
- }
- TypeDescription castOther = (TypeDescription) other;
- if (category != castOther.category ||
- getId() != castOther.getId() ||
- getMaximumId() != castOther.getMaximumId() ||
- maxLength != castOther.maxLength ||
- scale != castOther.scale ||
- precision != castOther.precision) {
- return false;
- }
- if (children != null) {
- if (children.size() != castOther.children.size()) {
- return false;
- }
- for (int i = 0; i < children.size(); ++i) {
- if (!children.get(i).equals(castOther.children.get(i))) {
- return false;
- }
- }
- }
- if (category == Category.STRUCT) {
- for(int i=0; i < fieldNames.size(); ++i) {
- if (!fieldNames.get(i).equals(castOther.fieldNames.get(i))) {
- return false;
- }
- }
- }
- return true;
- }
-
/**
* Get the maximum id assigned to this type or its children.
* The first call will cause all of the ids in the tree to be assigned, so
http://git-wip-us.apache.org/repos/asf/hive/blob/d559b347/orc/src/java/org/apache/orc/impl/BitFieldReader.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/BitFieldReader.java b/orc/src/java/org/apache/orc/impl/BitFieldReader.java
index dda7355..8d9d3cb 100644
--- a/orc/src/java/org/apache/orc/impl/BitFieldReader.java
+++ b/orc/src/java/org/apache/orc/impl/BitFieldReader.java
@@ -137,7 +137,7 @@ public class BitFieldReader {
long previousLen) throws IOException {
previous.isRepeating = true;
for (int i = 0; i < previousLen; i++) {
- if (previous.noNulls || !previous.isNull[i]) {
+ if (!previous.isNull[i]) {
previous.vector[i] = next();
} else {
// The default value of null for int types in vectorized
@@ -150,8 +150,7 @@ public class BitFieldReader {
// when determining the isRepeating flag.
if (previous.isRepeating
&& i > 0
- && ((previous.vector[0] != previous.vector[i]) ||
- (previous.isNull[0] != previous.isNull[i]))) {
+ && ((previous.vector[i - 1] != previous.vector[i]) || (previous.isNull[i - 1] != previous.isNull[i]))) {
previous.isRepeating = false;
}
}
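The isRepeating bookkeeping in this hunk, and the identical pattern in the run-length readers below, reduces to the following standalone check. The method is a sketch with invented names, not code from the patch.

// Sketch: the restored readers flag a column as repeating only when every
// adjacent pair of rows agrees on both value and nullness. Nulls are stored
// as the value 1 for integer types, so the isNull comparison is what stops
// a run like [1, null, 1] from being misreported as repeating.
static boolean computeIsRepeating(long[] vector, boolean[] isNull, int n) {
  for (int i = 1; i < n; i++) {
    if (vector[i - 1] != vector[i] || isNull[i - 1] != isNull[i]) {
      return false;
    }
  }
  return n > 0;
}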
http://git-wip-us.apache.org/repos/asf/hive/blob/d559b347/orc/src/java/org/apache/orc/impl/IntegerReader.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/IntegerReader.java b/orc/src/java/org/apache/orc/impl/IntegerReader.java
index 8bef0f1..7dfd289 100644
--- a/orc/src/java/org/apache/orc/impl/IntegerReader.java
+++ b/orc/src/java/org/apache/orc/impl/IntegerReader.java
@@ -20,7 +20,7 @@ package org.apache.orc.impl;
import java.io.IOException;
-import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
/**
* Interface for reading integers.
@@ -57,25 +57,9 @@ public interface IntegerReader {
/**
* Return the next available vector for values.
- * @param column the column being read
- * @param data the vector to read into
- * @param length the number of numbers to read
- * @throws IOException
- */
- void nextVector(ColumnVector column,
- long[] data,
- int length
- ) throws IOException;
-
- /**
- * Return the next available vector for values. Does not change the
- * value of column.isRepeating.
- * @param column the column being read
- * @param data the vector to read into
- * @param length the number of numbers to read
+ * @return
* @throws IOException
*/
- void nextVector(ColumnVector column,
- int[] data,
- int length
- ) throws IOException;}
+ void nextVector(LongColumnVector previous, final int previousLen)
+ throws IOException;
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/d559b347/orc/src/java/org/apache/orc/impl/RunLengthByteReader.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/RunLengthByteReader.java b/orc/src/java/org/apache/orc/impl/RunLengthByteReader.java
index 24bd051..380f3391 100644
--- a/orc/src/java/org/apache/orc/impl/RunLengthByteReader.java
+++ b/orc/src/java/org/apache/orc/impl/RunLengthByteReader.java
@@ -20,7 +20,7 @@ package org.apache.orc.impl;
import java.io.EOFException;
import java.io.IOException;
-import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
/**
* A reader that reads a sequence of bytes. A control byte is read before
@@ -92,16 +92,16 @@ public class RunLengthByteReader {
return result;
}
- public void nextVector(ColumnVector previous, long[] data, long size)
+ public void nextVector(LongColumnVector previous, long previousLen)
throws IOException {
previous.isRepeating = true;
- for (int i = 0; i < size; i++) {
+ for (int i = 0; i < previousLen; i++) {
if (!previous.isNull[i]) {
- data[i] = next();
+ previous.vector[i] = next();
} else {
// The default value of null for int types in vectorized
// processing is 1, so set that if the value is null
- data[i] = 1;
+ previous.vector[i] = 1;
}
// The default value for nulls in Vectorization for int types is 1
@@ -109,36 +109,12 @@ public class RunLengthByteReader {
// when determining the isRepeating flag.
if (previous.isRepeating
&& i > 0
- && ((data[0] != data[i]) ||
- (previous.isNull[0] != previous.isNull[i]))) {
+ && ((previous.vector[i - 1] != previous.vector[i]) || (previous.isNull[i - 1] != previous.isNull[i]))) {
previous.isRepeating = false;
}
}
}
- /**
- * Read the next size bytes into the data array, skipping over any slots
- * where isNull is true.
- * @param isNull if non-null, skip any rows where isNull[r] is true
- * @param data the array to read into
- * @param size the number of elements to read
- * @throws IOException
- */
- public void nextVector(boolean[] isNull, int[] data,
- long size) throws IOException {
- if (isNull == null) {
- for(int i=0; i < size; ++i) {
- data[i] = next();
- }
- } else {
- for(int i=0; i < size; ++i) {
- if (!isNull[i]) {
- data[i] = next();
- }
- }
- }
- }
-
public void seek(PositionProvider index) throws IOException {
input.seek(index);
int consumed = (int) index.getNext();
http://git-wip-us.apache.org/repos/asf/hive/blob/d559b347/orc/src/java/org/apache/orc/impl/RunLengthIntegerReader.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/RunLengthIntegerReader.java b/orc/src/java/org/apache/orc/impl/RunLengthIntegerReader.java
index b91a263..0c90cde 100644
--- a/orc/src/java/org/apache/orc/impl/RunLengthIntegerReader.java
+++ b/orc/src/java/org/apache/orc/impl/RunLengthIntegerReader.java
@@ -20,7 +20,7 @@ package org.apache.orc.impl;
import java.io.EOFException;
import java.io.IOException;
-import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
/**
* A reader that reads a sequence of integers.
@@ -99,17 +99,15 @@ public class RunLengthIntegerReader implements IntegerReader {
}
@Override
- public void nextVector(ColumnVector previous,
- long[] data,
- int previousLen) throws IOException {
+ public void nextVector(LongColumnVector previous, final int previousLen) throws IOException {
previous.isRepeating = true;
for (int i = 0; i < previousLen; i++) {
if (!previous.isNull[i]) {
- data[i] = next();
+ previous.vector[i] = next();
} else {
// The default value of null for int type in vectorized
// processing is 1, so set that if the value is null
- data[i] = 1;
+ previous.vector[i] = 1;
}
// The default value for nulls in Vectorization for int types is 1
@@ -117,32 +115,13 @@ public class RunLengthIntegerReader implements IntegerReader {
// when determining the isRepeating flag.
if (previous.isRepeating
&& i > 0
- && (data[0] != data[i] || previous.isNull[0] != previous.isNull[i])) {
+ && (previous.vector[i - 1] != previous.vector[i] || previous.isNull[i - 1] != previous.isNull[i])) {
previous.isRepeating = false;
}
}
}
@Override
- public void nextVector(ColumnVector vector,
- int[] data,
- int size) throws IOException {
- if (vector.noNulls) {
- for(int r=0; r < data.length && r < size; ++r) {
- data[r] = (int) next();
- }
- } else if (!(vector.isRepeating && vector.isNull[0])) {
- for(int r=0; r < data.length && r < size; ++r) {
- if (!vector.isNull[r]) {
- data[r] = (int) next();
- } else {
- data[r] = 1;
- }
- }
- }
- }
-
- @Override
public void seek(PositionProvider index) throws IOException {
input.seek(index);
int consumed = (int) index.getNext();
http://git-wip-us.apache.org/repos/asf/hive/blob/d559b347/orc/src/java/org/apache/orc/impl/RunLengthIntegerReaderV2.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/RunLengthIntegerReaderV2.java b/orc/src/java/org/apache/orc/impl/RunLengthIntegerReaderV2.java
index 610d9b5..c6d685a 100644
--- a/orc/src/java/org/apache/orc/impl/RunLengthIntegerReaderV2.java
+++ b/orc/src/java/org/apache/orc/impl/RunLengthIntegerReaderV2.java
@@ -21,9 +21,9 @@ import java.io.EOFException;
import java.io.IOException;
import java.util.Arrays;
-import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
/**
* A reader that reads a sequence of light weight compressed integers. Refer
@@ -360,17 +360,15 @@ public class RunLengthIntegerReaderV2 implements IntegerReader {
}
@Override
- public void nextVector(ColumnVector previous,
- long[] data,
- int previousLen) throws IOException {
+ public void nextVector(LongColumnVector previous, final int previousLen) throws IOException {
previous.isRepeating = true;
for (int i = 0; i < previousLen; i++) {
if (!previous.isNull[i]) {
- data[i] = next();
+ previous.vector[i] = next();
} else {
// The default value of null for int type in vectorized
// processing is 1, so set that if the value is null
- data[i] = 1;
+ previous.vector[i] = 1;
}
// The default value for nulls in Vectorization for int types is 1
@@ -378,29 +376,10 @@ public class RunLengthIntegerReaderV2 implements IntegerReader {
// when determining the isRepeating flag.
if (previous.isRepeating
&& i > 0
- && (data[0] != data[i] ||
- previous.isNull[0] != previous.isNull[i])) {
+ && (previous.vector[i - 1] != previous.vector[i] ||
+ previous.isNull[i - 1] != previous.isNull[i])) {
previous.isRepeating = false;
}
}
}
-
- @Override
- public void nextVector(ColumnVector vector,
- int[] data,
- int size) throws IOException {
- if (vector.noNulls) {
- for(int r=0; r < data.length && r < size; ++r) {
- data[r] = (int) next();
- }
- } else if (!(vector.isRepeating && vector.isNull[0])) {
- for(int r=0; r < data.length && r < size; ++r) {
- if (!vector.isNull[r]) {
- data[r] = (int) next();
- } else {
- data[r] = 1;
- }
- }
- }
- }
}
http://git-wip-us.apache.org/repos/asf/hive/blob/d559b347/orc/src/java/org/apache/orc/impl/WriterImpl.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/WriterImpl.java b/orc/src/java/org/apache/orc/impl/WriterImpl.java
index b2966e0..f8afe06 100644
--- a/orc/src/java/org/apache/orc/impl/WriterImpl.java
+++ b/orc/src/java/org/apache/orc/impl/WriterImpl.java
@@ -1693,10 +1693,9 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
}
}
- public static long MILLIS_PER_DAY = 24 * 60 * 60 * 1000;
- public static long NANOS_PER_MILLI = 1000000;
public static final int MILLIS_PER_SECOND = 1000;
static final int NANOS_PER_SECOND = 1000000000;
+ static final int MILLIS_PER_NANO = 1000000;
public static final String BASE_TIMESTAMP_STRING = "2015-01-01 00:00:00";
private static class TimestampTreeWriter extends TreeWriter {
@@ -2262,36 +2261,32 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
}
} else {
// write the records in runs of the same tag
- int[] currentStart = new int[vec.fields.length];
- int[] currentLength = new int[vec.fields.length];
+ byte prevTag = 0;
+ int currentRun = 0;
+ boolean started = false;
for(int i=0; i < length; ++i) {
- // only need to deal with the non-nulls, since the nulls were dealt
- // with in the super method.
- if (vec.noNulls || !vec.isNull[i + offset]) {
+ if (!vec.isNull[i + offset]) {
byte tag = (byte) vec.tags[offset + i];
tags.write(tag);
- if (currentLength[tag] == 0) {
- // start a new sequence
- currentStart[tag] = i + offset;
- currentLength[tag] = 1;
- } else if (currentStart[tag] + currentLength[tag] == i + offset) {
- // ok, we are extending the current run for that tag.
- currentLength[tag] += 1;
- } else {
- // otherwise, we need to close off the old run and start a new one
- childrenWriters[tag].writeBatch(vec.fields[tag],
- currentStart[tag], currentLength[tag]);
- currentStart[tag] = i + offset;
- currentLength[tag] = 1;
+ if (!started) {
+ started = true;
+ currentRun = i;
+ prevTag = tag;
+ } else if (tag != prevTag) {
+ childrenWriters[prevTag].writeBatch(vec.fields[prevTag],
+ offset + currentRun, i - currentRun);
+ currentRun = i;
+ prevTag = tag;
}
+ } else if (started) {
+ started = false;
+ childrenWriters[prevTag].writeBatch(vec.fields[prevTag],
+ offset + currentRun, i - currentRun);
}
}
- // write out any left over sequences
- for(int tag=0; tag < currentStart.length; ++tag) {
- if (currentLength[tag] != 0) {
- childrenWriters[tag].writeBatch(vec.fields[tag], currentStart[tag],
- currentLength[tag]);
- }
+ if (started) {
+ childrenWriters[prevTag].writeBatch(vec.fields[prevTag],
+ offset + currentRun, length - currentRun);
}
}
}
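The restored union writer batches consecutive non-null rows that share a tag into one writeBatch call per run. Below is a self-contained sketch of that run detection; UnionRunSketch, ChildWriter, and writeTagRuns are invented stand-ins for the tree-writer machinery, not names from the patch.

// Sketch of the restored logic: a run of non-null rows with the same tag is
// flushed to that tag's child writer as one contiguous (start, length) span;
// a null row or a tag change closes the current run.
class UnionRunSketch {
  // Invented stand-in for childrenWriters[tag].writeBatch(field, start, len).
  interface ChildWriter {
    void write(int start, int length);
  }

  static void writeTagRuns(byte[] tags, boolean[] isNull, int offset,
                           int length, ChildWriter[] children) {
    byte prevTag = 0;
    int runStart = 0;
    boolean started = false;
    for (int i = 0; i < length; ++i) {
      if (!isNull[offset + i]) {
        byte tag = tags[offset + i];
        if (!started) {
          started = true;
          runStart = i;
          prevTag = tag;
        } else if (tag != prevTag) {
          // tag changed: close the previous run and start a new one
          children[prevTag].write(offset + runStart, i - runStart);
          runStart = i;
          prevTag = tag;
        }
      } else if (started) {
        // a null row ends the current run
        started = false;
        children[prevTag].write(offset + runStart, i - runStart);
      }
    }
    if (started) {
      // flush the trailing run
      children[prevTag].write(offset + runStart, length - runStart);
    }
  }
}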
http://git-wip-us.apache.org/repos/asf/hive/blob/d559b347/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
index 82a97e0..0724191 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
@@ -215,9 +215,12 @@ public class VectorizedRowBatchCtx {
LOG.info("createVectorizedRowBatch columnsToIncludeTruncated " + Arrays.toString(columnsToIncludeTruncated));
int totalColumnCount = rowColumnTypeInfos.length + scratchColumnTypeNames.length;
VectorizedRowBatch result = new VectorizedRowBatch(totalColumnCount);
- for (int i = 0; i < dataColumnCount; i++) {
- TypeInfo typeInfo = rowColumnTypeInfos[i];
- result.cols[i] = VectorizedBatchUtil.createColumnVector(typeInfo);
+
+ for (int i = 0; i < columnsToIncludeTruncated.length; i++) {
+ if (columnsToIncludeTruncated[i]) {
+ TypeInfo typeInfo = rowColumnTypeInfos[i];
+ result.cols[i] = VectorizedBatchUtil.createColumnVector(typeInfo);
+ }
}
for (int i = dataColumnCount; i < dataColumnCount + partitionColumnCount; i++) {
@@ -473,8 +476,8 @@ public class VectorizedRowBatchCtx {
bcv.isNull[0] = true;
bcv.isRepeating = true;
} else {
- bcv.setVal(0, sVal.getBytes());
- bcv.isRepeating = true;
+ bcv.fill(sVal.getBytes());
+ bcv.isNull[0] = false;
}
}
break;
http://git-wip-us.apache.org/repos/asf/hive/blob/d559b347/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index fcb8ca4..fe0be7b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -301,7 +301,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
/**
* Do we have schema on read in the configuration variables?
*/
- TypeDescription schema = getDesiredRowTypeDescr(conf, false, Integer.MAX_VALUE);
+ TypeDescription schema = getDesiredRowTypeDescr(conf, /* isAcidRead */ false);
Reader.Options options = new Reader.Options().range(offset, length);
options.schema(schema);
@@ -1743,7 +1743,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
/**
* Do we have schema on read in the configuration variables?
*/
- TypeDescription schema = getDesiredRowTypeDescr(conf, true, Integer.MAX_VALUE);
+ TypeDescription schema = getDesiredRowTypeDescr(conf, /* isAcidRead */ true);
final Reader reader;
final int bucket;
@@ -1994,13 +1994,10 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
/**
* Convert a Hive type property string that contains separated type names into a list of
* TypeDescription objects.
- * @param hiveTypeProperty the desired types from hive
- * @param maxColumns the maximum number of desired columns
* @return the list of TypeDescription objects.
*/
- public static ArrayList<TypeDescription>
- typeDescriptionsFromHiveTypeProperty(String hiveTypeProperty,
- int maxColumns) {
+ public static ArrayList<TypeDescription> typeDescriptionsFromHiveTypeProperty(
+ String hiveTypeProperty) {
// CONSIDER: We need a type name parser for TypeDescription.
@@ -2008,9 +2005,6 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
ArrayList<TypeDescription> typeDescrList =new ArrayList<TypeDescription>(typeInfoList.size());
for (TypeInfo typeInfo : typeInfoList) {
typeDescrList.add(convertTypeInfo(typeInfo));
- if (typeDescrList.size() >= maxColumns) {
- break;
- }
}
return typeDescrList;
}
@@ -2097,18 +2091,8 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
}
}
- /**
- * Generate the desired schema for reading the file.
- * @param conf the configuration
- * @param isAcidRead is this an acid format?
- * @param dataColumns the desired number of data columns for vectorized read
- * @return the desired schema or null if schema evolution isn't enabled
- * @throws IOException
- */
- public static TypeDescription getDesiredRowTypeDescr(Configuration conf,
- boolean isAcidRead,
- int dataColumns
- ) throws IOException {
+ public static TypeDescription getDesiredRowTypeDescr(Configuration conf, boolean isAcidRead)
+ throws IOException {
String columnNameProperty = null;
String columnTypeProperty = null;
@@ -2131,10 +2115,8 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
haveSchemaEvolutionProperties = false;
} else {
schemaEvolutionTypeDescrs =
- typeDescriptionsFromHiveTypeProperty(columnTypeProperty,
- dataColumns);
- if (schemaEvolutionTypeDescrs.size() !=
- Math.min(dataColumns, schemaEvolutionColumnNames.size())) {
+ typeDescriptionsFromHiveTypeProperty(columnTypeProperty);
+ if (schemaEvolutionTypeDescrs.size() != schemaEvolutionColumnNames.size()) {
haveSchemaEvolutionProperties = false;
}
}
@@ -2165,9 +2147,8 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
return null;
}
schemaEvolutionTypeDescrs =
- typeDescriptionsFromHiveTypeProperty(columnTypeProperty, dataColumns);
- if (schemaEvolutionTypeDescrs.size() !=
- Math.min(dataColumns, schemaEvolutionColumnNames.size())) {
+ typeDescriptionsFromHiveTypeProperty(columnTypeProperty);
+ if (schemaEvolutionTypeDescrs.size() != schemaEvolutionColumnNames.size()) {
return null;
}
@@ -2181,7 +2162,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
}
columnNum++;
}
- if (virtualColumnClipNum != -1 && virtualColumnClipNum < dataColumns) {
+ if (virtualColumnClipNum != -1) {
schemaEvolutionColumnNames =
Lists.newArrayList(schemaEvolutionColumnNames.subList(0, virtualColumnClipNum));
schemaEvolutionTypeDescrs = Lists.newArrayList(schemaEvolutionTypeDescrs.subList(0, virtualColumnClipNum));
@@ -2198,7 +2179,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
// Desired schema does not include virtual columns or partition columns.
TypeDescription result = TypeDescription.createStruct();
- for (int i = 0; i < schemaEvolutionTypeDescrs.size(); i++) {
+ for (int i = 0; i < schemaEvolutionColumnNames.size(); i++) {
result.addField(schemaEvolutionColumnNames.get(i), schemaEvolutionTypeDescrs.get(i));
}
http://git-wip-us.apache.org/repos/asf/hive/blob/d559b347/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRawRecordMerger.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRawRecordMerger.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRawRecordMerger.java
index 0dd58b7..1fce282 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRawRecordMerger.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRawRecordMerger.java
@@ -447,8 +447,7 @@ public class OrcRawRecordMerger implements AcidInputFormat.RawReader<OrcStruct>{
this.length = options.getLength();
this.validTxnList = validTxnList;
- TypeDescription typeDescr =
- OrcInputFormat.getDesiredRowTypeDescr(conf, true, Integer.MAX_VALUE);
+ TypeDescription typeDescr = OrcInputFormat.getDesiredRowTypeDescr(conf, /* isAcidRead */ true);
objectInspector = OrcRecordUpdater.createEventSchema
(OrcStruct.createObjectInspector(0, OrcUtils.getOrcTypes(typeDescr)));
http://git-wip-us.apache.org/repos/asf/hive/blob/d559b347/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
index 0bcf9e3..a031a92 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
@@ -26,8 +26,6 @@ import java.util.Collections;
import java.util.List;
import java.util.Set;
-import org.apache.orc.OrcUtils;
-import org.apache.orc.TypeDescription;
import org.apache.orc.impl.BufferChunk;
import org.apache.orc.ColumnStatistics;
import org.apache.orc.impl.ColumnStatisticsImpl;
@@ -73,7 +71,6 @@ public class ReaderImpl implements Reader {
private final List<OrcProto.StripeStatistics> stripeStats;
private final int metadataSize;
protected final List<OrcProto.Type> types;
- private final TypeDescription schema;
private final List<OrcProto.UserMetadataItem> userMetadata;
private final List<OrcProto.ColumnStatistics> fileStats;
private final List<StripeInformation> stripes;
@@ -246,11 +243,6 @@ public class ReaderImpl implements Reader {
return result;
}
- @Override
- public TypeDescription getSchema() {
- return schema;
- }
-
/**
* Ensure this is an ORC file to prevent users from trying to read text
* files or RC files as ORC files.
@@ -394,9 +386,7 @@ public class ReaderImpl implements Reader {
this.writerVersion = footerMetaData.writerVersion;
this.stripes = convertProtoStripesToStripes(rInfo.footer.getStripesList());
}
- this.schema = OrcUtils.convertTypeFromProtobuf(this.types, 0);
}
-
/**
* Get the WriterVersion based on the ORC file postscript.
* @param writerVersion the integer writer version
@@ -678,7 +668,7 @@ public class ReaderImpl implements Reader {
options.include(include);
}
return new RecordReaderImpl(this.getStripes(), fileSystem, path,
- options, schema, types, codec, bufferSize, rowIndexStride, conf);
+ options, types, codec, bufferSize, rowIndexStride, conf);
}
http://git-wip-us.apache.org/repos/asf/hive/blob/d559b347/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
index c214658..3975409 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
@@ -27,8 +27,9 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import org.apache.commons.lang3.exception.ExceptionUtils;
import org.apache.orc.BooleanColumnStatistics;
-import org.apache.orc.TypeDescription;
+import org.apache.orc.OrcUtils;
import org.apache.orc.impl.BufferChunk;
import org.apache.orc.ColumnStatistics;
import org.apache.orc.impl.ColumnStatisticsImpl;
@@ -57,6 +58,7 @@ import org.apache.hadoop.hive.common.io.DiskRange;
import org.apache.hadoop.hive.common.io.DiskRangeList;
import org.apache.hadoop.hive.common.io.DiskRangeList.CreateHelper;
import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.orc.BloomFilterIO;
import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
@@ -96,6 +98,7 @@ public class RecordReaderImpl implements RecordReader {
private final SargApplier sargApp;
// an array about which row groups aren't skipped
private boolean[] includedRowGroups = null;
+ private final Configuration conf;
private final MetadataReader metadata;
private final DataReader dataReader;
@@ -142,33 +145,33 @@ public class RecordReaderImpl implements RecordReader {
FileSystem fileSystem,
Path path,
Reader.Options options,
- TypeDescription fileSchema,
List<OrcProto.Type> types,
CompressionCodec codec,
int bufferSize,
long strideRate,
Configuration conf
) throws IOException {
- SchemaEvolution treeReaderSchema;
- this.included = options.getInclude();
- included[0] = true;
+
+ TreeReaderFactory.TreeReaderSchema treeReaderSchema;
if (options.getSchema() == null) {
if (LOG.isInfoEnabled()) {
LOG.info("Schema on read not provided -- using file schema " + types.toString());
}
- treeReaderSchema = new SchemaEvolution(fileSchema, included);
+ treeReaderSchema = new TreeReaderFactory.TreeReaderSchema().fileTypes(types).schemaTypes(types);
} else {
// Now that we are creating a record reader for a file, validate that the schema to read
// is compatible with the file schema.
//
- treeReaderSchema = new SchemaEvolution(fileSchema, options.getSchema(),
- included);
+ List<OrcProto.Type> schemaTypes = OrcUtils.getOrcTypes(options.getSchema());
+ treeReaderSchema = SchemaEvolution.validateAndCreate(types, schemaTypes);
}
this.path = path;
this.codec = codec;
this.types = types;
this.bufferSize = bufferSize;
+ this.included = options.getInclude();
+ this.conf = conf;
this.rowIndexStride = strideRate;
this.metadata = new MetadataReaderImpl(fileSystem, path, codec, bufferSize, types.size());
SearchArgument sarg = options.getSearchArgument();
@@ -207,8 +210,7 @@ public class RecordReaderImpl implements RecordReader {
skipCorrupt = OrcConf.SKIP_CORRUPT_DATA.getBoolean(conf);
}
- reader = TreeReaderFactory.createTreeReader(treeReaderSchema.getReaderSchema(),
- treeReaderSchema, included, skipCorrupt);
+ reader = TreeReaderFactory.createTreeReader(0, treeReaderSchema, included, skipCorrupt);
indexes = new OrcProto.RowIndex[types.size()];
bloomFilterIndices = new OrcProto.BloomFilterIndex[types.size()];
advanceToNextRow(reader, 0L, true);
@@ -237,7 +239,7 @@ public class RecordReaderImpl implements RecordReader {
return metadata.readStripeFooter(stripe);
}
- enum Location {
+ static enum Location {
BEFORE, MIN, MIDDLE, MAX, AFTER
}
@@ -1050,27 +1052,31 @@ public class RecordReaderImpl implements RecordReader {
}
@Override
- public boolean nextBatch(VectorizedRowBatch batch) throws IOException {
+ public VectorizedRowBatch nextBatch(VectorizedRowBatch previous) throws IOException {
try {
+ final VectorizedRowBatch result;
if (rowInStripe >= rowCountInStripe) {
currentStripe += 1;
- if (currentStripe >= stripes.size()) {
- batch.size = 0;
- return false;
- }
readStripe();
}
- int batchSize = computeBatchSize(batch.getMaxSize());
+ final int batchSize = computeBatchSize(VectorizedRowBatch.DEFAULT_SIZE);
rowInStripe += batchSize;
- reader.setVectorColumnCount(batch.getDataColumnCount());
- reader.nextBatch(batch, batchSize);
+ if (previous == null) {
+ ColumnVector[] cols = (ColumnVector[]) reader.nextVector(null, (int) batchSize);
+ result = new VectorizedRowBatch(cols.length);
+ result.cols = cols;
+ } else {
+ result = previous;
+ result.selectedInUse = false;
+ reader.setVectorColumnCount(result.getDataColumnCount());
+ reader.nextVector(result.cols, batchSize);
+ }
- batch.size = (int) batchSize;
- batch.selectedInUse = false;
+ result.size = batchSize;
advanceToNextRow(reader, rowInStripe + rowBaseInStripe, true);
- return batch.size != 0;
+ return result;
} catch (IOException e) {
// Rethrow exception with file name in log message
throw new IOException("Error reading file: " + path, e);
http://git-wip-us.apache.org/repos/asf/hive/blob/d559b347/ql/src/java/org/apache/hadoop/hive/ql/io/orc/SchemaEvolution.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/SchemaEvolution.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/SchemaEvolution.java
index 6747691..f28ca13 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/SchemaEvolution.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/SchemaEvolution.java
@@ -20,12 +20,13 @@ package org.apache.hadoop.hive.ql.io.orc;
import java.io.IOException;
import java.util.ArrayList;
-import java.util.HashMap;
import java.util.List;
-import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.io.orc.TreeReaderFactory.TreeReaderSchema;
+import org.apache.orc.OrcProto;
+import org.apache.orc.OrcUtils;
import org.apache.orc.TypeDescription;
/**
@@ -33,134 +34,103 @@ import org.apache.orc.TypeDescription;
* has been schema evolution.
*/
public class SchemaEvolution {
- private final Map<TypeDescription, TypeDescription> readerToFile;
- private final boolean[] included;
- private final TypeDescription readerSchema;
+
private static final Log LOG = LogFactory.getLog(SchemaEvolution.class);
- public SchemaEvolution(TypeDescription readerSchema, boolean[] included) {
- this.included = included;
- readerToFile = null;
- this.readerSchema = readerSchema;
- }
+ public static TreeReaderSchema validateAndCreate(List<OrcProto.Type> fileTypes,
+ List<OrcProto.Type> schemaTypes) throws IOException {
- public SchemaEvolution(TypeDescription fileSchema,
- TypeDescription readerSchema,
- boolean[] included) throws IOException {
- readerToFile = new HashMap<>(readerSchema.getMaximumId() + 1);
- this.included = included;
- if (checkAcidSchema(fileSchema)) {
- this.readerSchema = createEventSchema(readerSchema);
+ // For ACID, the row is the ROW field in the outer STRUCT.
+ final boolean isAcid = checkAcidSchema(fileTypes);
+ final List<OrcProto.Type> rowSchema;
+ int rowSubtype;
+ if (isAcid) {
+ rowSubtype = OrcRecordUpdater.ROW + 1;
+ rowSchema = fileTypes.subList(rowSubtype, fileTypes.size());
} else {
- this.readerSchema = readerSchema;
+ rowSubtype = 0;
+ rowSchema = fileTypes;
}
- buildMapping(fileSchema, this.readerSchema);
- }
- public TypeDescription getReaderSchema() {
- return readerSchema;
- }
+ // Do checking on the overlap. Additional columns will be defaulted to NULL.
- public TypeDescription getFileType(TypeDescription readerType) {
- TypeDescription result;
- if (readerToFile == null) {
- if (included == null || included[readerType.getId()]) {
- result = readerType;
- } else {
- result = null;
- }
- } else {
- result = readerToFile.get(readerType);
- }
- return result;
- }
+ int numFileColumns = rowSchema.get(0).getSubtypesCount();
+ int numDesiredColumns = schemaTypes.get(0).getSubtypesCount();
- void buildMapping(TypeDescription fileType,
- TypeDescription readerType) throws IOException {
- // if the column isn't included, don't map it
- if (included != null && !included[readerType.getId()]) {
- return;
- }
- boolean isOk = true;
- // check the easy case first
- if (fileType.getCategory() == readerType.getCategory()) {
- switch (readerType.getCategory()) {
- case BOOLEAN:
- case BYTE:
- case SHORT:
- case INT:
- case LONG:
- case DOUBLE:
- case FLOAT:
- case STRING:
- case TIMESTAMP:
- case BINARY:
- case DATE:
- // these are always a match
- break;
- case CHAR:
- case VARCHAR:
- isOk = fileType.getMaxLength() == readerType.getMaxLength();
- break;
- case DECIMAL:
- // TODO we don't enforce scale and precision checks, but probably should
- break;
- case UNION:
- case MAP:
- case LIST: {
- // these must be an exact match
- List<TypeDescription> fileChildren = fileType.getChildren();
- List<TypeDescription> readerChildren = readerType.getChildren();
- if (fileChildren.size() == readerChildren.size()) {
- for(int i=0; i < fileChildren.size(); ++i) {
- buildMapping(fileChildren.get(i), readerChildren.get(i));
- }
- } else {
- isOk = false;
+ int numReadColumns = Math.min(numFileColumns, numDesiredColumns);
+
+ /**
+ * Check type promotion.
+ *
+ * Currently, we only support integer type promotions that can be done "implicitly".
+ * That is, we know that using a bigger integer tree reader on the original smaller integer
+ * column will "just work".
+ *
+ * In the future, other type promotions might require type conversion.
+ */
+ // short -> int -> bigint as same integer readers are used for the above types.
+
+ for (int i = 0; i < numReadColumns; i++) {
+ OrcProto.Type fColType = fileTypes.get(rowSubtype + i);
+ OrcProto.Type rColType = schemaTypes.get(i);
+ if (!fColType.getKind().equals(rColType.getKind())) {
+
+ boolean ok = false;
+ if (fColType.getKind().equals(OrcProto.Type.Kind.SHORT)) {
+
+ if (rColType.getKind().equals(OrcProto.Type.Kind.INT) ||
+ rColType.getKind().equals(OrcProto.Type.Kind.LONG)) {
+ // type promotion possible, converting SHORT to INT/LONG requested type
+ ok = true;
}
- break;
- }
- case STRUCT: {
- // allow either side to have fewer fields than the other
- List<TypeDescription> fileChildren = fileType.getChildren();
- List<TypeDescription> readerChildren = readerType.getChildren();
- int jointSize = Math.min(fileChildren.size(), readerChildren.size());
- for(int i=0; i < jointSize; ++i) {
- buildMapping(fileChildren.get(i), readerChildren.get(i));
+ } else if (fColType.getKind().equals(OrcProto.Type.Kind.INT)) {
+
+ if (rColType.getKind().equals(OrcProto.Type.Kind.LONG)) {
+ // type promotion possible, converting INT to LONG requested type
+ ok = true;
}
- break;
}
- default:
- throw new IllegalArgumentException("Unknown type " + readerType);
- }
- } else {
- switch (fileType.getCategory()) {
- case SHORT:
- if (readerType.getCategory() != TypeDescription.Category.INT &&
- readerType.getCategory() != TypeDescription.Category.LONG) {
- isOk = false;
- }
- break;
- case INT:
- if (readerType.getCategory() != TypeDescription.Category.LONG) {
- isOk = false;
- }
- break;
- default:
- isOk = false;
+
+ if (!ok) {
+ throw new IOException("ORC does not support type conversion from " +
+ fColType.getKind().name() + " to " + rColType.getKind().name());
+ }
}
}
- if (isOk) {
- readerToFile.put(readerType, fileType);
+
+ List<OrcProto.Type> fullSchemaTypes;
+
+ if (isAcid) {
+ fullSchemaTypes = new ArrayList<OrcProto.Type>();
+
+ // This copies the ACID struct type which is subtype = 0.
+ // It has field names "operation" through "row".
+ // And we copy the types for all fields EXCEPT ROW (which must be last!).
+
+ for (int i = 0; i < rowSubtype; i++) {
+ fullSchemaTypes.add(fileTypes.get(i).toBuilder().build());
+ }
+
+ // Add the row struct type.
+ OrcUtils.appendOrcTypesRebuildSubtypes(fullSchemaTypes, schemaTypes, 0);
} else {
- throw new IOException("ORC does not support type conversion from " +
- fileType + " to " + readerType);
+ fullSchemaTypes = schemaTypes;
}
+
+ int innerStructSubtype = rowSubtype;
+
+ // LOG.info("Schema evolution: (fileTypes) " + fileTypes.toString() +
+ // " (schemaEvolutionTypes) " + schemaEvolutionTypes.toString());
+
+ return new TreeReaderSchema().
+ fileTypes(fileTypes).
+ schemaTypes(fullSchemaTypes).
+ innerStructSubtype(innerStructSubtype);
}
- private static boolean checkAcidSchema(TypeDescription type) {
- if (type.getCategory().equals(TypeDescription.Category.STRUCT)) {
- List<String> rootFields = type.getFieldNames();
+ private static boolean checkAcidSchema(List<OrcProto.Type> fileSchema) {
+ if (fileSchema.get(0).getKind().equals(OrcProto.Type.Kind.STRUCT)) {
+ List<String> rootFields = fileSchema.get(0).getFieldNamesList();
if (acidEventFieldNames.equals(rootFields)) {
return true;
}
@@ -172,14 +142,26 @@ public class SchemaEvolution {
* @param typeDescr
* @return ORC types for the ACID event based on the row's type description
*/
- public static TypeDescription createEventSchema(TypeDescription typeDescr) {
- TypeDescription result = TypeDescription.createStruct()
- .addField("operation", TypeDescription.createInt())
- .addField("originalTransaction", TypeDescription.createLong())
- .addField("bucket", TypeDescription.createInt())
- .addField("rowId", TypeDescription.createLong())
- .addField("currentTransaction", TypeDescription.createLong())
- .addField("row", typeDescr.clone());
+ public static List<OrcProto.Type> createEventSchema(TypeDescription typeDescr) {
+
+ List<OrcProto.Type> result = new ArrayList<OrcProto.Type>();
+
+ OrcProto.Type.Builder type = OrcProto.Type.newBuilder();
+ type.setKind(OrcProto.Type.Kind.STRUCT);
+ type.addAllFieldNames(acidEventFieldNames);
+ for (int i = 0; i < acidEventFieldNames.size(); i++) {
+ type.addSubtypes(i + 1);
+ }
+ result.add(type.build());
+
+ // Automatically add all fields except the last (ROW).
+ for (int i = 0; i < acidEventOrcTypeKinds.size() - 1; i ++) {
+ type.clear();
+ type.setKind(acidEventOrcTypeKinds.get(i));
+ result.add(type.build());
+ }
+
+ OrcUtils.appendOrcTypesRebuildSubtypes(result, typeDescr);
return result;
}
@@ -192,4 +174,14 @@ public class SchemaEvolution {
acidEventFieldNames.add("currentTransaction");
acidEventFieldNames.add("row");
}
+ public static final List<OrcProto.Type.Kind> acidEventOrcTypeKinds =
+ new ArrayList<OrcProto.Type.Kind>();
+ static {
+ acidEventOrcTypeKinds.add(OrcProto.Type.Kind.INT);
+ acidEventOrcTypeKinds.add(OrcProto.Type.Kind.LONG);
+ acidEventOrcTypeKinds.add(OrcProto.Type.Kind.INT);
+ acidEventOrcTypeKinds.add(OrcProto.Type.Kind.LONG);
+ acidEventOrcTypeKinds.add(OrcProto.Type.Kind.LONG);
+ acidEventOrcTypeKinds.add(OrcProto.Type.Kind.STRUCT);
+ }
}
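The promotion rules enforced above collapse to a small predicate. The sketch below uses an invented class and method name, but the kind pairs are exactly the ones the restored validateAndCreate accepts.

import org.apache.orc.OrcProto;

// Sketch: the only mismatched (file, read) kind pairs accepted are widening
// integer promotions, since the same integer tree readers serve SHORT, INT,
// and LONG. Every other mismatch raises the IOException shown above.
class PromotionSketch {
  static boolean promotionAllowed(OrcProto.Type.Kind file,
                                  OrcProto.Type.Kind read) {
    if (file.equals(read)) {
      return true;
    }
    switch (file) {
      case SHORT:
        return read == OrcProto.Type.Kind.INT
            || read == OrcProto.Type.Kind.LONG;
      case INT:
        return read == OrcProto.Type.Kind.LONG;
      default:
        return false;
    }
  }
}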
[2/4] hive git commit: Revert "HIVE-12159: Create vectorized readers for the complex types (Owen O'Malley, reviewed by Matt McCline)"
http://git-wip-us.apache.org/repos/asf/hive/blob/d559b347/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorOrcFile.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorOrcFile.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorOrcFile.java
index 460c925..a82d672 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorOrcFile.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorOrcFile.java
@@ -32,14 +32,40 @@ import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampUtils;
import org.apache.hadoop.hive.ql.exec.vector.UnionColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
+import org.apache.hadoop.hive.serde2.io.ByteWritable;
import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.hive.serde2.io.ShortWritable;
+import org.apache.hadoop.hive.serde2.io.TimestampWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector;
+import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.FloatWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hive.common.util.HiveTestUtils;
import org.apache.orc.BinaryColumnStatistics;
@@ -91,10 +117,6 @@ public class TestVectorOrcFile {
public static class InnerStruct {
int int1;
Text string1 = new Text();
- InnerStruct(int int1, Text string1) {
- this.int1 = int1;
- this.string1.set(string1);
- }
InnerStruct(int int1, String string1) {
this.int1 = int1;
this.string1.set(string1);
@@ -114,6 +136,50 @@ public class TestVectorOrcFile {
}
}
+ public static class BigRow {
+ Boolean boolean1;
+ Byte byte1;
+ Short short1;
+ Integer int1;
+ Long long1;
+ Float float1;
+ Double double1;
+ BytesWritable bytes1;
+ Text string1;
+ MiddleStruct middle;
+ List<InnerStruct> list = new ArrayList<InnerStruct>();
+ Map<Text, InnerStruct> map = new HashMap<Text, InnerStruct>();
+
+ BigRow(Boolean b1, Byte b2, Short s1, Integer i1, Long l1, Float f1,
+ Double d1,
+ BytesWritable b3, String s2, MiddleStruct m1,
+ List<InnerStruct> l2, Map<String, InnerStruct> m2) {
+ this.boolean1 = b1;
+ this.byte1 = b2;
+ this.short1 = s1;
+ this.int1 = i1;
+ this.long1 = l1;
+ this.float1 = f1;
+ this.double1 = d1;
+ this.bytes1 = b3;
+ if (s2 == null) {
+ this.string1 = null;
+ } else {
+ this.string1 = new Text(s2);
+ }
+ this.middle = m1;
+ this.list = l2;
+ if (m2 != null) {
+ this.map = new HashMap<Text, InnerStruct>();
+ for (Map.Entry<String, InnerStruct> item : m2.entrySet()) {
+ this.map.put(new Text(item.getKey()), item.getValue());
+ }
+ } else {
+ this.map = null;
+ }
+ }
+ }
+
private static InnerStruct inner(int i, String s) {
return new InnerStruct(i, s);
}
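Because the revert reinstates ObjectInspector-driven assertions below, BigRow needs a reflection inspector just like the other row classes in this file. A sketch of the usual pattern (the helper name is illustrative, and the method is assumed to live inside TestVectorOrcFile):

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;

// Sketch: reflection-based inspector for BigRow; the synchronized block
// mirrors how these tests serialize access to the inspector factory.
static ObjectInspector bigRowInspector() {
  synchronized (TestVectorOrcFile.class) {
    return ObjectInspectorFactory.getReflectionObjectInspector(
        BigRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
}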
@@ -235,115 +301,206 @@ public class TestVectorOrcFile {
assertEquals("count: 7500 hasNull: true min: bye max: hi sum: 0", stats[9].toString());
// check the inspectors
- TypeDescription schema = reader.getSchema();
- assertEquals(TypeDescription.Category.STRUCT, schema.getCategory());
+ StructObjectInspector readerInspector = (StructObjectInspector) reader
+ .getObjectInspector();
+ assertEquals(ObjectInspector.Category.STRUCT, readerInspector.getCategory());
assertEquals("struct<boolean1:boolean,byte1:tinyint,short1:smallint,"
+ "int1:int,long1:bigint,float1:float,double1:double,bytes1:"
+ "binary,string1:string,middle:struct<list:array<struct<int1:int,"
+ "string1:string>>>,list:array<struct<int1:int,string1:string>>,"
+ "map:map<string,struct<int1:int,string1:string>>,ts:timestamp,"
- + "decimal1:decimal(38,10)>", schema.toString());
- VectorizedRowBatch batch = schema.createRowBatch();
-
+ + "decimal1:decimal(38,18)>", readerInspector.getTypeName());
+ List<? extends StructField> fields = readerInspector
+ .getAllStructFieldRefs();
+ BooleanObjectInspector bo = (BooleanObjectInspector) readerInspector
+ .getStructFieldRef("boolean1").getFieldObjectInspector();
+ ByteObjectInspector by = (ByteObjectInspector) readerInspector
+ .getStructFieldRef("byte1").getFieldObjectInspector();
+ ShortObjectInspector sh = (ShortObjectInspector) readerInspector
+ .getStructFieldRef("short1").getFieldObjectInspector();
+ IntObjectInspector in = (IntObjectInspector) readerInspector
+ .getStructFieldRef("int1").getFieldObjectInspector();
+ LongObjectInspector lo = (LongObjectInspector) readerInspector
+ .getStructFieldRef("long1").getFieldObjectInspector();
+ FloatObjectInspector fl = (FloatObjectInspector) readerInspector
+ .getStructFieldRef("float1").getFieldObjectInspector();
+ DoubleObjectInspector dbl = (DoubleObjectInspector) readerInspector
+ .getStructFieldRef("double1").getFieldObjectInspector();
+ BinaryObjectInspector bi = (BinaryObjectInspector) readerInspector
+ .getStructFieldRef("bytes1").getFieldObjectInspector();
+ StringObjectInspector st = (StringObjectInspector) readerInspector
+ .getStructFieldRef("string1").getFieldObjectInspector();
+ StructObjectInspector mid = (StructObjectInspector) readerInspector
+ .getStructFieldRef("middle").getFieldObjectInspector();
+ List<? extends StructField> midFields = mid.getAllStructFieldRefs();
+ ListObjectInspector midli = (ListObjectInspector) midFields.get(0)
+ .getFieldObjectInspector();
+ StructObjectInspector inner = (StructObjectInspector) midli
+ .getListElementObjectInspector();
+ List<? extends StructField> inFields = inner.getAllStructFieldRefs();
+ ListObjectInspector li = (ListObjectInspector) readerInspector
+ .getStructFieldRef("list").getFieldObjectInspector();
+ MapObjectInspector ma = (MapObjectInspector) readerInspector
+ .getStructFieldRef("map").getFieldObjectInspector();
+ TimestampObjectInspector tso = (TimestampObjectInspector) readerInspector
+ .getStructFieldRef("ts").getFieldObjectInspector();
+ HiveDecimalObjectInspector dco = (HiveDecimalObjectInspector) readerInspector
+ .getStructFieldRef("decimal1").getFieldObjectInspector();
+ StringObjectInspector mk = (StringObjectInspector) ma
+ .getMapKeyObjectInspector();
RecordReader rows = reader.rows();
- assertEquals(true, rows.nextBatch(batch));
- assertEquals(1024, batch.size);
-
+ Object row = rows.next(null);
+ assertNotNull(row);
// check the contents of the first row
- assertEquals(false, getBoolean(batch, 0));
- assertEquals(1, getByte(batch, 0));
- assertEquals(1024, getShort(batch, 0));
- assertEquals(65536, getInt(batch, 0));
- assertEquals(Long.MAX_VALUE, getLong(batch, 0));
- assertEquals(1.0, getFloat(batch, 0), 0.00001);
- assertEquals(-15.0, getDouble(batch, 0), 0.00001);
- assertEquals(bytes(0, 1, 2, 3, 4), getBinary(batch, 0));
- assertEquals("hi", getText(batch, 0).toString());
-
- StructColumnVector middle = (StructColumnVector) batch.cols[9];
- ListColumnVector midList = (ListColumnVector) middle.fields[0];
- StructColumnVector midListStruct = (StructColumnVector) midList.child;
- LongColumnVector midListInt = (LongColumnVector) midListStruct.fields[0];
- BytesColumnVector midListStr = (BytesColumnVector) midListStruct.fields[1];
- ListColumnVector list = (ListColumnVector) batch.cols[10];
- StructColumnVector listStruct = (StructColumnVector) list.child;
- LongColumnVector listInts = (LongColumnVector) listStruct.fields[0];
- BytesColumnVector listStrs = (BytesColumnVector) listStruct.fields[1];
- MapColumnVector map = (MapColumnVector) batch.cols[11];
- BytesColumnVector mapKey = (BytesColumnVector) map.keys;
- StructColumnVector mapValue = (StructColumnVector) map.values;
- LongColumnVector mapValueInts = (LongColumnVector) mapValue.fields[0];
- BytesColumnVector mapValueStrs = (BytesColumnVector) mapValue.fields[1];
- TimestampColumnVector timestamp = (TimestampColumnVector) batch.cols[12];
- DecimalColumnVector decs = (DecimalColumnVector) batch.cols[13];
-
- assertEquals(false, middle.isNull[0]);
- assertEquals(2, midList.lengths[0]);
- int start = (int) midList.offsets[0];
- assertEquals(1, midListInt.vector[start]);
- assertEquals("bye", midListStr.toString(start));
- assertEquals(2, midListInt.vector[start + 1]);
- assertEquals("sigh", midListStr.toString(start + 1));
-
- assertEquals(2, list.lengths[0]);
- start = (int) list.offsets[0];
- assertEquals(3, listInts.vector[start]);
- assertEquals("good", listStrs.toString(start));
- assertEquals(4, listInts.vector[start + 1]);
- assertEquals("bad", listStrs.toString(start + 1));
- assertEquals(0, map.lengths[0]);
+ assertEquals(false,
+ bo.get(readerInspector.getStructFieldData(row, fields.get(0))));
+ assertEquals(1,
+ by.get(readerInspector.getStructFieldData(row, fields.get(1))));
+ assertEquals(1024,
+ sh.get(readerInspector.getStructFieldData(row, fields.get(2))));
+ assertEquals(65536,
+ in.get(readerInspector.getStructFieldData(row, fields.get(3))));
+ assertEquals(Long.MAX_VALUE,
+ lo.get(readerInspector.getStructFieldData(row, fields.get(4))));
+ assertEquals(1.0,
+ fl.get(readerInspector.getStructFieldData(row, fields.get(5))), 0.00001);
+ assertEquals(-15.0,
+ dbl.get(readerInspector.getStructFieldData(row, fields.get(6))),
+ 0.00001);
+ assertEquals(bytes(0, 1, 2, 3, 4),
+ bi.getPrimitiveWritableObject(readerInspector.getStructFieldData(row,
+ fields.get(7))));
+ assertEquals("hi", st.getPrimitiveJavaObject(readerInspector
+ .getStructFieldData(row, fields.get(8))));
+ List<?> midRow = midli.getList(mid.getStructFieldData(
+ readerInspector.getStructFieldData(row, fields.get(9)),
+ midFields.get(0)));
+ assertNotNull(midRow);
+ assertEquals(2, midRow.size());
+ assertEquals(1,
+ in.get(inner.getStructFieldData(midRow.get(0), inFields.get(0))));
+ assertEquals("bye", st.getPrimitiveJavaObject(inner.getStructFieldData(
+ midRow.get(0), inFields.get(1))));
+ assertEquals(2,
+ in.get(inner.getStructFieldData(midRow.get(1), inFields.get(0))));
+ assertEquals("sigh", st.getPrimitiveJavaObject(inner.getStructFieldData(
+ midRow.get(1), inFields.get(1))));
+ List<?> list = li.getList(readerInspector.getStructFieldData(row,
+ fields.get(10)));
+ assertEquals(2, list.size());
+ assertEquals(3,
+ in.get(inner.getStructFieldData(list.get(0), inFields.get(0))));
+ assertEquals("good", st.getPrimitiveJavaObject(inner.getStructFieldData(
+ list.get(0), inFields.get(1))));
+ assertEquals(4,
+ in.get(inner.getStructFieldData(list.get(1), inFields.get(0))));
+ assertEquals("bad", st.getPrimitiveJavaObject(inner.getStructFieldData(
+ list.get(1), inFields.get(1))));
+ Map<?, ?> map = ma.getMap(readerInspector.getStructFieldData(row,
+ fields.get(11)));
+ assertEquals(0, map.size());
assertEquals(Timestamp.valueOf("2000-03-12 15:00:00"),
- timestamp.asScratchTimestamp(0));
- assertEquals(new HiveDecimalWritable(HiveDecimal.create("12345678.6547456")),
- decs.vector[0]);
+ tso.getPrimitiveJavaObject(readerInspector.getStructFieldData(row,
+ fields.get(12))));
+ assertEquals(HiveDecimal.create("12345678.6547456"),
+ dco.getPrimitiveJavaObject(readerInspector.getStructFieldData(row,
+ fields.get(13))));
- // check the contents of row 7499
+ // check the contents of the second row read (row 7499, after the seek)
+ assertEquals(true, rows.hasNext());
rows.seekToRow(7499);
- assertEquals(true, rows.nextBatch(batch));
- assertEquals(true, getBoolean(batch, 0));
- assertEquals(100, getByte(batch, 0));
- assertEquals(2048, getShort(batch, 0));
- assertEquals(65536, getInt(batch, 0));
- assertEquals(Long.MAX_VALUE, getLong(batch, 0));
- assertEquals(2.0, getFloat(batch, 0), 0.00001);
- assertEquals(-5.0, getDouble(batch, 0), 0.00001);
- assertEquals(bytes(), getBinary(batch, 0));
- assertEquals("bye", getText(batch, 0).toString());
- assertEquals(false, middle.isNull[0]);
- assertEquals(2, midList.lengths[0]);
- start = (int) midList.offsets[0];
- assertEquals(1, midListInt.vector[start]);
- assertEquals("bye", midListStr.toString(start));
- assertEquals(2, midListInt.vector[start + 1]);
- assertEquals("sigh", midListStr.toString(start + 1));
- assertEquals(3, list.lengths[0]);
- start = (int) list.offsets[0];
- assertEquals(100000000, listInts.vector[start]);
- assertEquals("cat", listStrs.toString(start));
- assertEquals(-100000, listInts.vector[start + 1]);
- assertEquals("in", listStrs.toString(start + 1));
- assertEquals(1234, listInts.vector[start + 2]);
- assertEquals("hat", listStrs.toString(start + 2));
- assertEquals(2, map.lengths[0]);
- start = (int) map.offsets[0];
- assertEquals("chani", mapKey.toString(start));
- assertEquals(5, mapValueInts.vector[start]);
- assertEquals("chani", mapValueStrs.toString(start));
- assertEquals("mauddib", mapKey.toString(start + 1));
- assertEquals(1, mapValueInts.vector[start + 1]);
- assertEquals("mauddib", mapValueStrs.toString(start + 1));
+ row = rows.next(null);
+ assertEquals(true,
+ bo.get(readerInspector.getStructFieldData(row, fields.get(0))));
+ assertEquals(100,
+ by.get(readerInspector.getStructFieldData(row, fields.get(1))));
+ assertEquals(2048,
+ sh.get(readerInspector.getStructFieldData(row, fields.get(2))));
+ assertEquals(65536,
+ in.get(readerInspector.getStructFieldData(row, fields.get(3))));
+ assertEquals(Long.MAX_VALUE,
+ lo.get(readerInspector.getStructFieldData(row, fields.get(4))));
+ assertEquals(2.0,
+ fl.get(readerInspector.getStructFieldData(row, fields.get(5))), 0.00001);
+ assertEquals(-5.0,
+ dbl.get(readerInspector.getStructFieldData(row, fields.get(6))),
+ 0.00001);
+ assertEquals(bytes(), bi.getPrimitiveWritableObject(readerInspector
+ .getStructFieldData(row, fields.get(7))));
+ assertEquals("bye", st.getPrimitiveJavaObject(readerInspector
+ .getStructFieldData(row, fields.get(8))));
+ midRow = midli.getList(mid.getStructFieldData(
+ readerInspector.getStructFieldData(row, fields.get(9)),
+ midFields.get(0)));
+ assertNotNull(midRow);
+ assertEquals(2, midRow.size());
+ assertEquals(1,
+ in.get(inner.getStructFieldData(midRow.get(0), inFields.get(0))));
+ assertEquals("bye", st.getPrimitiveJavaObject(inner.getStructFieldData(
+ midRow.get(0), inFields.get(1))));
+ assertEquals(2,
+ in.get(inner.getStructFieldData(midRow.get(1), inFields.get(0))));
+ assertEquals("sigh", st.getPrimitiveJavaObject(inner.getStructFieldData(
+ midRow.get(1), inFields.get(1))));
+ list = li.getList(readerInspector.getStructFieldData(row, fields.get(10)));
+ assertEquals(3, list.size());
+ assertEquals(100000000,
+ in.get(inner.getStructFieldData(list.get(0), inFields.get(0))));
+ assertEquals("cat", st.getPrimitiveJavaObject(inner.getStructFieldData(
+ list.get(0), inFields.get(1))));
+ assertEquals(-100000,
+ in.get(inner.getStructFieldData(list.get(1), inFields.get(0))));
+ assertEquals("in", st.getPrimitiveJavaObject(inner.getStructFieldData(
+ list.get(1), inFields.get(1))));
+ assertEquals(1234,
+ in.get(inner.getStructFieldData(list.get(2), inFields.get(0))));
+ assertEquals("hat", st.getPrimitiveJavaObject(inner.getStructFieldData(
+ list.get(2), inFields.get(1))));
+ map = ma.getMap(readerInspector.getStructFieldData(row, fields.get(11)));
+ assertEquals(2, map.size());
+ boolean[] found = new boolean[2];
+ for(Object key : map.keySet()) {
+ String str = mk.getPrimitiveJavaObject(key);
+ if (str.equals("chani")) {
+ assertEquals(false, found[0]);
+ assertEquals(5,
+ in.get(inner.getStructFieldData(map.get(key), inFields.get(0))));
+ assertEquals(str, st.getPrimitiveJavaObject(inner.getStructFieldData(
+ map.get(key), inFields.get(1))));
+ found[0] = true;
+ } else if (str.equals("mauddib")) {
+ assertEquals(false, found[1]);
+ assertEquals(1,
+ in.get(inner.getStructFieldData(map.get(key), inFields.get(0))));
+ assertEquals(str, st.getPrimitiveJavaObject(inner.getStructFieldData(
+ map.get(key), inFields.get(1))));
+ found[1] = true;
+ } else {
+ throw new IllegalArgumentException("Unknown key " + str);
+ }
+ }
+ assertEquals(true, found[0]);
+ assertEquals(true, found[1]);
assertEquals(Timestamp.valueOf("2000-03-12 15:00:01"),
- timestamp.asScratchTimestamp(0));
- assertEquals(new HiveDecimalWritable(HiveDecimal.create("12345678.6547457")),
- decs.vector[0]);
+ tso.getPrimitiveJavaObject(readerInspector.getStructFieldData(row,
+ fields.get(12))));
+ assertEquals(HiveDecimal.create("12345678.6547457"),
+ dco.getPrimitiveJavaObject(readerInspector.getStructFieldData(row,
+ fields.get(13))));
// handle the close up
- assertEquals(false, rows.nextBatch(batch));
+ assertEquals(false, rows.hasNext());
rows.close();
}
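This hunk is representative of the whole revert: each vectorized nextBatch(batch) loop goes back to the row-at-a-time hasNext()/next() contract, where next(previous) may recycle and return the object it was handed. A minimal sketch of the restored loop shape (countRows is an illustrative name):

import java.io.IOException;
import org.apache.hadoop.hive.ql.io.orc.RecordReader;

// Sketch: the restored row-at-a-time read loop; one row object is recycled
// across calls instead of filling a VectorizedRowBatch.
static long countRows(RecordReader rows) throws IOException {
  long count = 0;
  Object row = null;
  while (rows.hasNext()) {
    row = rows.next(row);
    ++count;
  }
  rows.close();
  return count;
}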
@Test
public void testTimestamp() throws Exception {
+ ObjectInspector inspector;
+ synchronized (TestVectorOrcFile.class) {
+ inspector = ObjectInspectorFactory.getReflectionObjectInspector(Timestamp.class,
+ ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+ }
+
TypeDescription schema = TypeDescription.createTimestamp();
Writer writer = OrcFile.createWriter(testFilePath,
OrcFile.writerOptions(conf).setSchema(schema).stripeSize(100000)
@@ -376,15 +533,11 @@ public class TestVectorOrcFile {
Reader reader = OrcFile.createReader(testFilePath,
OrcFile.readerOptions(conf).filesystem(fs));
- RecordReader rows = reader.rows();
- batch = reader.getSchema().createRowBatch();
- TimestampColumnVector timestamps = (TimestampColumnVector) batch.cols[0];
+ RecordReader rows = reader.rows(null);
int idx = 0;
- while (rows.nextBatch(batch)) {
- for(int r=0; r < batch.size; ++r) {
- assertEquals(tslist.get(idx++).getNanos(),
- timestamps.asScratchTimestamp(r).getNanos());
- }
+ while (rows.hasNext()) {
+ Object row = rows.next(null);
+ assertEquals(tslist.get(idx++).getNanos(), ((TimestampWritable) row).getNanos());
}
assertEquals(tslist.size(), rows.getRowNumber());
assertEquals(0, writer.getSchema().getMaximumId());
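Note that testTimestamp reads the writable straight off the reader: with a bare timestamp as the file's top-level schema there is no OrcStruct wrapper, so the cast lands directly on TimestampWritable. A sketch of that shape, assuming a reader opened as above:

// Sketch: single-column (non-struct) schema, so next() yields the writable.
RecordReader rows = reader.rows(null);
while (rows.hasNext()) {
  TimestampWritable ts = (TimestampWritable) rows.next(null);
  System.out.println(ts.getNanos());
}
rows.close();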
@@ -455,28 +608,50 @@ public class TestVectorOrcFile {
stats[2].toString());
// check the inspectors
- batch = reader.getSchema().createRowBatch();
- BytesColumnVector bytes = (BytesColumnVector) batch.cols[0];
- BytesColumnVector strs = (BytesColumnVector) batch.cols[1];
+ StructObjectInspector readerInspector =
+ (StructObjectInspector) reader.getObjectInspector();
+ assertEquals(ObjectInspector.Category.STRUCT,
+ readerInspector.getCategory());
+ assertEquals("struct<bytes1:binary,string1:string>",
+ readerInspector.getTypeName());
+ List<? extends StructField> fields =
+ readerInspector.getAllStructFieldRefs();
+ BinaryObjectInspector bi = (BinaryObjectInspector) readerInspector.
+ getStructFieldRef("bytes1").getFieldObjectInspector();
+ StringObjectInspector st = (StringObjectInspector) readerInspector.
+ getStructFieldRef("string1").getFieldObjectInspector();
RecordReader rows = reader.rows();
- assertEquals(true, rows.nextBatch(batch));
- assertEquals(4, batch.size);
-
+ Object row = rows.next(null);
+ assertNotNull(row);
// check the contents of the first row
- assertEquals(bytes(0,1,2,3,4), getBinary(bytes, 0));
- assertEquals("foo", strs.toString(0));
+ assertEquals(bytes(0,1,2,3,4), bi.getPrimitiveWritableObject(
+ readerInspector.getStructFieldData(row, fields.get(0))));
+ assertEquals("foo", st.getPrimitiveJavaObject(readerInspector.
+ getStructFieldData(row, fields.get(1))));
// check the contents of second row
- assertEquals(bytes(0,1,2,3), getBinary(bytes, 1));
- assertEquals("bar", strs.toString(1));
+ assertEquals(true, rows.hasNext());
+ row = rows.next(row);
+ assertEquals(bytes(0,1,2,3), bi.getPrimitiveWritableObject(
+ readerInspector.getStructFieldData(row, fields.get(0))));
+ assertEquals("bar", st.getPrimitiveJavaObject(readerInspector.
+ getStructFieldData(row, fields.get(1))));
// check the contents of third row
- assertEquals(bytes(0,1,2,3,4,5), getBinary(bytes, 2));
- assertNull(strs.toString(2));
+ assertEquals(true, rows.hasNext());
+ row = rows.next(row);
+ assertEquals(bytes(0,1,2,3,4,5), bi.getPrimitiveWritableObject(
+ readerInspector.getStructFieldData(row, fields.get(0))));
+ assertNull(st.getPrimitiveJavaObject(readerInspector.
+ getStructFieldData(row, fields.get(1))));
// check the contents of fourth row
- assertNull(getBinary(bytes, 3));
- assertEquals("hi", strs.toString(3));
+ assertEquals(true, rows.hasNext());
+ row = rows.next(row);
+ assertNull(bi.getPrimitiveWritableObject(
+ readerInspector.getStructFieldData(row, fields.get(0))));
+ assertEquals("hi", st.getPrimitiveJavaObject(readerInspector.
+ getStructFieldData(row, fields.get(1))));
// handle the close up
assertEquals(false, rows.hasNext());
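Two accessor flavors alternate throughout these restored assertions: getPrimitiveWritableObject returns the Hadoop writable, while getPrimitiveJavaObject unwraps to a plain Java value. A sketch against the two fields above, assuming the inspectors set up in this test:

// Sketch: writable vs. Java-object accessors on the same row.
Object row = rows.next(null);
BytesWritable b = bi.getPrimitiveWritableObject(
    readerInspector.getStructFieldData(row, fields.get(0)));   // BytesWritable
String s = st.getPrimitiveJavaObject(
    readerInspector.getStructFieldData(row, fields.get(1)));   // java.lang.String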
@@ -592,19 +767,6 @@ public class TestVectorOrcFile {
}
}
- private static void checkInner(StructColumnVector inner, int rowId,
- int rowInBatch, int i, String value) {
- assertEquals("row " + rowId, i,
- ((LongColumnVector) inner.fields[0]).vector[rowInBatch]);
- if (value != null) {
- assertEquals("row " + rowId, value,
- ((BytesColumnVector) inner.fields[1]).toString(rowInBatch));
- } else {
- assertEquals("row " + rowId, true, inner.fields[1].isNull[rowInBatch]);
- assertEquals("row " + rowId, false, inner.fields[1].noNulls);
- }
- }
-
private static void setInnerList(ListColumnVector list, int rowId,
List<InnerStruct> value) {
if (value != null) {
@@ -625,23 +787,6 @@ public class TestVectorOrcFile {
}
}
- private static void checkInnerList(ListColumnVector list, int rowId,
- int rowInBatch, List<InnerStruct> value) {
- if (value != null) {
- assertEquals("row " + rowId, value.size(), list.lengths[rowInBatch]);
- int start = (int) list.offsets[rowInBatch];
- for (int i = 0; i < list.lengths[rowInBatch]; ++i) {
- InnerStruct inner = value.get(i);
- checkInner((StructColumnVector) list.child, rowId, i + start,
- inner.int1, inner.string1.toString());
- }
- list.childCount += value.size();
- } else {
- assertEquals("row " + rowId, true, list.isNull[rowInBatch]);
- assertEquals("row " + rowId, false, list.noNulls);
- }
- }
-
private static void setInnerMap(MapColumnVector map, int rowId,
Map<String, InnerStruct> value) {
if (value != null) {
@@ -667,24 +812,6 @@ public class TestVectorOrcFile {
}
}
- private static void checkInnerMap(MapColumnVector map, int rowId,
- int rowInBatch,
- Map<String, InnerStruct> value) {
- if (value != null) {
- assertEquals("row " + rowId, value.size(), map.lengths[rowInBatch]);
- int offset = (int) map.offsets[rowInBatch];
- for(int i=0; i < value.size(); ++i) {
- String key = ((BytesColumnVector) map.keys).toString(offset + i);
- InnerStruct expected = value.get(key);
- checkInner((StructColumnVector) map.values, rowId, offset + i,
- expected.int1, expected.string1.toString());
- }
- } else {
- assertEquals("row " + rowId, true, map.isNull[rowId]);
- assertEquals("row " + rowId, false, map.noNulls);
- }
- }
-
private static void setMiddleStruct(StructColumnVector middle, int rowId,
MiddleStruct value) {
if (value != null) {
@@ -695,17 +822,6 @@ public class TestVectorOrcFile {
}
}
- private static void checkMiddleStruct(StructColumnVector middle, int rowId,
- int rowInBatch, MiddleStruct value) {
- if (value != null) {
- checkInnerList((ListColumnVector) middle.fields[0], rowId, rowInBatch,
- value.list);
- } else {
- assertEquals("row " + rowId, true, middle.isNull[rowInBatch]);
- assertEquals("row " + rowId, false, middle.noNulls);
- }
- }
-
private static void setBigRow(VectorizedRowBatch batch, int rowId,
Boolean b1, Byte b2, Short s1,
Integer i1, Long l1, Float f1,
@@ -737,160 +853,6 @@ public class TestVectorOrcFile {
setInnerMap((MapColumnVector) batch.cols[11], rowId, m2);
}
- private static void checkBigRow(VectorizedRowBatch batch,
- int rowInBatch,
- int rowId,
- boolean b1, byte b2, short s1,
- int i1, long l1, float f1,
- double d1, BytesWritable b3, String s2,
- MiddleStruct m1, List<InnerStruct> l2,
- Map<String, InnerStruct> m2) {
- assertEquals("row " + rowId, b1, getBoolean(batch, rowInBatch));
- assertEquals("row " + rowId, b2, getByte(batch, rowInBatch));
- assertEquals("row " + rowId, s1, getShort(batch, rowInBatch));
- assertEquals("row " + rowId, i1, getInt(batch, rowInBatch));
- assertEquals("row " + rowId, l1, getLong(batch, rowInBatch));
- assertEquals("row " + rowId, f1, getFloat(batch, rowInBatch), 0.0001);
- assertEquals("row " + rowId, d1, getDouble(batch, rowInBatch), 0.0001);
- if (b3 != null) {
- BytesColumnVector bytes = (BytesColumnVector) batch.cols[7];
- assertEquals("row " + rowId, b3.getLength(), bytes.length[rowInBatch]);
- for(int i=0; i < b3.getLength(); ++i) {
- assertEquals("row " + rowId + " byte " + i, b3.getBytes()[i],
- bytes.vector[rowInBatch][bytes.start[rowInBatch] + i]);
- }
- } else {
- assertEquals("row " + rowId, true, batch.cols[7].isNull[rowInBatch]);
- assertEquals("row " + rowId, false, batch.cols[7].noNulls);
- }
- if (s2 != null) {
- assertEquals("row " + rowId, s2, getText(batch, rowInBatch).toString());
- } else {
- assertEquals("row " + rowId, true, batch.cols[8].isNull[rowInBatch]);
- assertEquals("row " + rowId, false, batch.cols[8].noNulls);
- }
- checkMiddleStruct((StructColumnVector) batch.cols[9], rowId, rowInBatch,
- m1);
- checkInnerList((ListColumnVector) batch.cols[10], rowId, rowInBatch, l2);
- checkInnerMap((MapColumnVector) batch.cols[11], rowId, rowInBatch, m2);
- }
-
- private static boolean getBoolean(VectorizedRowBatch batch, int rowId) {
- return ((LongColumnVector) batch.cols[0]).vector[rowId] != 0;
- }
-
- private static byte getByte(VectorizedRowBatch batch, int rowId) {
- return (byte) ((LongColumnVector) batch.cols[1]).vector[rowId];
- }
-
- private static short getShort(VectorizedRowBatch batch, int rowId) {
- return (short) ((LongColumnVector) batch.cols[2]).vector[rowId];
- }
-
- private static int getInt(VectorizedRowBatch batch, int rowId) {
- return (int) ((LongColumnVector) batch.cols[3]).vector[rowId];
- }
-
- private static long getLong(VectorizedRowBatch batch, int rowId) {
- return ((LongColumnVector) batch.cols[4]).vector[rowId];
- }
-
- private static float getFloat(VectorizedRowBatch batch, int rowId) {
- return (float) ((DoubleColumnVector) batch.cols[5]).vector[rowId];
- }
-
- private static double getDouble(VectorizedRowBatch batch, int rowId) {
- return ((DoubleColumnVector) batch.cols[6]).vector[rowId];
- }
-
- private static BytesWritable getBinary(BytesColumnVector column, int rowId) {
- if (column.isRepeating) {
- rowId = 0;
- }
- if (column.noNulls || !column.isNull[rowId]) {
- return new BytesWritable(Arrays.copyOfRange(column.vector[rowId],
- column.start[rowId], column.start[rowId] + column.length[rowId]));
- } else {
- return null;
- }
- }
-
- private static BytesWritable getBinary(VectorizedRowBatch batch, int rowId) {
- return getBinary((BytesColumnVector) batch.cols[7], rowId);
- }
-
- private static Text getText(BytesColumnVector vector, int rowId) {
- if (vector.isRepeating) {
- rowId = 0;
- }
- if (vector.noNulls || !vector.isNull[rowId]) {
- return new Text(Arrays.copyOfRange(vector.vector[rowId],
- vector.start[rowId], vector.start[rowId] + vector.length[rowId]));
- } else {
- return null;
- }
- }
-
- private static Text getText(VectorizedRowBatch batch, int rowId) {
- return getText((BytesColumnVector) batch.cols[8], rowId);
- }
-
- private static InnerStruct getInner(StructColumnVector vector,
- int rowId) {
- return new InnerStruct(
- (int) ((LongColumnVector) vector.fields[0]).vector[rowId],
- getText((BytesColumnVector) vector.fields[1], rowId));
- }
-
- private static List<InnerStruct> getList(ListColumnVector cv,
- int rowId) {
- if (cv.isRepeating) {
- rowId = 0;
- }
- if (cv.noNulls || !cv.isNull[rowId]) {
- List<InnerStruct> result =
- new ArrayList<InnerStruct>((int) cv.lengths[rowId]);
- for(long i=cv.offsets[rowId];
- i < cv.offsets[rowId] + cv.lengths[rowId]; ++i) {
- result.add(getInner((StructColumnVector) cv.child, (int) i));
- }
- return result;
- } else {
- return null;
- }
- }
-
- private static List<InnerStruct> getMidList(VectorizedRowBatch batch,
- int rowId) {
- return getList((ListColumnVector) ((StructColumnVector) batch.cols[9])
- .fields[0], rowId);
- }
-
- private static List<InnerStruct> getList(VectorizedRowBatch batch,
- int rowId) {
- return getList((ListColumnVector) batch.cols[10], rowId);
- }
-
- private static Map<Text, InnerStruct> getMap(VectorizedRowBatch batch,
- int rowId) {
- MapColumnVector cv = (MapColumnVector) batch.cols[11];
- if (cv.isRepeating) {
- rowId = 0;
- }
- if (cv.noNulls || !cv.isNull[rowId]) {
- Map<Text, InnerStruct> result =
- new HashMap<Text, InnerStruct>((int) cv.lengths[rowId]);
- for(long i=cv.offsets[rowId];
- i < cv.offsets[rowId] + cv.lengths[rowId]; ++i) {
- result.put(getText((BytesColumnVector) cv.keys, (int) i),
- getInner((StructColumnVector) cv.values, (int) i));
- }
- return result;
- } else {
- return null;
- }
- }
-
private static TypeDescription createInnerSchema() {
return TypeDescription.createStruct()
.addField("int1", TypeDescription.createInt())
@@ -1019,114 +981,178 @@ public class TestVectorOrcFile {
assertEquals("count: 2 hasNull: false min: bye max: hi sum: 5", stats[9].toString());
- // check the schema
- TypeDescription readerSchema = reader.getSchema();
- assertEquals(TypeDescription.Category.STRUCT, readerSchema.getCategory());
+ // check the inspectors
+ StructObjectInspector readerInspector =
+ (StructObjectInspector) reader.getObjectInspector();
+ assertEquals(ObjectInspector.Category.STRUCT,
+ readerInspector.getCategory());
assertEquals("struct<boolean1:boolean,byte1:tinyint,short1:smallint,"
+ "int1:int,long1:bigint,float1:float,double1:double,bytes1:"
+ "binary,string1:string,middle:struct<list:array<struct<int1:int,"
+ "string1:string>>>,list:array<struct<int1:int,string1:string>>,"
+ "map:map<string,struct<int1:int,string1:string>>>",
- readerSchema.toString());
- List<String> fieldNames = readerSchema.getFieldNames();
- List<TypeDescription> fieldTypes = readerSchema.getChildren();
- assertEquals("boolean1", fieldNames.get(0));
- assertEquals(TypeDescription.Category.BOOLEAN, fieldTypes.get(0).getCategory());
- assertEquals("byte1", fieldNames.get(1));
- assertEquals(TypeDescription.Category.BYTE, fieldTypes.get(1).getCategory());
- assertEquals("short1", fieldNames.get(2));
- assertEquals(TypeDescription.Category.SHORT, fieldTypes.get(2).getCategory());
- assertEquals("int1", fieldNames.get(3));
- assertEquals(TypeDescription.Category.INT, fieldTypes.get(3).getCategory());
- assertEquals("long1", fieldNames.get(4));
- assertEquals(TypeDescription.Category.LONG, fieldTypes.get(4).getCategory());
- assertEquals("float1", fieldNames.get(5));
- assertEquals(TypeDescription.Category.FLOAT, fieldTypes.get(5).getCategory());
- assertEquals("double1", fieldNames.get(6));
- assertEquals(TypeDescription.Category.DOUBLE, fieldTypes.get(6).getCategory());
- assertEquals("bytes1", fieldNames.get(7));
- assertEquals(TypeDescription.Category.BINARY, fieldTypes.get(7).getCategory());
- assertEquals("string1", fieldNames.get(8));
- assertEquals(TypeDescription.Category.STRING, fieldTypes.get(8).getCategory());
- assertEquals("middle", fieldNames.get(9));
- TypeDescription middle = fieldTypes.get(9);
- assertEquals(TypeDescription.Category.STRUCT, middle.getCategory());
- TypeDescription midList = middle.getChildren().get(0);
- assertEquals(TypeDescription.Category.LIST, midList.getCategory());
- TypeDescription inner = midList.getChildren().get(0);
- assertEquals(TypeDescription.Category.STRUCT, inner.getCategory());
- assertEquals("int1", inner.getFieldNames().get(0));
- assertEquals("string1", inner.getFieldNames().get(1));
-
+ readerInspector.getTypeName());
+ List<? extends StructField> fields =
+ readerInspector.getAllStructFieldRefs();
+ BooleanObjectInspector bo = (BooleanObjectInspector) readerInspector.
+ getStructFieldRef("boolean1").getFieldObjectInspector();
+ ByteObjectInspector by = (ByteObjectInspector) readerInspector.
+ getStructFieldRef("byte1").getFieldObjectInspector();
+ ShortObjectInspector sh = (ShortObjectInspector) readerInspector.
+ getStructFieldRef("short1").getFieldObjectInspector();
+ IntObjectInspector in = (IntObjectInspector) readerInspector.
+ getStructFieldRef("int1").getFieldObjectInspector();
+ LongObjectInspector lo = (LongObjectInspector) readerInspector.
+ getStructFieldRef("long1").getFieldObjectInspector();
+ FloatObjectInspector fl = (FloatObjectInspector) readerInspector.
+ getStructFieldRef("float1").getFieldObjectInspector();
+ DoubleObjectInspector dbl = (DoubleObjectInspector) readerInspector.
+ getStructFieldRef("double1").getFieldObjectInspector();
+ BinaryObjectInspector bi = (BinaryObjectInspector) readerInspector.
+ getStructFieldRef("bytes1").getFieldObjectInspector();
+ StringObjectInspector st = (StringObjectInspector) readerInspector.
+ getStructFieldRef("string1").getFieldObjectInspector();
+ StructObjectInspector mid = (StructObjectInspector) readerInspector.
+ getStructFieldRef("middle").getFieldObjectInspector();
+ List<? extends StructField> midFields =
+ mid.getAllStructFieldRefs();
+ ListObjectInspector midli =
+ (ListObjectInspector) midFields.get(0).getFieldObjectInspector();
+ StructObjectInspector inner = (StructObjectInspector)
+ midli.getListElementObjectInspector();
+ List<? extends StructField> inFields = inner.getAllStructFieldRefs();
+ ListObjectInspector li = (ListObjectInspector) readerInspector.
+ getStructFieldRef("list").getFieldObjectInspector();
+ MapObjectInspector ma = (MapObjectInspector) readerInspector.
+ getStructFieldRef("map").getFieldObjectInspector();
+ StringObjectInspector mk = (StringObjectInspector)
+ ma.getMapKeyObjectInspector();
RecordReader rows = reader.rows();
- // create a new batch
- batch = readerSchema.createRowBatch();
- assertEquals(true, rows.nextBatch(batch));
- assertEquals(2, batch.size);
- assertEquals(false, rows.hasNext());
-
+ Object row = rows.next(null);
+ assertNotNull(row);
// check the contents of the first row
- assertEquals(false, getBoolean(batch, 0));
- assertEquals(1, getByte(batch, 0));
- assertEquals(1024, getShort(batch, 0));
- assertEquals(65536, getInt(batch, 0));
- assertEquals(Long.MAX_VALUE, getLong(batch, 0));
- assertEquals(1.0, getFloat(batch, 0), 0.00001);
- assertEquals(-15.0, getDouble(batch, 0), 0.00001);
- assertEquals(bytes(0,1,2,3,4), getBinary(batch, 0));
- assertEquals("hi", getText(batch, 0).toString());
- List<InnerStruct> midRow = getMidList(batch, 0);
+ assertEquals(false,
+ bo.get(readerInspector.getStructFieldData(row, fields.get(0))));
+ assertEquals(1, by.get(readerInspector.getStructFieldData(row,
+ fields.get(1))));
+ assertEquals(1024, sh.get(readerInspector.getStructFieldData(row,
+ fields.get(2))));
+ assertEquals(65536, in.get(readerInspector.getStructFieldData(row,
+ fields.get(3))));
+ assertEquals(Long.MAX_VALUE, lo.get(readerInspector.
+ getStructFieldData(row, fields.get(4))));
+ assertEquals(1.0, fl.get(readerInspector.getStructFieldData(row,
+ fields.get(5))), 0.00001);
+ assertEquals(-15.0, dbl.get(readerInspector.getStructFieldData(row,
+ fields.get(6))), 0.00001);
+ assertEquals(bytes(0,1,2,3,4), bi.getPrimitiveWritableObject(
+ readerInspector.getStructFieldData(row, fields.get(7))));
+ assertEquals("hi", st.getPrimitiveJavaObject(readerInspector.
+ getStructFieldData(row, fields.get(8))));
+ List<?> midRow = midli.getList(mid.getStructFieldData(readerInspector.
+ getStructFieldData(row, fields.get(9)), midFields.get(0)));
assertNotNull(midRow);
assertEquals(2, midRow.size());
- assertEquals(1, midRow.get(0).int1);
- assertEquals("bye", midRow.get(0).string1.toString());
- assertEquals(2, midRow.get(1).int1);
- assertEquals("sigh", midRow.get(1).string1.toString());
- List<InnerStruct> list = getList(batch, 0);
+ assertEquals(1, in.get(inner.getStructFieldData(midRow.get(0),
+ inFields.get(0))));
+ assertEquals("bye", st.getPrimitiveJavaObject(inner.getStructFieldData
+ (midRow.get(0), inFields.get(1))));
+ assertEquals(2, in.get(inner.getStructFieldData(midRow.get(1),
+ inFields.get(0))));
+ assertEquals("sigh", st.getPrimitiveJavaObject(inner.getStructFieldData
+ (midRow.get(1), inFields.get(1))));
+ List<?> list = li.getList(readerInspector.getStructFieldData(row,
+ fields.get(10)));
assertEquals(2, list.size());
- assertEquals(3, list.get(0).int1);
- assertEquals("good", list.get(0).string1.toString());
- assertEquals(4, list.get(1).int1);
- assertEquals("bad", list.get(1).string1.toString());
- Map<Text, InnerStruct> map = getMap(batch, 0);
+ assertEquals(3, in.get(inner.getStructFieldData(list.get(0),
+ inFields.get(0))));
+ assertEquals("good", st.getPrimitiveJavaObject(inner.getStructFieldData
+ (list.get(0), inFields.get(1))));
+ assertEquals(4, in.get(inner.getStructFieldData(list.get(1),
+ inFields.get(0))));
+ assertEquals("bad", st.getPrimitiveJavaObject(inner.getStructFieldData
+ (list.get(1), inFields.get(1))));
+ Map<?,?> map = ma.getMap(readerInspector.getStructFieldData(row,
+ fields.get(11)));
assertEquals(0, map.size());
// check the contents of second row
- assertEquals(true, getBoolean(batch, 1));
- assertEquals(100, getByte(batch, 1));
- assertEquals(2048, getShort(batch, 1));
- assertEquals(65536, getInt(batch, 1));
- assertEquals(Long.MAX_VALUE, getLong(batch, 1));
- assertEquals(2.0, getFloat(batch, 1), 0.00001);
- assertEquals(-5.0, getDouble(batch, 1), 0.00001);
- assertEquals(bytes(), getBinary(batch, 1));
- assertEquals("bye", getText(batch, 1).toString());
- midRow = getMidList(batch, 1);
+ assertEquals(true, rows.hasNext());
+ row = rows.next(row);
+ assertEquals(true,
+ bo.get(readerInspector.getStructFieldData(row, fields.get(0))));
+ assertEquals(100, by.get(readerInspector.getStructFieldData(row,
+ fields.get(1))));
+ assertEquals(2048, sh.get(readerInspector.getStructFieldData(row,
+ fields.get(2))));
+ assertEquals(65536, in.get(readerInspector.getStructFieldData(row,
+ fields.get(3))));
+ assertEquals(Long.MAX_VALUE, lo.get(readerInspector.
+ getStructFieldData(row, fields.get(4))));
+ assertEquals(2.0, fl.get(readerInspector.getStructFieldData(row,
+ fields.get(5))), 0.00001);
+ assertEquals(-5.0, dbl.get(readerInspector.getStructFieldData(row,
+ fields.get(6))), 0.00001);
+ assertEquals(bytes(), bi.getPrimitiveWritableObject(
+ readerInspector.getStructFieldData(row, fields.get(7))));
+ assertEquals("bye", st.getPrimitiveJavaObject(readerInspector.
+ getStructFieldData(row, fields.get(8))));
+ midRow = midli.getList(mid.getStructFieldData(readerInspector.
+ getStructFieldData(row, fields.get(9)), midFields.get(0)));
assertNotNull(midRow);
assertEquals(2, midRow.size());
- assertEquals(1, midRow.get(0).int1);
- assertEquals("bye", midRow.get(0).string1.toString());
- assertEquals(2, midRow.get(1).int1);
- assertEquals("sigh", midRow.get(1).string1.toString());
- list = getList(batch, 1);
+ assertEquals(1, in.get(inner.getStructFieldData(midRow.get(0),
+ inFields.get(0))));
+ assertEquals("bye", st.getPrimitiveJavaObject(inner.getStructFieldData
+ (midRow.get(0), inFields.get(1))));
+ assertEquals(2, in.get(inner.getStructFieldData(midRow.get(1),
+ inFields.get(0))));
+ assertEquals("sigh", st.getPrimitiveJavaObject(inner.getStructFieldData
+ (midRow.get(1), inFields.get(1))));
+ list = li.getList(readerInspector.getStructFieldData(row,
+ fields.get(10)));
assertEquals(3, list.size());
- assertEquals(100000000, list.get(0).int1);
- assertEquals("cat", list.get(0).string1.toString());
- assertEquals(-100000, list.get(1).int1);
- assertEquals("in", list.get(1).string1.toString());
- assertEquals(1234, list.get(2).int1);
- assertEquals("hat", list.get(2).string1.toString());
- map = getMap(batch, 1);
+ assertEquals(100000000, in.get(inner.getStructFieldData(list.get(0),
+ inFields.get(0))));
+ assertEquals("cat", st.getPrimitiveJavaObject(inner.getStructFieldData
+ (list.get(0), inFields.get(1))));
+ assertEquals(-100000, in.get(inner.getStructFieldData(list.get(1),
+ inFields.get(0))));
+ assertEquals("in", st.getPrimitiveJavaObject(inner.getStructFieldData
+ (list.get(1), inFields.get(1))));
+ assertEquals(1234, in.get(inner.getStructFieldData(list.get(2),
+ inFields.get(0))));
+ assertEquals("hat", st.getPrimitiveJavaObject(inner.getStructFieldData
+ (list.get(2), inFields.get(1))));
+ map = ma.getMap(readerInspector.getStructFieldData(row,
+ fields.get(11)));
assertEquals(2, map.size());
- InnerStruct value = map.get(new Text("chani"));
- assertEquals(5, value.int1);
- assertEquals("chani", value.string1.toString());
- value = map.get(new Text("mauddib"));
- assertEquals(1, value.int1);
- assertEquals("mauddib", value.string1.toString());
+ boolean[] found = new boolean[2];
+ for(Object key : map.keySet()) {
+ String str = mk.getPrimitiveJavaObject(key);
+ if (str.equals("chani")) {
+ assertEquals(false, found[0]);
+ assertEquals(5, in.get(inner.getStructFieldData(map.get(key),
+ inFields.get(0))));
+ assertEquals(str, st.getPrimitiveJavaObject(
+ inner.getStructFieldData(map.get(key), inFields.get(1))));
+ found[0] = true;
+ } else if (str.equals("mauddib")) {
+ assertEquals(false, found[1]);
+ assertEquals(1, in.get(inner.getStructFieldData(map.get(key),
+ inFields.get(0))));
+ assertEquals(str, st.getPrimitiveJavaObject(
+ inner.getStructFieldData(map.get(key), inFields.get(1))));
+ found[1] = true;
+ } else {
+ throw new IllegalArgumentException("Unknown key " + str);
+ }
+ }
+ assertEquals(true, found[0]);
+ assertEquals(true, found[1]);
// handle the close up
- assertEquals(false, rows.nextBatch(batch));
+ assertEquals(false, rows.hasNext());
rows.close();
}
@@ -1190,36 +1216,35 @@ public class TestVectorOrcFile {
}
// check out the types
- TypeDescription type = reader.getSchema();
- assertEquals(TypeDescription.Category.STRUCT, type.getCategory());
- assertEquals(2, type.getChildren().size());
- TypeDescription type1 = type.getChildren().get(0);
- TypeDescription type2 = type.getChildren().get(1);
- assertEquals(TypeDescription.Category.INT, type1.getCategory());
- assertEquals(TypeDescription.Category.STRING, type2.getCategory());
- assertEquals("struct<int1:int,string1:string>", type.toString());
+ List<OrcProto.Type> types = reader.getTypes();
+ assertEquals(3, types.size());
+ assertEquals(OrcProto.Type.Kind.STRUCT, types.get(0).getKind());
+ assertEquals(2, types.get(0).getSubtypesCount());
+ assertEquals(1, types.get(0).getSubtypes(0));
+ assertEquals(2, types.get(0).getSubtypes(1));
+ assertEquals(OrcProto.Type.Kind.INT, types.get(1).getKind());
+ assertEquals(0, types.get(1).getSubtypesCount());
+ assertEquals(OrcProto.Type.Kind.STRING, types.get(2).getKind());
+ assertEquals(0, types.get(2).getSubtypesCount());
// read the contents and make sure they match
RecordReader rows1 = reader.rows(new boolean[]{true, true, false});
RecordReader rows2 = reader.rows(new boolean[]{true, false, true});
r1 = new Random(1);
r2 = new Random(2);
- VectorizedRowBatch batch1 = reader.getSchema().createRowBatch(1000);
- VectorizedRowBatch batch2 = reader.getSchema().createRowBatch(1000);
- for(int i = 0; i < 21000; i += 1000) {
- assertEquals(true, rows1.nextBatch(batch1));
- assertEquals(true, rows2.nextBatch(batch2));
- assertEquals(1000, batch1.size);
- assertEquals(1000, batch2.size);
- for(int j=0; j < 1000; ++j) {
- assertEquals(r1.nextInt(),
- ((LongColumnVector) batch1.cols[0]).vector[j]);
- assertEquals(Long.toHexString(r2.nextLong()),
- ((BytesColumnVector) batch2.cols[1]).toString(j));
- }
- }
- assertEquals(false, rows1.nextBatch(batch1));
- assertEquals(false, rows2.nextBatch(batch2));
+ OrcStruct row1 = null;
+ OrcStruct row2 = null;
+ for(int i = 0; i < 21000; ++i) {
+ assertEquals(true, rows1.hasNext());
+ assertEquals(true, rows2.hasNext());
+ row1 = (OrcStruct) rows1.next(row1);
+ row2 = (OrcStruct) rows2.next(row2);
+ assertEquals(r1.nextInt(), ((IntWritable) row1.getFieldValue(0)).get());
+ assertEquals(Long.toHexString(r2.nextLong()),
+ row2.getFieldValue(1).toString());
+ }
+ assertEquals(false, rows1.hasNext());
+ assertEquals(false, rows2.hasNext());
rows1.close();
rows2.close();
}
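The restored getTypes() assertions read ORC's flat, pre-order type encoding directly: entry 0 is the root struct and each subtypes value is an index back into the same list (here 1 = int1, 2 = string1). A sketch that dumps such a list, assuming the OrcProto bindings:

import java.util.List;
import org.apache.orc.OrcProto;

// Sketch: walk a flat OrcProto type list; subtype ids index this list.
static void printTypes(List<OrcProto.Type> types) {
  for (int i = 0; i < types.size(); ++i) {
    OrcProto.Type t = types.get(i);
    System.out.println(i + ": " + t.getKind() + " subtypes=" + t.getSubtypesList());
  }
}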
@@ -1330,33 +1355,17 @@ public class TestVectorOrcFile {
Reader reader = OrcFile.createReader(file,
OrcFile.readerOptions(conf));
RecordReader rows = reader.rows();
- batch = reader.getSchema().createRowBatch(1000);
- TimestampColumnVector times = (TimestampColumnVector) batch.cols[0];
- LongColumnVector dates = (LongColumnVector) batch.cols[1];
+ OrcStruct row = null;
for (int year = minYear; year < maxYear; ++year) {
- rows.nextBatch(batch);
- assertEquals(1000, batch.size);
for(int ms = 1000; ms < 2000; ++ms) {
- StringBuilder buffer = new StringBuilder();
- times.stringifyValue(buffer, ms - 1000);
- String expected = Integer.toString(year) + "-05-05 12:34:56.";
- // suppress the final zeros on the string by dividing by the largest
- // power of 10 that divides evenly.
- int roundedMs = ms;
- for(int round = 1000; round > 0; round /= 10) {
- if (ms % round == 0) {
- roundedMs = ms / round;
- break;
- }
- }
- expected += roundedMs;
- assertEquals(expected, buffer.toString());
- assertEquals(Integer.toString(year) + "-12-25",
- new DateWritable((int) dates.vector[ms - 1000]).toString());
+ row = (OrcStruct) rows.next(row);
+ assertEquals(new TimestampWritable
+ (Timestamp.valueOf(year + "-05-05 12:34:56." + ms)),
+ row.getFieldValue(0));
+ assertEquals(new DateWritable(new Date(year - 1900, 11, 25)),
+ row.getFieldValue(1));
}
}
- rows.nextBatch(batch);
- assertEquals(0, batch.size);
}
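One subtlety in the restored expected values: the deprecated java.sql.Date(int, int, int) constructor takes a 1900-offset year and a zero-based month, so new Date(year - 1900, 11, 25) really is December 25 of the loop year. A quick check, assuming java.sql.Date as in the test's imports:

// Sketch: the deprecated constructor's offsets (month 11 = December).
java.sql.Date d = new java.sql.Date(2000 - 1900, 11, 25);
System.out.println(new DateWritable(d));  // prints 2000-12-25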
@Test
@@ -1474,7 +1483,6 @@ public class TestVectorOrcFile {
for(int c=0; c < batch.cols.length; ++c) {
batch.cols[c].setRepeating(true);
}
- ((UnionColumnVector) batch.cols[1]).fields[0].isRepeating = true;
setUnion(batch, 0, null, 0, 1732050807, null, null);
for(int i=0; i < 5; ++i) {
writer.addRowBatch(batch);
@@ -1532,115 +1540,83 @@ public class TestVectorOrcFile {
RecordReader rows = reader.rows();
assertEquals(0, rows.getRowNumber());
assertEquals(0.0, rows.getProgress(), 0.000001);
-
- schema = reader.getSchema();
- batch = schema.createRowBatch(74);
- assertEquals(0, rows.getRowNumber());
- rows.nextBatch(batch);
- assertEquals(74, batch.size);
- assertEquals(74, rows.getRowNumber());
- TimestampColumnVector ts = (TimestampColumnVector) batch.cols[0];
- UnionColumnVector union = (UnionColumnVector) batch.cols[1];
- LongColumnVector longs = (LongColumnVector) union.fields[0];
- BytesColumnVector strs = (BytesColumnVector) union.fields[1];
- DecimalColumnVector decs = (DecimalColumnVector) batch.cols[2];
-
+ assertEquals(true, rows.hasNext());
+ OrcStruct row = (OrcStruct) rows.next(null);
+ assertEquals(1, rows.getRowNumber());
+ ObjectInspector inspector = reader.getObjectInspector();
assertEquals("struct<time:timestamp,union:uniontype<int,string>,decimal:decimal(38,18)>",
- schema.toString());
- assertEquals("2000-03-12 15:00:00.0", ts.asScratchTimestamp(0).toString());
- assertEquals(0, union.tags[0]);
- assertEquals(42, longs.vector[0]);
- assertEquals("12345678.6547456", decs.vector[0].toString());
-
- assertEquals("2000-03-20 12:00:00.123456789", ts.asScratchTimestamp(1).toString());
- assertEquals(1, union.tags[1]);
- assertEquals("hello", strs.toString(1));
- assertEquals("-5643.234", decs.vector[1].toString());
-
- assertEquals(false, ts.noNulls);
- assertEquals(false, union.noNulls);
- assertEquals(false, decs.noNulls);
- assertEquals(true, ts.isNull[2]);
- assertEquals(true, union.isNull[2]);
- assertEquals(true, decs.isNull[2]);
-
- assertEquals(true, ts.isNull[3]);
- assertEquals(false, union.isNull[3]);
- assertEquals(0, union.tags[3]);
- assertEquals(true, longs.isNull[3]);
- assertEquals(true, decs.isNull[3]);
-
- assertEquals(true, ts.isNull[4]);
- assertEquals(false, union.isNull[4]);
- assertEquals(1, union.tags[4]);
- assertEquals(true, strs.isNull[4]);
- assertEquals(true, decs.isNull[4]);
-
- assertEquals(false, ts.isNull[5]);
- assertEquals("1970-01-01 00:00:00.0", ts.asScratchTimestamp(5).toString());
- assertEquals(false, union.isNull[5]);
- assertEquals(0, union.tags[5]);
- assertEquals(false, longs.isNull[5]);
- assertEquals(200000, longs.vector[5]);
- assertEquals(false, decs.isNull[5]);
- assertEquals("10000000000000000000", decs.vector[5].toString());
-
+ inspector.getTypeName());
+ assertEquals(new TimestampWritable(Timestamp.valueOf("2000-03-12 15:00:00")),
+ row.getFieldValue(0));
+ OrcUnion union = (OrcUnion) row.getFieldValue(1);
+ assertEquals(0, union.getTag());
+ assertEquals(new IntWritable(42), union.getObject());
+ assertEquals(new HiveDecimalWritable(HiveDecimal.create("12345678.6547456")),
+ row.getFieldValue(2));
+ row = (OrcStruct) rows.next(row);
+ assertEquals(2, rows.getRowNumber());
+ assertEquals(new TimestampWritable(Timestamp.valueOf("2000-03-20 12:00:00.123456789")),
+ row.getFieldValue(0));
+ assertEquals(1, union.getTag());
+ assertEquals(new Text("hello"), union.getObject());
+ assertEquals(new HiveDecimalWritable(HiveDecimal.create("-5643.234")),
+ row.getFieldValue(2));
+ row = (OrcStruct) rows.next(row);
+ assertEquals(null, row.getFieldValue(0));
+ assertEquals(null, row.getFieldValue(1));
+ assertEquals(null, row.getFieldValue(2));
+ row = (OrcStruct) rows.next(row);
+ assertEquals(null, row.getFieldValue(0));
+ union = (OrcUnion) row.getFieldValue(1);
+ assertEquals(0, union.getTag());
+ assertEquals(null, union.getObject());
+ assertEquals(null, row.getFieldValue(2));
+ row = (OrcStruct) rows.next(row);
+ assertEquals(null, row.getFieldValue(0));
+ assertEquals(1, union.getTag());
+ assertEquals(null, union.getObject());
+ assertEquals(null, row.getFieldValue(2));
+ row = (OrcStruct) rows.next(row);
+ assertEquals(new TimestampWritable(Timestamp.valueOf("1970-01-01 00:00:00")),
+ row.getFieldValue(0));
+ assertEquals(new IntWritable(200000), union.getObject());
+ assertEquals(new HiveDecimalWritable(HiveDecimal.create("10000000000000000000")),
+ row.getFieldValue(2));
rand = new Random(42);
for(int i=1970; i < 2038; ++i) {
- int row = 6 + i - 1970;
- assertEquals(Timestamp.valueOf(i + "-05-05 12:34:56." + i),
- ts.asScratchTimestamp(row));
+ row = (OrcStruct) rows.next(row);
+ assertEquals(new TimestampWritable(Timestamp.valueOf(i + "-05-05 12:34:56." + i)),
+ row.getFieldValue(0));
if ((i & 1) == 0) {
- assertEquals(0, union.tags[row]);
- assertEquals(i*i, longs.vector[row]);
+ assertEquals(0, union.getTag());
+ assertEquals(new IntWritable(i*i), union.getObject());
} else {
- assertEquals(1, union.tags[row]);
- assertEquals(Integer.toString(i * i), strs.toString(row));
+ assertEquals(1, union.getTag());
+ assertEquals(new Text(Integer.toString(i * i)), union.getObject());
}
assertEquals(new HiveDecimalWritable(HiveDecimal.create(new BigInteger(64, rand),
- rand.nextInt(18))), decs.vector[row]);
- }
-
- // rebuild the row batch, so that we can read by 1000 rows
- batch = schema.createRowBatch(1000);
- ts = (TimestampColumnVector) batch.cols[0];
- union = (UnionColumnVector) batch.cols[1];
- longs = (LongColumnVector) union.fields[0];
- strs = (BytesColumnVector) union.fields[1];
- decs = (DecimalColumnVector) batch.cols[2];
-
- for(int i=0; i < 5; ++i) {
- rows.nextBatch(batch);
- assertEquals("batch " + i, 1000, batch.size);
- assertEquals("batch " + i, false, union.isRepeating);
- assertEquals("batch " + i, true, union.noNulls);
- for(int r=0; r < batch.size; ++r) {
- assertEquals("bad tag at " + i + "." +r, 0, union.tags[r]);
- }
- assertEquals("batch " + i, true, longs.isRepeating);
- assertEquals("batch " + i, 1732050807, longs.vector[0]);
- }
-
- rows.nextBatch(batch);
- assertEquals(3, batch.size);
- assertEquals(0, union.tags[0]);
- assertEquals(0, longs.vector[0]);
- assertEquals(0, union.tags[1]);
- assertEquals(10, longs.vector[1]);
- assertEquals(0, union.tags[2]);
- assertEquals(138, longs.vector[2]);
-
- rows.nextBatch(batch);
- assertEquals(0, batch.size);
+ rand.nextInt(18))), row.getFieldValue(2));
+ }
+ for(int i=0; i < 5000; ++i) {
+ row = (OrcStruct) rows.next(row);
+ assertEquals(new IntWritable(1732050807), union.getObject());
+ }
+ row = (OrcStruct) rows.next(row);
+ assertEquals(new IntWritable(0), union.getObject());
+ row = (OrcStruct) rows.next(row);
+ assertEquals(new IntWritable(10), union.getObject());
+ row = (OrcStruct) rows.next(row);
+ assertEquals(new IntWritable(138), union.getObject());
+ assertEquals(false, rows.hasNext());
assertEquals(1.0, rows.getProgress(), 0.00001);
assertEquals(reader.getNumberOfRows(), rows.getRowNumber());
rows.seekToRow(1);
- rows.nextBatch(batch);
- assertEquals(1000, batch.size);
- assertEquals(Timestamp.valueOf("2000-03-20 12:00:00.123456789"), ts.asScratchTimestamp(0));
- assertEquals(1, union.tags[0]);
- assertEquals("hello", strs.toString(0));
- assertEquals(new HiveDecimalWritable(HiveDecimal.create("-5643.234")), decs.vector[0]);
+ row = (OrcStruct) rows.next(row);
+ assertEquals(new TimestampWritable(Timestamp.valueOf("2000-03-20 12:00:00.123456789")),
+ row.getFieldValue(0));
+ assertEquals(1, union.getTag());
+ assertEquals(new Text("hello"), union.getObject());
+ assertEquals(new HiveDecimalWritable(HiveDecimal.create("-5643.234")), row.getFieldValue(2));
rows.close();
}
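A detail that makes the union assertions read oddly at first: next(row) recycles the OrcStruct and, inside it, the same OrcUnion instance, which is presumably why union stays valid across rows without being re-fetched. Minimal access sketch for a uniontype<int,string> column, assuming rows as above:

// Sketch: getTag() picks the branch (0 = int, 1 = string here);
// getObject() returns that branch's current payload.
OrcStruct row = (OrcStruct) rows.next(null);
OrcUnion union = (OrcUnion) row.getFieldValue(1);
System.out.println("tag=" + union.getTag() + " value=" + union.getObject());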
@@ -1671,22 +1647,17 @@ public class TestVectorOrcFile {
writer.close();
Reader reader = OrcFile.createReader(testFilePath,
OrcFile.readerOptions(conf).filesystem(fs));
- assertEquals(CompressionKind.SNAPPY, reader.getCompressionKind());
RecordReader rows = reader.rows();
- batch = reader.getSchema().createRowBatch(1000);
rand = new Random(12);
- LongColumnVector longs = (LongColumnVector) batch.cols[0];
- BytesColumnVector strs = (BytesColumnVector) batch.cols[1];
- for(int b=0; b < 10; ++b) {
- rows.nextBatch(batch);
- assertEquals(1000, batch.size);
- for(int r=0; r < batch.size; ++r) {
- assertEquals(rand.nextInt(), longs.vector[r]);
- assertEquals(Integer.toHexString(rand.nextInt()), strs.toString(r));
- }
+ OrcStruct row = null;
+ for(int i=0; i < 10000; ++i) {
+ assertEquals(true, rows.hasNext());
+ row = (OrcStruct) rows.next(row);
+ assertEquals(rand.nextInt(), ((IntWritable) row.getFieldValue(0)).get());
+ assertEquals(Integer.toHexString(rand.nextInt()),
+ row.getFieldValue(1).toString());
}
- rows.nextBatch(batch);
- assertEquals(0, batch.size);
+ assertEquals(false, rows.hasNext());
rows.close();
}
@@ -1726,23 +1697,18 @@ public class TestVectorOrcFile {
assertEquals(0, stripe.getIndexLength());
RecordReader rows = reader.rows();
rand = new Random(24);
- batch = reader.getSchema().createRowBatch(1000);
- LongColumnVector longs = (LongColumnVector) batch.cols[0];
- BytesColumnVector strs = (BytesColumnVector) batch.cols[1];
- for(int i=0; i < 50; ++i) {
- rows.nextBatch(batch);
- assertEquals("batch " + i, 1000, batch.size);
- for(int j=0; j < 200; ++j) {
- int intVal = rand.nextInt();
- String strVal = Integer.toBinaryString(rand.nextInt());
- for (int k = 0; k < 5; ++k) {
- assertEquals(intVal, longs.vector[j * 5 + k]);
- assertEquals(strVal, strs.toString(j * 5 + k));
- }
+ OrcStruct row = null;
+ for(int i=0; i < 10000; ++i) {
+ int intVal = rand.nextInt();
+ String strVal = Integer.toBinaryString(rand.nextInt());
+ for(int j=0; j < 5; ++j) {
+ assertEquals(true, rows.hasNext());
+ row = (OrcStruct) rows.next(row);
+ assertEquals(intVal, ((IntWritable) row.getFieldValue(0)).get());
+ assertEquals(strVal, row.getFieldValue(1).toString());
}
}
- rows.nextBatch(batch);
- assertEquals(0, batch.size);
+ assertEquals(false, rows.hasNext());
rows.close();
}
@@ -1806,18 +1772,34 @@ public class TestVectorOrcFile {
assertEquals(1000,
colIndex.getEntry(0).getStatistics().getNumberOfValues());
}
- batch = reader.getSchema().createRowBatch();
- int nextRowInBatch = -1;
- for(int i=COUNT-1; i >= 0; --i, --nextRowInBatch) {
- // if we have consumed the previous batch read a new one
- if (nextRowInBatch < 0) {
- long base = Math.max(i - 1023, 0);
- rows.seekToRow(base);
- assertEquals("row " + i, true, rows.nextBatch(batch));
- nextRowInBatch = batch.size - 1;
- }
- checkRandomRow(batch, intValues, doubleValues,
- stringValues, byteValues, words, i, nextRowInBatch);
+ OrcStruct row = null;
+ for(int i=COUNT-1; i >= 0; --i) {
+ rows.seekToRow(i);
+ row = (OrcStruct) rows.next(row);
+ BigRow expected = createRandomRow(intValues, doubleValues,
+ stringValues, byteValues, words, i);
+ assertEquals(expected.boolean1.booleanValue(),
+ ((BooleanWritable) row.getFieldValue(0)).get());
+ assertEquals(expected.byte1.byteValue(),
+ ((ByteWritable) row.getFieldValue(1)).get());
+ assertEquals(expected.short1.shortValue(),
+ ((ShortWritable) row.getFieldValue(2)).get());
+ assertEquals(expected.int1.intValue(),
+ ((IntWritable) row.getFieldValue(3)).get());
+ assertEquals(expected.long1.longValue(),
+ ((LongWritable) row.getFieldValue(4)).get());
+ assertEquals(expected.float1,
+ ((FloatWritable) row.getFieldValue(5)).get(), 0.0001);
+ assertEquals(expected.double1,
+ ((DoubleWritable) row.getFieldValue(6)).get(), 0.0001);
+ assertEquals(expected.bytes1, row.getFieldValue(7));
+ assertEquals(expected.string1, row.getFieldValue(8));
+ List<InnerStruct> expectedList = expected.middle.list;
+ List<OrcStruct> actualList =
+ (List<OrcStruct>) ((OrcStruct) row.getFieldValue(9)).getFieldValue(0);
+ compareList(expectedList, actualList, "middle list " + i);
+ compareList(expected.list, (List<OrcStruct>) row.getFieldValue(10),
+ "list " + i);
}
rows.close();
Iterator<StripeInformation> stripeIterator =
@@ -1843,20 +1825,41 @@ public class TestVectorOrcFile {
.range(offsetOfStripe2, offsetOfStripe4 - offsetOfStripe2)
.include(columns));
rows.seekToRow(lastRowOfStripe2);
- // we only want two rows
- batch = reader.getSchema().createRowBatch(2);
- assertEquals(true, rows.nextBatch(batch));
- assertEquals(1, batch.size);
- assertEquals(intValues[(int) lastRowOfStripe2], getLong(batch, 0));
- assertEquals(stringValues[(int) lastRowOfStripe2],
- getText(batch, 0).toString());
- assertEquals(true, rows.nextBatch(batch));
- assertEquals(intValues[(int) lastRowOfStripe2 + 1], getLong(batch, 0));
- assertEquals(stringValues[(int) lastRowOfStripe2 + 1],
- getText(batch, 0).toString());
+ for(int i = 0; i < 2; ++i) {
+ row = (OrcStruct) rows.next(row);
+ BigRow expected = createRandomRow(intValues, doubleValues,
+ stringValues, byteValues, words,
+ (int) (lastRowOfStripe2 + i));
+
+ assertEquals(expected.long1.longValue(),
+ ((LongWritable) row.getFieldValue(4)).get());
+ assertEquals(expected.string1, row.getFieldValue(8));
+ }
rows.close();
}
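Note the seek semantics the reverted test relies on: with the row API,
seekToRow(n) positions the reader so that the very next next() call returns
row n, so reading two consecutive rows across a stripe boundary is simply
(sketch, names as in the test above):

    rows.seekToRow(lastRowOfStripe2);
    row = (OrcStruct) rows.next(row);   // row lastRowOfStripe2
    row = (OrcStruct) rows.next(row);   // row lastRowOfStripe2 + 1

The batch variant being removed had to seek to a batch-aligned base row and
then index within the batch, which is what the deleted nextRowInBatch
bookkeeping was doing.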
+ private void compareInner(InnerStruct expect,
+ OrcStruct actual,
+ String context) throws Exception {
+ if (expect == null || actual == null) {
+ assertEquals(context, null, expect);
+ assertEquals(context, null, actual);
+ } else {
+ assertEquals(context, expect.int1,
+ ((IntWritable) actual.getFieldValue(0)).get());
+ assertEquals(context, expect.string1, actual.getFieldValue(1));
+ }
+ }
+
+ private void compareList(List<InnerStruct> expect,
+ List<OrcStruct> actual,
+ String context) throws Exception {
+ assertEquals(context, expect.size(), actual.size());
+ for(int j=0; j < expect.size(); ++j) {
+ compareInner(expect.get(j), actual.get(j), context + " at " + j);
+ }
+ }
+
private void appendRandomRow(VectorizedRowBatch batch,
long[] intValues, double[] doubleValues,
String[] stringValues,
@@ -1871,18 +1874,17 @@ public class TestVectorOrcFile {
new MiddleStruct(inner, inner2), list(), map(inner, inner2));
}
- private void checkRandomRow(VectorizedRowBatch batch,
- long[] intValues, double[] doubleValues,
- String[] stringValues,
- BytesWritable[] byteValues,
- String[] words, int i, int rowInBatch) {
+ private BigRow createRandomRow(long[] intValues, double[] doubleValues,
+ String[] stringValues,
+ BytesWritable[] byteValues,
+ String[] words, int i) {
InnerStruct inner = new InnerStruct((int) intValues[i], stringValues[i]);
InnerStruct inner2 = new InnerStruct((int) (intValues[i] >> 32),
words[i % words.length] + "-x");
- checkBigRow(batch, rowInBatch, i, (intValues[i] & 1) == 0, (byte) intValues[i],
+ return new BigRow((intValues[i] & 1) == 0, (byte) intValues[i],
(short) intValues[i], (int) intValues[i], intValues[i],
- (float) doubleValues[i], doubleValues[i], byteValues[i], stringValues[i],
- new MiddleStruct(inner, inner2), list(), map(inner, inner2));
+ (float) doubleValues[i], doubleValues[i], byteValues[i], stringValues[i],
+ new MiddleStruct(inner, inner2), list(), map(inner, inner2));
}
private static class MyMemoryManager extends MemoryManager {
@@ -2043,19 +2045,15 @@ public class TestVectorOrcFile {
.range(0L, Long.MAX_VALUE)
.include(new boolean[]{true, true, true})
.searchArgument(sarg, new String[]{null, "int1", "string1"}));
- batch = reader.getSchema().createRowBatch(2000);
- LongColumnVector ints = (LongColumnVector) batch.cols[0];
- BytesColumnVector strs = (BytesColumnVector) batch.cols[1];
-
assertEquals(1000L, rows.getRowNumber());
- assertEquals(true, rows.nextBatch(batch));
- assertEquals(1000, batch.size);
-
+ OrcStruct row = null;
for(int i=1000; i < 2000; ++i) {
- assertEquals(300 * i, ints.vector[i - 1000]);
- assertEquals(Integer.toHexString(10*i), strs.toString(i - 1000));
+ assertTrue(rows.hasNext());
+ row = (OrcStruct) rows.next(row);
+ assertEquals(300 * i, ((IntWritable) row.getFieldValue(0)).get());
+ assertEquals(Integer.toHexString(10*i), row.getFieldValue(1).toString());
}
- assertEquals(false, rows.nextBatch(batch));
+ assertTrue(!rows.hasNext());
assertEquals(3500, rows.getRowNumber());
// look through the file with no rows selected
@@ -2084,26 +2082,40 @@ public class TestVectorOrcFile {
.range(0L, Long.MAX_VALUE)
.include(new boolean[]{true, true, true})
.searchArgument(sarg, new String[]{null, "int1", "string1"}));
- assertEquals(0, rows.getRowNumber());
- assertEquals(true, rows.nextBatch(batch));
- assertEquals(1000, batch.size);
- assertEquals(3000, rows.getRowNumber());
+ row = null;
for(int i=0; i < 1000; ++i) {
- assertEquals(300 * i, ints.vector[i]);
- assertEquals(Integer.toHexString(10*i), strs.toString(i));
+ assertTrue(rows.hasNext());
+ assertEquals(i, rows.getRowNumber());
+ row = (OrcStruct) rows.next(row);
+ assertEquals(300 * i, ((IntWritable) row.getFieldValue(0)).get());
+ assertEquals(Integer.toHexString(10*i), row.getFieldValue(1).toString());
}
-
- assertEquals(true, rows.nextBatch(batch));
- assertEquals(500, batch.size);
- assertEquals(3500, rows.getRowNumber());
for(int i=3000; i < 3500; ++i) {
- assertEquals(300 * i, ints.vector[i - 3000]);
- assertEquals(Integer.toHexString(10*i), strs.toString(i - 3000));
+ assertTrue(rows.hasNext());
+ assertEquals(i, rows.getRowNumber());
+ row = (OrcStruct) rows.next(row);
+ assertEquals(300 * i, ((IntWritable) row.getFieldValue(0)).get());
+ assertEquals(Integer.toHexString(10*i), row.getFieldValue(1).toString());
}
- assertEquals(false, rows.nextBatch(batch));
+ assertTrue(!rows.hasNext());
assertEquals(3500, rows.getRowNumber());
}
+ private static String pad(String value, int length) {
+ if (value.length() == length) {
+ return value;
+ } else if (value.length() > length) {
+ return value.substring(0, length);
+ } else {
+ StringBuilder buf = new StringBuilder();
+ buf.append(value);
+ for(int i=0; i < length - value.length(); ++i) {
+ buf.append(' ');
+ }
+ return buf.toString();
+ }
+ }
+
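The pad() helper reintroduced above mimics CHAR(n) semantics for the
expected test values: truncate when the input is longer than the target
length, right-pad with spaces when it is shorter. For example:

    pad("abc", 5)     // -> "abc  "
    pad("abcdef", 3)  // -> "abc"

It is used further down to check what is presumably a char(10) column
against pad(Integer.toHexString(r), 10).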
/**
* Test all of the types that have distinct ORC writers using the vectorized
* writer with different combinations of repeating and null values.
@@ -2220,7 +2232,8 @@ public class TestVectorOrcFile {
((LongColumnVector) batch.cols[6]).vector[r] =
new DateWritable(new Date(111, 6, 1)).getDays() + r;
- Timestamp ts = new Timestamp(115, 9, 25, 10, 11, 59 + r, 999999999);
+ Timestamp ts = new Timestamp(115, 9, 23, 10, 11, 59, 999999999);
+ ts.setTime(ts.getTime() + r * 1000);
((TimestampColumnVector) batch.cols[7]).set(r, ts);
((DecimalColumnVector) batch.cols[8]).vector[r] =
new HiveDecimalWritable("1.234567");
@@ -2289,125 +2302,118 @@ public class TestVectorOrcFile {
assertEquals(14813, ((StringColumnStatistics) stats[12]).getSum());
RecordReader rows = reader.rows();
- batch = reader.getSchema().createRowBatch(1024);
- BytesColumnVector bins = (BytesColumnVector) batch.cols[0];
- LongColumnVector bools = (LongColumnVector) batch.cols[1];
- LongColumnVector bytes = (LongColumnVector) batch.cols[2];
- LongColumnVector longs = (LongColumnVector) batch.cols[3];
- DoubleColumnVector floats = (DoubleColumnVector) batch.cols[4];
- DoubleColumnVector doubles = (DoubleColumnVector) batch.cols[5];
- LongColumnVector dates = (LongColumnVector) batch.cols[6];
- TimestampColumnVector times = (TimestampColumnVector) batch.cols[7];
- DecimalColumnVector decs = (DecimalColumnVector) batch.cols[8];
- BytesColumnVector strs = (BytesColumnVector) batch.cols[9];
- BytesColumnVector chars = (BytesColumnVector) batch.cols[10];
- BytesColumnVector vcs = (BytesColumnVector) batch.cols[11];
- StructColumnVector structs = (StructColumnVector) batch.cols[12];
- UnionColumnVector unions = (UnionColumnVector) batch.cols[13];
- ListColumnVector lists = (ListColumnVector) batch.cols[14];
- MapColumnVector maps = (MapColumnVector) batch.cols[15];
- LongColumnVector structInts = (LongColumnVector) structs.fields[0];
- LongColumnVector unionInts = (LongColumnVector) unions.fields[1];
- LongColumnVector listInts = (LongColumnVector) lists.child;
- BytesColumnVector mapKeys = (BytesColumnVector) maps.keys;
- BytesColumnVector mapValues = (BytesColumnVector) maps.values;
-
- assertEquals(true, rows.nextBatch(batch));
- assertEquals(1024, batch.size);
+ OrcStruct row = null;
// read the 1024 nulls
- for(int f=0; f < batch.cols.length; ++f) {
- assertEquals("field " + f,
- true, batch.cols[f].isRepeating);
- assertEquals("field " + f,
- false, batch.cols[f].noNulls);
- assertEquals("field " + f,
- true, batch.cols[f].isNull[0]);
+ for(int r=0; r < 1024; ++r) {
+ assertEquals(true, rows.hasNext());
+ row = (OrcStruct) rows.next(row);
+ for(int f=0; f < row.getNumFields(); ++f) {
+ assertEquals("non-null on row " + r + " field " + f,
+ null, row.getFieldValue(f));
+ }
}
// read the 1024 repeat values
- assertEquals(true, rows.nextBatch(batch));
- assertEquals(1024, batch.size);
for(int r=0; r < 1024; ++r) {
- assertEquals("row " + r, "Horton", bins.toString(r));
- assertEquals("row " + r, 1, bools.vector[r]);
- assertEquals("row " + r, -126, bytes.vector[r]);
- assertEquals("row " + r, 1311768467463790320L, longs.vector[r]);
- assertEquals("row " + r, 1.125, floats.vector[r], 0.00001);
- assertEquals("row " + r, 9.765625E-4, doubles.vector[r], 0.000001);
- assertEquals("row " + r, "2011-07-01",
- new DateWritable((int) dates.vector[r]).toString());
+ assertEquals(true, rows.hasNext());
+ row = (OrcStruct) rows.next(row);
+ assertEquals("row " + r, "48 6f 72 74 6f 6e",
+ row.getFieldValue(0).toString());
+ assertEquals("row " + r, "true", row.getFieldValue(1).toString());
+ assertEquals("row " + r, "-126", row.getFieldValue(2).toString());
+ assertEquals("row " + r, "1311768467463790320",
+ row.getFieldValue(3).toString());
+ assertEquals("row " + r, "1.125", row.getFieldValue(4).toString());
+ assertEquals("row " + r, "9.765625E-4", row.getFieldValue(5).toString());
+ assertEquals("row " + r, "2011-07-01", row.getFieldValue(6).toString());
assertEquals("row " + r, "2015-10-23 10:11:59.999999999",
- times.asScratchTimestamp(r).toString());
- assertEquals("row " + r, "1.234567", decs.vector[r].toString());
- assertEquals("row " + r, "Echelon", strs.toString(r));
- assertEquals("row " + r, "Juggernaut", chars.toString(r));
- assertEquals("row " + r, "Dreadnaugh", vcs.toString(r));
- assertEquals("row " + r, 123, structInts.vector[r]);
- assertEquals("row " + r, 1, unions.tags[r]);
- assertEquals("row " + r, 1234, unionInts.vector[r]);
- assertEquals("row " + r, 3, lists.lengths[r]);
- assertEquals("row " + r, true, listInts.isRepeating);
- assertEquals("row " + r, 31415, listInts.vector[0]);
- assertEquals("row " + r, 3, maps.lengths[r]);
- assertEquals("row " + r, "ORC", mapKeys.toString((int) maps.offsets[r]));
- assertEquals("row " + r, "Hive", mapKeys.toString((int) maps.offsets[r] + 1));
- assertEquals("row " + r, "LLAP", mapKeys.toString((int) maps.offsets[r] + 2));
- assertEquals("row " + r, "fast", mapValues.toString((int) maps.offsets[r]));
- assertEquals("row " + r, "fast", mapValues.toString((int) maps.offsets[r] + 1));
- assertEquals("row " + r, "fast", mapValues.toString((int) maps.offsets[r] + 2));
+ row.getFieldValue(7).toString());
+ assertEquals("row " + r, "1.234567", row.getFieldValue(8).toString());
+ assertEquals("row " + r, "Echelon", row.getFieldValue(9).toString());
+ assertEquals("row " + r, "Juggernaut", row.getFieldValue(10).toString());
+ assertEquals("row " + r, "Dreadnaugh", row.getFieldValue(11).toString());
+ assertEquals("row " + r, "{123}", row.getFieldValue(12).toString());
+ assertEquals("row " + r, "union(1, 1234)",
+ row.getFieldValue(13).toString());
+ assertEquals("row " + r, "[31415, 31415, 31415]",
+ row.getFieldValue(14).toString());
+ assertEquals("row " + r, "{ORC=fast, Hive=fast, LLAP=fast}",
+ row.getFieldValue(15).toString());
}
// read the second set of 1024 nulls
- assertEquals(true, rows.nextBatch(batch));
- assertEquals(1024, batch.size);
- for(int f=0; f < batch.cols.length; ++f) {
- assertEquals("field " + f,
- true, batch.cols[f].isRepeating);
- assertEquals("field " + f,
- false, batch.cols[f].noNulls);
- assertEquals("field " + f,
- true, batch.cols[f].isNull[0]);
+ for(int r=0; r < 1024; ++r) {
+ assertEquals(true, rows.hasNext());
+ row = (OrcStruct) rows.next(row);
+ for(int f=0; f < row.getNumFields(); ++f) {
+ assertEquals("non-null on row " + r + " field " + f,
+ null, row.getFieldValue(f));
+ }
}
-
- assertEquals(true, rows.nextBatch(batch));
- assertEquals(1024, batch.size);
for(int r=0; r < 1024; ++r) {
- String hex = Integer.toHexString(r);
-
- assertEquals("row " + r, hex, bins.toString(r));
- assertEquals("row " + r, r % 2 == 1 ? 1 : 0, bools.vector[r]);
- assertEquals("row " + r, (byte) (r % 255), bytes.vector[r]);
- assertEquals("row " + r, 31415L * r, longs.vector[r]);
- assertEquals("row " + r, 1.125F * r, floats.vector[r], 0.0001);
- assertEquals("row " + r, 0.0009765625 * r, doubles.vector[r], 0.000001);
- assertEquals("row " + r, new DateWritable(new Date(111, 6, 1 + r)),
- new DateWritable((int) dates.vector[r]));
+ assertEquals(true, rows.hasNext());
+ row = (OrcStruct) rows.next(row);
+ byte[] hex = Integer.toHexString(r).getBytes();
+ StringBuilder expected = new StringBuilder();
+ for(int i=0; i < hex.length; ++i) {
+ if (i != 0) {
+ expected.append(' ');
+ }
+ expected.append(Integer.toHexString(hex[i]));
+ }
+ assertEquals("row " + r, expected.toString(),
+ row.getFieldValue(0).toString());
+ assertEquals("row " + r, r % 2 == 1 ? "true" : "false",
+ row.getFieldValue(1).toString());
+ assertEquals("row " + r, Integer.toString((byte) (r % 255)),
+ row.getFieldValue(2).toString());
+ assertEquals("row " + r, Long.toString(31415L * r),
+ row.getFieldValue(3).toString());
+ assertEquals("row " + r, Float.toString(1.125F * r),
+ row.getFieldValue(4).toString());
+ assertEquals("row " + r, Double.toString(0.0009765625 * r),
+ row.getFieldValue(5).toString());
+ assertEquals("row " + r, new Date(111, 6, 1 + r).toString(),
+ row.getFieldValue(6).toString());
+ Timestamp ts = new Timestamp(115, 9, 23, 10, 11, 59, 999999999);
+ ts.setTime(ts.getTime() + r * 1000);
assertEquals("row " + r,
- new Timestamp(115, 9, 25, 10, 11, 59 + r, 999999999),
- times.asScratchTimestamp(r));
- assertEquals("row " + r, "1.234567", decs.vector[r].toString());
- assertEquals("row " + r, Integer.toString(r), strs.toString(r));
- assertEquals("row " + r, Integer.toHexString(r), chars.toString(r));
- assertEquals("row " + r, Integer.toHexString(r * 128), vcs.toString(r));
- assertEquals("row " + r, r + 13, structInts.vector[r]);
- assertEquals("row " + r, 1, unions.tags[r]);
- assertEquals("row " + r, r + 42, unionInts.vector[r]);
- assertEquals("row " + r, 3, lists.lengths[r]);
- assertEquals("row " + r, 31415, listInts.vector[(int) lists.offsets[r]]);
- assertEquals("row " + r, 31416, listInts.vector[(int) lists.offsets[r] + 1]);
- assertEquals("row " + r, 31417, listInts.vector[(int) lists.offsets[r] + 2]);
- assertEquals("row " + r, 3, maps.lengths[3]);
- assertEquals("row " + r, Integer.toHexString(3 * r), mapKeys.toString((int) maps.offsets[r]));
- assertEquals("row " + r, Integer.toString(3 * r), mapValues.toString((int) maps.offsets[r]));
- assertEquals("row " + r, Integer.toHexString(3 * r + 1), mapKeys.toString((int) maps.offsets[r] + 1));
- assertEquals("row " + r, Integer.toString(3 * r + 1), mapValues.toString((int) maps.offsets[r] + 1));
- assertEquals("row " + r, Integer.toHexString(3 * r + 2), mapKeys.toString((int) maps.offsets[r] + 2));
- assertEquals("row " + r, Integer.toString(3 * r + 2), mapValues.toString((int) maps.offsets[r] + 2));
+ ts.toString(),
+ row.getFieldValue(7).toString());
+ assertEquals("row " + r, "1.234567", row.getFieldValue(8).toString());
+ assertEquals("row " + r, Integer.toString(r),
+ row.getFieldValue(9).toString());
+ assertEquals("row " + r, pad(Integer.toHexString(r), 10),
+ row.getFieldValue(10).toString());
+ assertEquals("row " + r, Integer.toHexString(r * 128),
+ row.getFieldValue(11).toString());
+ assertEquals("row " + r, "{" + Integer.toString(r + 13) + "}",
+ row.getFieldValue(12).toString());
+ assertEquals("row " + r, "union(1, " + Integer.toString(r + 42) + ")",
+ row.getFieldValue(13).toString());
+ assertEquals("row " + r, "[31415, 31416, 31417]",
+ row.getFieldValue(14).toString());
+ expected = new StringBuilder();
+ expected.append('{');
+ expected.append(Integer.toHexString(3 * r));
+ expected.append('=');
+ expected.append(3 * r);
+ expected.append(", ");
+ expected.append(Integer.toHexString(3 * r + 1));
+ expected.append('=');
+ expected.append(3 * r + 1);
+ expected.append(", ");
+ expected.append(Integer.toHexString(3 * r + 2));
+ expected.append('=');
+ expected.append(3 * r + 2);
+ expected.append('}');
+ assertEquals("row " + r, expected.toString(),
+ row.getFieldValue(15).toString());
}
// should have no more rows
- assertEquals(false, rows.nextBatch(batch));
+ assertEquals(false, rows.hasNext());
}
private static String makeString(BytesColumnVector vector, int row) {
@@ -2449,8 +2455,7 @@ public class TestVectorOrcFile {
Reader reader = OrcFile.createReader(testFilePath,
OrcFile.readerOptions(conf));
RecordReader rows = reader.rows();
- batch = reader.getSchema().createRowBatch();
- assertEquals(true, rows.nextBatch(batch));
+ batch = rows.nextBatch(null);
assertEquals(4, batch.size);
// ORC currently trims the output strings. See HIVE-12286
assertEquals("",
@@ -2499,20 +2504,19 @@ public class TestVectorOrcFile {
Reader reader = OrcFile.createReader(testFilePath,
OrcFile.readerOptions(conf));
RecordReader rows = reader.rows();
- batch = reader.getSchema().createRowBatch();
- assertEquals(true, rows.nextBatch(batch));
+ batch = rows.nextBatch(null);
assertEquals(1024, batch.size);
for(int r=0; r < 1024; ++r) {
assertEquals(Integer.toString(r * 10001),
makeString((BytesColumnVector) batch.cols[0], r));
}
- assertEquals(true, rows.nextBatch(batch));
+ batch = rows.nextBatch(batch);
assertEquals(1024, batch.size);
for(int r=0; r < 1024; ++r) {
assertEquals("Halloween",
makeString((BytesColumnVector) batch.cols[0], r));
}
- assertEquals(false, rows.nextBatch(batch));
+ assertEquals(false, rows.hasNext());
}
@Test
@@ -2537,21 +2541,18 @@ public class TestVectorOrcFile {
Reader reader = OrcFile.createReader(testFilePath,
OrcFile.readerOptions(conf));
RecordReader rows = reader.rows();
- batch = reader.getSchema().createRowBatch();
- rows.nextBatch(batch);
- assertEquals(1024, batch.size);
- StructColumnVector inner = (StructColumnVector) batch.cols[0];
- LongColumnVector vec = (LongColumnVector) inner.fields[0];
+ OrcStruct row = null;
for(int r=0; r < 1024; ++r) {
+ assertEquals(true, rows.hasNext());
+ row = (OrcStruct) rows.next(row);
+ OrcStruct inner = (OrcStruct) row.getFieldValue(0);
if (r < 200 || (r >= 400 && r < 600) || r >= 800) {
- assertEquals("row " + r, true, inner.isNull[r]);
+ assertEquals("row " + r, null, inner);
} else {
- assertEquals("row " + r, false, inner.isNull[r]);
- assertEquals("row " + r, r, vec.vector[r]);
+ assertEquals("row " + r, "{" + r + "}", inner.toString());
}
}
- rows.nextBatch(batch);
- assertEquals(0, batch.size);
+ assertEquals(false, rows.hasNext());
}
/**
@@ -2594,38 +2595,28 @@ public class TestVectorOrcFile {
Reader reader = OrcFile.createReader(testFilePath,
OrcFile.readerOptions(conf));
RecordReader rows = reader.rows();
- batch = reader.getSchema().createRowBatch(1024);
- UnionColumnVector union = (UnionColumnVector) batch.cols[0];
- LongColumnVector ints = (LongColumnVector) union.fields[0];
- LongColumnVector longs = (LongColumnVector) union.fields[1];
- assertEquals(true, rows.nextBatch(batch));
- assertEquals(1024, batch.size);
+ OrcStruct row = null;
for(int r=0; r < 1024; ++r) {
+ assertEquals(true, rows.hasNext());
+ row = (OrcStruct) rows.next(row);
+ OrcUnion inner = (OrcUnion) row.getFieldValue(0);
if (r < 200) {
- assertEquals("row " + r, true, union.isNull[r]);
+ assertEquals("row " + r, null, inner);
} else if (r < 300) {
- assertEquals("row " + r, false, union.isNull[r]);
- assertEquals("row " + r, 0, union.tags[r]);
- assertEquals("row " + r, r, ints.vector[r]);
+ assertEquals("row " + r, "union(0, " + r +")", inner.toString());
} else if (r < 400) {
- assertEquals("row " + r, false, union.isNull[r]);
- assertEquals("row " + r, 1, union.tags[r]);
- assertEquals("row " + r, -r, longs.vector[r]);
+ assertEquals("row " + r, "union(1, " + -r +")", inner.toString());
} else if (r < 600) {
- assertEquals("row " + r, true, union.isNull[r]);
+ assertEquals("row " + r, null, inner);
} else if (r < 800) {
- assertEquals("row " + r, false, union.isNull[r]);
- assertEquals("row " + r, 1, union.tags[r]);
- assertEquals("row " + r, -r, longs.vector[r]);
+ assertEquals("row " + r, "union(1, " + -r +")", inner.toString());
} else if (r < 1000) {
- assertEquals("row " + r, true, union.isNull[r]);
+ assertEquals("row " + r, null, inner);
} else {
- assertEquals("row " + r, false, union.isNull[r]);
- assertEquals("row " + r, 1, union.tags[r]);
- assertEquals("row " + r, -r, longs.vector[r]);
+ assertEquals("row " + r, "union(1, " + -r +")", inner.toString());
}
}
- assertEquals(false, rows.nextBatch(batch));
+ assertEquals(false, rows.hasNext());
}
/**
@@ -2672,33 +2663,31 @@ public class TestVectorOrcFile {
Reader r
<TRUNCATED>
[3/4] hive git commit: Revert "HIVE-12159: Create vectorized readers
for the complex types (Owen O'Malley, reviewed by Matt McCline)"
Posted by mm...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/d559b347/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java
index 8ee8cd7..8bb32ea 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java
@@ -24,7 +24,6 @@ import java.sql.Timestamp;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
-import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
@@ -36,12 +35,9 @@ import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.UnionColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampUtils;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
import org.apache.hadoop.hive.serde2.io.ByteWritable;
@@ -60,7 +56,8 @@ import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
-import org.apache.orc.TypeDescription;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import org.apache.orc.impl.BitFieldReader;
import org.apache.orc.impl.DynamicByteArray;
import org.apache.orc.impl.InStream;
@@ -78,6 +75,60 @@ import org.apache.orc.impl.StreamName;
*/
public class TreeReaderFactory {
+ private static final Logger LOG =
+ LoggerFactory.getLogger(TreeReaderFactory.class);
+
+ public static class TreeReaderSchema {
+
+ /**
+ * The types in the ORC file.
+ */
+ List<OrcProto.Type> fileTypes;
+
+ /**
+ * The treeReaderSchema that the reader should read as.
+ */
+ List<OrcProto.Type> schemaTypes;
+
+ /**
+ * The subtype of the row STRUCT. Different from 0 for ACID.
+ */
+ int innerStructSubtype;
+
+ public TreeReaderSchema() {
+ fileTypes = null;
+ schemaTypes = null;
+ innerStructSubtype = -1;
+ }
+
+ public TreeReaderSchema fileTypes(List<OrcProto.Type> fileTypes) {
+ this.fileTypes = fileTypes;
+ return this;
+ }
+
+ public TreeReaderSchema schemaTypes(List<OrcProto.Type> schemaTypes) {
+ this.schemaTypes = schemaTypes;
+ return this;
+ }
+
+ public TreeReaderSchema innerStructSubtype(int innerStructSubtype) {
+ this.innerStructSubtype = innerStructSubtype;
+ return this;
+ }
+
+ public List<OrcProto.Type> getFileTypes() {
+ return fileTypes;
+ }
+
+ public List<OrcProto.Type> getSchemaTypes() {
+ return schemaTypes;
+ }
+
+ public int getInnerStructSubtype() {
+ return innerStructSubtype;
+ }
+ }
+
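TreeReaderSchema, reintroduced above, is a small fluent holder: each setter
returns this, so construction chains. A hedged sketch of typical wiring
(the two type-list names here are placeholders, not from this patch):

    TreeReaderSchema schema = new TreeReaderSchema()
        .fileTypes(typesFromFileFooter)   // types as physically written
        .schemaTypes(typesToReadAs)       // types the reader should produce
        .innerStructSubtype(0);           // non-zero only for ACID layouts

createTreeReader(columnId, schema, included, skipCorrupt) then walks
schema.getSchemaTypes() to build the reader tree; see the factory hunk near
the end of this message.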
public abstract static class TreeReader {
protected final int columnId;
protected BitFieldReader present = null;
@@ -179,60 +230,36 @@ public class TreeReaderFactory {
}
/**
- * Called at the top level to read into the given batch.
- * @param batch the batch to read into
- * @param batchSize the number of rows to read
- * @throws IOException
- */
- public void nextBatch(VectorizedRowBatch batch,
- int batchSize) throws IOException {
- batch.cols[0].reset();
- batch.cols[0].ensureSize(batchSize, false);
- nextVector(batch.cols[0], null, batchSize);
- }
-
- /**
* Populates the isNull vector array in the previousVector object based on
* the present stream values. This function is called from all the child
* readers, and they all set the values based on isNull field value.
*
- * @param previous The columnVector object whose isNull value is populated
- * @param isNull Whether the each value was null at a higher level. If
- * isNull is null, all values are non-null.
+ * @param previousVector The columnVector object whose isNull value is populated
* @param batchSize Size of the column vector
+ * @return next column vector
* @throws IOException
*/
- public void nextVector(ColumnVector previous,
- boolean[] isNull,
- int batchSize) throws IOException {
- if (present != null || isNull != null) {
+ public Object nextVector(Object previousVector, final int batchSize) throws IOException {
+ ColumnVector result = (ColumnVector) previousVector;
+ if (present != null) {
// Set noNulls and isNull vector of the ColumnVector based on
// present stream
- previous.noNulls = true;
- boolean allNull = true;
+ result.noNulls = true;
for (int i = 0; i < batchSize; i++) {
- if (isNull == null || !isNull[i]) {
- if (present != null && present.next() != 1) {
- previous.noNulls = false;
- previous.isNull[i] = true;
- } else {
- previous.isNull[i] = false;
- allNull = false;
- }
- } else {
- previous.noNulls = false;
- previous.isNull[i] = true;
+ result.isNull[i] = (present.next() != 1);
+ if (result.noNulls && result.isNull[i]) {
+ result.noNulls = false;
}
}
- previous.isRepeating = !previous.noNulls && allNull;
} else {
- // There is no present stream, this means that all the values are
+ // There is no present stream, which means that all the values are
// present.
- previous.noNulls = true;
+ result.noNulls = true;
for (int i = 0; i < batchSize; i++) {
- previous.isNull[i] = false;
+ result.isNull[i] = false;
}
}
+ return previousVector;
}
public BitFieldReader getPresent() {
@@ -240,46 +267,6 @@ public class TreeReaderFactory {
}
}
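Worth flagging what the base-class hunk above removes: the newer
nextVector(ColumnVector, boolean[], int) threaded a parent isNull mask down
the tree (so a null struct forced its children null) and tracked allNull to
set isRepeating on an all-null batch; the restored nextVector(Object, int)
consults only this column's own PRESENT stream. A sketch of the masking
behavior being dropped, following the '-' lines above:

    // removed behavior: an inherited null wins over the child's PRESENT bit
    if (isNull != null && isNull[i]) {
      previous.isNull[i] = true;   // null imposed by an enclosing type
    } else {
      previous.isNull[i] = present != null && present.next() != 1;
    }
    if (previous.isNull[i]) {
      previous.noNulls = false;
    }

Dropping that mask lines up with the restored Union/List/Map readers below,
whose nextVector simply throws UnsupportedOperationException.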
- public static class NullTreeReader extends TreeReader {
-
- public NullTreeReader(int columnId) throws IOException {
- super(columnId);
- }
-
- @Override
- public void startStripe(Map<StreamName, InStream> streams,
- OrcProto.StripeFooter footer) {
- // PASS
- }
-
- @Override
- void skipRows(long rows) {
- // PASS
- }
-
- @Override
- public void seek(PositionProvider position) {
- // PASS
- }
-
- @Override
- public void seek(PositionProvider[] position) {
- // PASS
- }
-
- @Override
- Object next(Object previous) {
- return null;
- }
-
- @Override
- public void nextVector(ColumnVector vector, boolean[] isNull, int size) {
- vector.noNulls = false;
- vector.isNull[0] = true;
- vector.isRepeating = true;
- }
- }
-
public static class BooleanTreeReader extends TreeReader {
protected BitFieldReader reader = null;
@@ -335,16 +322,20 @@ public class TreeReaderFactory {
}
@Override
- public void nextVector(ColumnVector previousVector,
- boolean[] isNull,
- int batchSize) throws IOException {
- LongColumnVector result = (LongColumnVector) previousVector;
+ public Object nextVector(Object previousVector, final int batchSize) throws IOException {
+ final LongColumnVector result;
+ if (previousVector == null) {
+ result = new LongColumnVector();
+ } else {
+ result = (LongColumnVector) previousVector;
+ }
// Read present/isNull stream
- super.nextVector(result, isNull, batchSize);
+ super.nextVector(result, batchSize);
// Read value entries based on isNull entries
reader.nextVector(result, batchSize);
+ return result;
}
}
@@ -396,16 +387,20 @@ public class TreeReaderFactory {
}
@Override
- public void nextVector(ColumnVector previousVector,
- boolean[] isNull,
- int batchSize) throws IOException {
- final LongColumnVector result = (LongColumnVector) previousVector;
+ public Object nextVector(Object previousVector, final int batchSize) throws IOException {
+ final LongColumnVector result;
+ if (previousVector == null) {
+ result = new LongColumnVector();
+ } else {
+ result = (LongColumnVector) previousVector;
+ }
// Read present/isNull stream
- super.nextVector(result, isNull, batchSize);
+ super.nextVector(result, batchSize);
// Read value entries based on isNull entries
- reader.nextVector(result, result.vector, batchSize);
+ reader.nextVector(result, batchSize);
+ return result;
}
@Override
@@ -478,16 +473,20 @@ public class TreeReaderFactory {
}
@Override
- public void nextVector(ColumnVector previousVector,
- boolean[] isNull,
- int batchSize) throws IOException {
- final LongColumnVector result = (LongColumnVector) previousVector;
+ public Object nextVector(Object previousVector, final int batchSize) throws IOException {
+ final LongColumnVector result;
+ if (previousVector == null) {
+ result = new LongColumnVector();
+ } else {
+ result = (LongColumnVector) previousVector;
+ }
// Read present/isNull stream
- super.nextVector(result, isNull, batchSize);
+ super.nextVector(result, batchSize);
// Read value entries based on isNull entries
- reader.nextVector(result, result.vector, batchSize);
+ reader.nextVector(result, batchSize);
+ return result;
}
@Override
@@ -560,16 +559,20 @@ public class TreeReaderFactory {
}
@Override
- public void nextVector(ColumnVector previousVector,
- boolean[] isNull,
- int batchSize) throws IOException {
- final LongColumnVector result = (LongColumnVector) previousVector;
+ public Object nextVector(Object previousVector, final int batchSize) throws IOException {
+ final LongColumnVector result;
+ if (previousVector == null) {
+ result = new LongColumnVector();
+ } else {
+ result = (LongColumnVector) previousVector;
+ }
// Read present/isNull stream
- super.nextVector(result, isNull, batchSize);
+ super.nextVector(result, batchSize);
// Read value entries based on isNull entries
- reader.nextVector(result, result.vector, batchSize);
+ reader.nextVector(result, batchSize);
+ return result;
}
@Override
@@ -643,16 +646,20 @@ public class TreeReaderFactory {
}
@Override
- public void nextVector(ColumnVector previousVector,
- boolean[] isNull,
- int batchSize) throws IOException {
- final LongColumnVector result = (LongColumnVector) previousVector;
+ public Object nextVector(Object previousVector, final int batchSize) throws IOException {
+ final LongColumnVector result;
+ if (previousVector == null) {
+ result = new LongColumnVector();
+ } else {
+ result = (LongColumnVector) previousVector;
+ }
// Read present/isNull stream
- super.nextVector(result, isNull, batchSize);
+ super.nextVector(result, batchSize);
// Read value entries based on isNull entries
- reader.nextVector(result, result.vector, batchSize);
+ reader.nextVector(result, batchSize);
+ return result;
}
@Override
@@ -712,13 +719,16 @@ public class TreeReaderFactory {
}
@Override
- public void nextVector(ColumnVector previousVector,
- boolean[] isNull,
- int batchSize) throws IOException {
- final DoubleColumnVector result = (DoubleColumnVector) previousVector;
+ public Object nextVector(Object previousVector, final int batchSize) throws IOException {
+ final DoubleColumnVector result;
+ if (previousVector == null) {
+ result = new DoubleColumnVector();
+ } else {
+ result = (DoubleColumnVector) previousVector;
+ }
// Read present/isNull stream
- super.nextVector(result, isNull, batchSize);
+ super.nextVector(result, batchSize);
final boolean hasNulls = !result.noNulls;
boolean allNulls = hasNulls;
@@ -758,6 +768,7 @@ public class TreeReaderFactory {
}
result.isRepeating = repeating;
}
+ return result;
}
@Override
@@ -821,13 +832,16 @@ public class TreeReaderFactory {
}
@Override
- public void nextVector(ColumnVector previousVector,
- boolean[] isNull,
- int batchSize) throws IOException {
- final DoubleColumnVector result = (DoubleColumnVector) previousVector;
+ public Object nextVector(Object previousVector, final int batchSize) throws IOException {
+ final DoubleColumnVector result;
+ if (previousVector == null) {
+ result = new DoubleColumnVector();
+ } else {
+ result = (DoubleColumnVector) previousVector;
+ }
// Read present/isNull stream
- super.nextVector(result, isNull, batchSize);
+ super.nextVector(result, batchSize);
final boolean hasNulls = !result.noNulls;
boolean allNulls = hasNulls;
@@ -867,6 +881,8 @@ public class TreeReaderFactory {
}
result.isRepeating = repeating;
}
+
+ return result;
}
@Override
@@ -958,15 +974,19 @@ public class TreeReaderFactory {
}
@Override
- public void nextVector(ColumnVector previousVector,
- boolean[] isNull,
- int batchSize) throws IOException {
- final BytesColumnVector result = (BytesColumnVector) previousVector;
+ public Object nextVector(Object previousVector, final int batchSize) throws IOException {
+ final BytesColumnVector result;
+ if (previousVector == null) {
+ result = new BytesColumnVector();
+ } else {
+ result = (BytesColumnVector) previousVector;
+ }
// Read present/isNull stream
- super.nextVector(result, isNull, batchSize);
+ super.nextVector(result, batchSize);
BytesColumnVectorUtil.readOrcByteArrays(stream, lengths, scratchlcv, result, batchSize);
+ return result;
}
@Override
@@ -991,6 +1011,7 @@ public class TreeReaderFactory {
private final TimeZone readerTimeZone;
private TimeZone writerTimeZone;
private boolean hasSameTZRules;
+ private TimestampWritable scratchTimestampWritable;
TimestampTreeReader(int columnId, boolean skipCorrupt) throws IOException {
this(columnId, null, null, null, null, skipCorrupt);
@@ -1094,9 +1115,9 @@ public class TreeReaderFactory {
int newNanos = parseNanos(nanos.next());
// fix the rounding when we divided by 1000.
if (millis >= 0) {
- millis += newNanos / WriterImpl.NANOS_PER_MILLI;
+ millis += newNanos / 1000000;
} else {
- millis -= newNanos / WriterImpl.NANOS_PER_MILLI;
+ millis -= newNanos / 1000000;
}
long offset = 0;
// If reader and writer time zones have different rules, adjust the timezone difference
@@ -1123,45 +1144,31 @@ public class TreeReaderFactory {
}
@Override
- public void nextVector(ColumnVector previousVector,
- boolean[] isNull,
- int batchSize) throws IOException {
- TimestampColumnVector result = (TimestampColumnVector) previousVector;
- super.nextVector(previousVector, isNull, batchSize);
+ public Object nextVector(Object previousVector, final int batchSize) throws IOException {
+ final TimestampColumnVector result;
+ if (previousVector == null) {
+ result = new TimestampColumnVector();
+ } else {
+ result = (TimestampColumnVector) previousVector;
+ }
+ result.reset();
+ if (scratchTimestampWritable == null) {
+ scratchTimestampWritable = new TimestampWritable();
+ }
+ Object obj;
for (int i = 0; i < batchSize; i++) {
- if (result.noNulls || !result.isNull[i]) {
- long millis = data.next() + base_timestamp;
- int newNanos = parseNanos(nanos.next());
- if (millis < 0 && newNanos != 0) {
- millis -= 1;
- }
- millis *= WriterImpl.MILLIS_PER_SECOND;
- long offset = 0;
- // If reader and writer time zones have different rules, adjust the timezone difference
- // between reader and writer taking day light savings into account.
- if (!hasSameTZRules) {
- offset = writerTimeZone.getOffset(millis) - readerTimeZone.getOffset(millis);
- }
- long adjustedMillis = millis + offset;
- // Sometimes the reader timezone might have changed after adding the adjustedMillis.
- // To account for that change, check for any difference in reader timezone after
- // adding adjustedMillis. If so use the new offset (offset at adjustedMillis point of time).
- if (!hasSameTZRules &&
- (readerTimeZone.getOffset(millis) != readerTimeZone.getOffset(adjustedMillis))) {
- long newOffset =
- writerTimeZone.getOffset(millis) - readerTimeZone.getOffset(adjustedMillis);
- adjustedMillis = millis + newOffset;
- }
- result.time[i] = adjustedMillis;
- result.nanos[i] = newNanos;
- if (result.isRepeating && i != 0 &&
- (result.time[0] != result.time[i] ||
- result.nanos[0] != result.nanos[i])) {
- result.isRepeating = false;
- }
+ obj = next(scratchTimestampWritable);
+ if (obj == null) {
+ result.noNulls = false;
+ result.isNull[i] = true;
+ } else {
+ TimestampWritable writable = (TimestampWritable) obj;
+ result.set(i, writable.getTimestamp());
}
}
+
+ return result;
}
private static int parseNanos(long serialized) {
@@ -1246,16 +1253,20 @@ public class TreeReaderFactory {
}
@Override
- public void nextVector(ColumnVector previousVector,
- boolean[] isNull,
- int batchSize) throws IOException {
- final LongColumnVector result = (LongColumnVector) previousVector;
+ public Object nextVector(Object previousVector, final int batchSize) throws IOException {
+ final LongColumnVector result;
+ if (previousVector == null) {
+ result = new LongColumnVector();
+ } else {
+ result = (LongColumnVector) previousVector;
+ }
// Read present/isNull stream
- super.nextVector(result, isNull, batchSize);
+ super.nextVector(result, batchSize);
// Read value entries based on isNull entries
- reader.nextVector(result, result.vector, batchSize);
+ reader.nextVector(result, batchSize);
+ return result;
}
@Override
@@ -1267,7 +1278,7 @@ public class TreeReaderFactory {
public static class DecimalTreeReader extends TreeReader {
protected InStream valueStream;
protected IntegerReader scaleReader = null;
- private int[] scratchScaleVector;
+ private LongColumnVector scratchScaleVector;
private final int precision;
private final int scale;
@@ -1282,7 +1293,7 @@ public class TreeReaderFactory {
super(columnId, present);
this.precision = precision;
this.scale = scale;
- this.scratchScaleVector = new int[VectorizedRowBatch.DEFAULT_SIZE];
+ this.scratchScaleVector = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
this.valueStream = valueStream;
if (scaleStream != null && encoding != null) {
checkEncoding(encoding);
@@ -1341,34 +1352,46 @@ public class TreeReaderFactory {
}
@Override
- public void nextVector(ColumnVector previousVector,
- boolean[] isNull,
- int batchSize) throws IOException {
- final DecimalColumnVector result = (DecimalColumnVector) previousVector;
+ public Object nextVector(Object previousVector, final int batchSize) throws IOException {
+ final DecimalColumnVector result;
+ if (previousVector == null) {
+ result = new DecimalColumnVector(precision, scale);
+ } else {
+ result = (DecimalColumnVector) previousVector;
+ }
+
+ // Save the reference for isNull in the scratch vector
+ boolean[] scratchIsNull = scratchScaleVector.isNull;
// Read present/isNull stream
- super.nextVector(result, isNull, batchSize);
+ super.nextVector(result, batchSize);
- if (batchSize > scratchScaleVector.length) {
- scratchScaleVector = new int[(int) batchSize];
- }
- scaleReader.nextVector(result, scratchScaleVector, batchSize);
// Read value entries based on isNull entries
- if (result.noNulls) {
- for (int r=0; r < batchSize; ++r) {
+ if (result.isRepeating) {
+ if (!result.isNull[0]) {
BigInteger bInt = SerializationUtils.readBigInteger(valueStream);
- HiveDecimal dec = HiveDecimal.create(bInt, scratchScaleVector[r]);
- result.set(r, dec);
+ short scaleInData = (short) scaleReader.next();
+ HiveDecimal dec = HiveDecimal.create(bInt, scaleInData);
+ dec = HiveDecimal.enforcePrecisionScale(dec, precision, scale);
+ result.set(0, dec);
}
- } else if (!result.isRepeating || !result.isNull[0]) {
- for (int r=0; r < batchSize; ++r) {
- if (!result.isNull[r]) {
+ } else {
+ // result vector has isNull values set, use the same to read scale vector.
+ scratchScaleVector.isNull = result.isNull;
+ scaleReader.nextVector(scratchScaleVector, batchSize);
+ for (int i = 0; i < batchSize; i++) {
+ if (!result.isNull[i]) {
BigInteger bInt = SerializationUtils.readBigInteger(valueStream);
- HiveDecimal dec = HiveDecimal.create(bInt, scratchScaleVector[r]);
- result.set(r, dec);
+ short scaleInData = (short) scratchScaleVector.vector[i];
+ HiveDecimal dec = HiveDecimal.create(bInt, scaleInData);
+ dec = HiveDecimal.enforcePrecisionScale(dec, precision, scale);
+ result.set(i, dec);
}
}
}
+ // Switch back the null vector.
+ scratchScaleVector.isNull = scratchIsNull;
+ return result;
}
@Override
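The restored DecimalTreeReader pulls a subtle trick above: it temporarily
aliases scratchScaleVector.isNull to the result's isNull array so the scale
reader skips exactly the null slots, then restores the original array before
returning. Skipping the restore would let a later batch scribble over the
result's null flags through the shared reference. The pattern, reduced to
its essentials (names from the hunk above):

    boolean[] scratchIsNull = scratchScaleVector.isNull;  // remember scratch array
    scratchScaleVector.isNull = result.isNull;            // alias result's nulls
    scaleReader.nextVector(scratchScaleVector, batchSize);
    // ... one BigInteger + scale per non-null slot ...
    scratchScaleVector.isNull = scratchIsNull;            // un-alias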
@@ -1458,10 +1481,8 @@ public class TreeReaderFactory {
}
@Override
- public void nextVector(ColumnVector previousVector,
- boolean[] isNull,
- int batchSize) throws IOException {
- reader.nextVector(previousVector, isNull, batchSize);
+ public Object nextVector(Object previousVector, final int batchSize) throws IOException {
+ return reader.nextVector(previousVector, batchSize);
}
@Override
@@ -1480,7 +1501,7 @@ public class TreeReaderFactory {
BytesColumnVector result, final int batchSize) throws IOException {
// Read lengths
scratchlcv.isNull = result.isNull; // Notice we are replacing the isNull vector here...
- lengths.nextVector(scratchlcv, scratchlcv.vector, batchSize);
+ lengths.nextVector(scratchlcv, batchSize);
int totalLength = 0;
if (!scratchlcv.isRepeating) {
for (int i = 0; i < batchSize; i++) {
@@ -1511,35 +1532,31 @@ public class TreeReaderFactory {
}
// This method has the common code for reading in bytes into a BytesColumnVector.
- public static void readOrcByteArrays(InStream stream,
- IntegerReader lengths,
- LongColumnVector scratchlcv,
- BytesColumnVector result,
- int batchSize) throws IOException {
- if (result.noNulls || !(result.isRepeating && result.isNull[0])) {
- byte[] allBytes = commonReadByteArrays(stream, lengths, scratchlcv,
- result, (int) batchSize);
-
- // Too expensive to figure out 'repeating' by comparisons.
- result.isRepeating = false;
- int offset = 0;
- if (!scratchlcv.isRepeating) {
- for (int i = 0; i < batchSize; i++) {
- if (!scratchlcv.isNull[i]) {
- result.setRef(i, allBytes, offset, (int) scratchlcv.vector[i]);
- offset += scratchlcv.vector[i];
- } else {
- result.setRef(i, allBytes, 0, 0);
- }
+ public static void readOrcByteArrays(InStream stream, IntegerReader lengths,
+ LongColumnVector scratchlcv,
+ BytesColumnVector result, final int batchSize) throws IOException {
+
+ byte[] allBytes = commonReadByteArrays(stream, lengths, scratchlcv, result, batchSize);
+
+ // Too expensive to figure out 'repeating' by comparisons.
+ result.isRepeating = false;
+ int offset = 0;
+ if (!scratchlcv.isRepeating) {
+ for (int i = 0; i < batchSize; i++) {
+ if (!scratchlcv.isNull[i]) {
+ result.setRef(i, allBytes, offset, (int) scratchlcv.vector[i]);
+ offset += scratchlcv.vector[i];
+ } else {
+ result.setRef(i, allBytes, 0, 0);
}
- } else {
- for (int i = 0; i < batchSize; i++) {
- if (!scratchlcv.isNull[i]) {
- result.setRef(i, allBytes, offset, (int) scratchlcv.vector[0]);
- offset += scratchlcv.vector[0];
- } else {
- result.setRef(i, allBytes, 0, 0);
- }
+ }
+ } else {
+ for (int i = 0; i < batchSize; i++) {
+ if (!scratchlcv.isNull[i]) {
+ result.setRef(i, allBytes, offset, (int) scratchlcv.vector[0]);
+ offset += scratchlcv.vector[0];
+ } else {
+ result.setRef(i, allBytes, 0, 0);
}
}
}
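readOrcByteArrays above avoids per-row copies: commonReadByteArrays packs
every value of the batch end-to-end into one allBytes array, and each row is
handed an (offset, length) view into it via setRef; null rows get a
zero-length reference. Roughly:

    // allBytes = "foo" ++ "quux" ++ ...   (packed, no separators)
    result.setRef(0, allBytes, 0, 3);      // row 0 -> "foo"
    result.setRef(1, allBytes, 3, 4);      // row 1 -> "quux"
    result.setRef(2, allBytes, 0, 0);      // row 2 was null

This is also why result.isRepeating is unconditionally set to false: as the
comment in the hunk notes, it is too expensive to detect repetition by
comparing byte ranges.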
@@ -1624,16 +1641,19 @@ public class TreeReaderFactory {
}
@Override
- public void nextVector(ColumnVector previousVector,
- boolean[] isNull,
- int batchSize) throws IOException {
- final BytesColumnVector result = (BytesColumnVector) previousVector;
+ public Object nextVector(Object previousVector, final int batchSize) throws IOException {
+ final BytesColumnVector result;
+ if (previousVector == null) {
+ result = new BytesColumnVector();
+ } else {
+ result = (BytesColumnVector) previousVector;
+ }
// Read present/isNull stream
- super.nextVector(result, isNull, batchSize);
+ super.nextVector(result, batchSize);
- BytesColumnVectorUtil.readOrcByteArrays(stream, lengths, scratchlcv,
- result, batchSize);
+ BytesColumnVectorUtil.readOrcByteArrays(stream, lengths, scratchlcv, result, batchSize);
+ return result;
}
@Override
@@ -1796,15 +1816,18 @@ public class TreeReaderFactory {
}
@Override
- public void nextVector(ColumnVector previousVector,
- boolean[] isNull,
- int batchSize) throws IOException {
- final BytesColumnVector result = (BytesColumnVector) previousVector;
+ public Object nextVector(Object previousVector, final int batchSize) throws IOException {
+ final BytesColumnVector result;
int offset;
int length;
+ if (previousVector == null) {
+ result = new BytesColumnVector();
+ } else {
+ result = (BytesColumnVector) previousVector;
+ }
// Read present/isNull stream
- super.nextVector(result, isNull, batchSize);
+ super.nextVector(result, batchSize);
if (dictionaryBuffer != null) {
@@ -1815,8 +1838,7 @@ public class TreeReaderFactory {
// Read string offsets
scratchlcv.isNull = result.isNull;
- scratchlcv.ensureSize((int) batchSize, false);
- reader.nextVector(scratchlcv, scratchlcv.vector, batchSize);
+ reader.nextVector(scratchlcv, batchSize);
if (!scratchlcv.isRepeating) {
// The vector has non-repeating strings. Iterate thru the batch
@@ -1856,6 +1878,7 @@ public class TreeReaderFactory {
}
}
}
+ return result;
}
int getDictionaryEntryLength(int entry, int offset) {
@@ -1913,13 +1936,11 @@ public class TreeReaderFactory {
}
@Override
- public void nextVector(ColumnVector previousVector,
- boolean[] isNull,
- int batchSize) throws IOException {
+ public Object nextVector(Object previousVector, final int batchSize) throws IOException {
// Get the vector of strings from StringTreeReader, then make a 2nd pass to
// adjust down the length (right trim and truncate) if necessary.
- super.nextVector(previousVector, isNull, batchSize);
- BytesColumnVector result = (BytesColumnVector) previousVector;
+ BytesColumnVector result = (BytesColumnVector) super.nextVector(previousVector, batchSize);
+
int adjustedDownLen;
if (result.isRepeating) {
if (result.noNulls || !result.isNull[0]) {
@@ -1952,6 +1973,7 @@ public class TreeReaderFactory {
}
}
}
+ return result;
}
}
@@ -1988,13 +2010,10 @@ public class TreeReaderFactory {
}
@Override
- public void nextVector(ColumnVector previousVector,
- boolean[] isNull,
- int batchSize) throws IOException {
+ public Object nextVector(Object previousVector, final int batchSize) throws IOException {
// Get the vector of strings from StringTreeReader, then make a 2nd pass to
// adjust down the length (truncate) if necessary.
- super.nextVector(previousVector, isNull, batchSize);
- BytesColumnVector result = (BytesColumnVector) previousVector;
+ BytesColumnVector result = (BytesColumnVector) super.nextVector(previousVector, batchSize);
int adjustedDownLen;
if (result.isRepeating) {
@@ -2026,26 +2045,62 @@ public class TreeReaderFactory {
}
}
}
+ return result;
}
}
protected static class StructTreeReader extends TreeReader {
+ private final int readColumnCount;
+ private final int resultColumnCount;
protected final TreeReader[] fields;
+ private final String[] fieldNames;
- protected StructTreeReader(int columnId,
- TypeDescription readerSchema,
- SchemaEvolution treeReaderSchema,
- boolean[] included,
- boolean skipCorrupt) throws IOException {
+ protected StructTreeReader(
+ int columnId,
+ TreeReaderSchema treeReaderSchema,
+ boolean[] included,
+ boolean skipCorrupt) throws IOException {
super(columnId);
- TypeDescription fileSchema = treeReaderSchema.getFileType(readerSchema);
+ OrcProto.Type fileStructType = treeReaderSchema.getFileTypes().get(columnId);
+
+ OrcProto.Type schemaStructType = treeReaderSchema.getSchemaTypes().get(columnId);
- List<TypeDescription> childrenTypes = readerSchema.getChildren();
- this.fields = new TreeReader[childrenTypes.size()];
- for (int i = 0; i < fields.length; ++i) {
- TypeDescription subtype = childrenTypes.get(i);
- this.fields[i] = createTreeReader(subtype, treeReaderSchema, included, skipCorrupt);
+ readColumnCount = Math.min(fileStructType.getFieldNamesCount(), schemaStructType.getFieldNamesCount());
+
+ if (columnId == treeReaderSchema.getInnerStructSubtype()) {
+ // If there are more result columns than reader columns, we will default those additional
+ // columns to NULL.
+ resultColumnCount = schemaStructType.getFieldNamesCount();
+ } else {
+ resultColumnCount = readColumnCount;
+ }
+
+ this.fields = new TreeReader[readColumnCount];
+ this.fieldNames = new String[readColumnCount];
+
+ if (included == null) {
+ for (int i = 0; i < readColumnCount; ++i) {
+ int subtype = schemaStructType.getSubtypes(i);
+ this.fields[i] = createTreeReader(subtype, treeReaderSchema, included, skipCorrupt);
+ // Use the treeReaderSchema evolution name since file/reader types may not have the real column name.
+ this.fieldNames[i] = schemaStructType.getFieldNames(i);
+ }
+ } else {
+ for (int i = 0; i < readColumnCount; ++i) {
+ int subtype = schemaStructType.getSubtypes(i);
+ if (subtype >= included.length) {
+ throw new IOException("subtype " + subtype + " exceeds the included array size " +
+ included.length + " fileTypes " + treeReaderSchema.getFileTypes().toString() +
+ " schemaTypes " + treeReaderSchema.getSchemaTypes().toString() +
+ " innerStructSubtype " + treeReaderSchema.getInnerStructSubtype());
+ }
+ if (included[subtype]) {
+ this.fields[i] = createTreeReader(subtype, treeReaderSchema, included, skipCorrupt);
+ }
+ // Use the treeReaderSchema evolution name since file/reader types may not have the real column name.
+ this.fieldNames[i] = schemaStructType.getFieldNames(i);
+ }
}
}
@@ -2065,52 +2120,65 @@ public class TreeReaderFactory {
OrcStruct result = null;
if (valuePresent) {
if (previous == null) {
- result = new OrcStruct(fields.length);
+ result = new OrcStruct(resultColumnCount);
} else {
result = (OrcStruct) previous;
// If the input format was initialized with a file with a
// different number of fields, the number of fields needs to
// be updated to the correct number
- result.setNumFields(fields.length);
+ if (result.getNumFields() != resultColumnCount) {
+ result.setNumFields(resultColumnCount);
+ }
}
- for (int i = 0; i < fields.length; ++i) {
+ for (int i = 0; i < readColumnCount; ++i) {
if (fields[i] != null) {
result.setFieldValue(i, fields[i].next(result.getFieldValue(i)));
}
}
+ if (resultColumnCount > readColumnCount) {
+ for (int i = readColumnCount; i < resultColumnCount; ++i) {
+ // Default new treeReaderSchema evolution fields to NULL.
+ result.setFieldValue(i, null);
+ }
+ }
}
return result;
}
@Override
- public void nextBatch(VectorizedRowBatch batch,
- int batchSize) throws IOException {
- for(int i=0; i < fields.length &&
- (vectorColumnCount == -1 || i < vectorColumnCount); ++i) {
- batch.cols[i].reset();
- batch.cols[i].ensureSize((int) batchSize, false);
- fields[i].nextVector(batch.cols[i], null, batchSize);
+ public Object nextVector(Object previousVector, final int batchSize) throws IOException {
+ final ColumnVector[] result;
+ if (previousVector == null) {
+ result = new ColumnVector[readColumnCount];
+ } else {
+ result = (ColumnVector[]) previousVector;
}
- }
- @Override
- public void nextVector(ColumnVector previousVector,
- boolean[] isNull,
- int batchSize) throws IOException {
- super.nextVector(previousVector, isNull, batchSize);
- StructColumnVector result = (StructColumnVector) previousVector;
- if (result.noNulls || !(result.isRepeating && result.isNull[0])) {
- result.isRepeating = false;
+ // Read all the members of struct as column vectors
+ for (int i = 0; i < readColumnCount; i++) {
+ if (fields[i] != null) {
+ if (result[i] == null) {
+ result[i] = (ColumnVector) fields[i].nextVector(null, batchSize);
+ } else {
+ fields[i].nextVector(result[i], batchSize);
+ }
+ }
+ }
- // Read all the members of struct as column vectors
- boolean[] mask = result.noNulls ? null : result.isNull;
- for (int f = 0; f < fields.length; f++) {
- if (fields[f] != null) {
- fields[f].nextVector(result.fields[f], mask, batchSize);
+ // Default additional treeReaderSchema evolution fields to NULL.
+ if (vectorColumnCount != -1 && vectorColumnCount > readColumnCount) {
+ for (int i = readColumnCount; i < vectorColumnCount; ++i) {
+ ColumnVector colVector = result[i];
+ if (colVector != null) {
+ colVector.isRepeating = true;
+ colVector.noNulls = false;
+ colVector.isNull[0] = true;
}
}
}
+
+ return result;
}
@Override
@@ -2140,18 +2208,19 @@ public class TreeReaderFactory {
protected final TreeReader[] fields;
protected RunLengthByteReader tags;
- protected UnionTreeReader(int fileColumn,
- TypeDescription readerSchema,
- SchemaEvolution treeReaderSchema,
- boolean[] included,
- boolean skipCorrupt) throws IOException {
- super(fileColumn);
- List<TypeDescription> childrenTypes = readerSchema.getChildren();
- int fieldCount = childrenTypes.size();
+ protected UnionTreeReader(int columnId,
+ TreeReaderSchema treeReaderSchema,
+ boolean[] included,
+ boolean skipCorrupt) throws IOException {
+ super(columnId);
+ OrcProto.Type type = treeReaderSchema.getSchemaTypes().get(columnId);
+ int fieldCount = type.getSubtypesCount();
this.fields = new TreeReader[fieldCount];
for (int i = 0; i < fieldCount; ++i) {
- TypeDescription subtype = childrenTypes.get(i);
- this.fields[i] = createTreeReader(subtype, treeReaderSchema, included, skipCorrupt);
+ int subtype = type.getSubtypes(i);
+ if (included == null || included[subtype]) {
+ this.fields[i] = createTreeReader(subtype, treeReaderSchema, included, skipCorrupt);
+ }
}
}
@@ -2183,25 +2252,9 @@ public class TreeReaderFactory {
}
@Override
- public void nextVector(ColumnVector previousVector,
- boolean[] isNull,
- int batchSize) throws IOException {
- UnionColumnVector result = (UnionColumnVector) previousVector;
- super.nextVector(result, isNull, batchSize);
- if (result.noNulls || !(result.isRepeating && result.isNull[0])) {
- result.isRepeating = false;
- tags.nextVector(result.noNulls ? null : result.isNull, result.tags,
- batchSize);
- boolean[] ignore = new boolean[(int) batchSize];
- for (int f = 0; f < result.fields.length; ++f) {
- // build the ignore list for this tag
- for (int r = 0; r < batchSize; ++r) {
- ignore[r] = (!result.noNulls && result.isNull[r]) ||
- result.tags[r] != f;
- }
- fields[f].nextVector(result.fields[f], ignore, batchSize);
- }
- }
+ public Object nextVector(Object previousVector, final int batchSize) throws IOException {
+ throw new UnsupportedOperationException(
+ "NextVector is not supported operation for Union type");
}
@Override
@@ -2235,15 +2288,13 @@ public class TreeReaderFactory {
protected final TreeReader elementReader;
protected IntegerReader lengths = null;
- protected ListTreeReader(int fileColumn,
- TypeDescription readerSchema,
- SchemaEvolution treeReaderSchema,
- boolean[] included,
- boolean skipCorrupt) throws IOException {
- super(fileColumn);
- TypeDescription elementType = readerSchema.getChildren().get(0);
- elementReader = createTreeReader(elementType, treeReaderSchema, included,
- skipCorrupt);
+ protected ListTreeReader(int columnId,
+ TreeReaderSchema treeReaderSchema,
+ boolean[] included,
+ boolean skipCorrupt) throws IOException {
+ super(columnId);
+ OrcProto.Type type = treeReaderSchema.getSchemaTypes().get(columnId);
+ elementReader = createTreeReader(type.getSubtypes(0), treeReaderSchema, included, skipCorrupt);
}
@Override
@@ -2284,27 +2335,9 @@ public class TreeReaderFactory {
}
@Override
- public void nextVector(ColumnVector previous,
- boolean[] isNull,
- int batchSize) throws IOException {
- ListColumnVector result = (ListColumnVector) previous;
- super.nextVector(result, isNull, batchSize);
- // if we have some none-null values, then read them
- if (result.noNulls || !(result.isRepeating && result.isNull[0])) {
- lengths.nextVector(result, result.lengths, batchSize);
- // even with repeating lengths, the list doesn't repeat
- result.isRepeating = false;
- // build the offsets vector and figure out how many children to read
- result.childCount = 0;
- for (int r = 0; r < batchSize; ++r) {
- if (result.noNulls || !result.isNull[r]) {
- result.offsets[r] = result.childCount;
- result.childCount += result.lengths[r];
- }
- }
- result.child.ensureSize(result.childCount, false);
- elementReader.nextVector(result.child, null, result.childCount);
- }
+ public Object nextVector(Object previous, final int batchSize) throws IOException {
+ throw new UnsupportedOperationException(
+ "NextVector is not supported operation for List type");
}
@Override
@@ -2345,16 +2378,24 @@ public class TreeReaderFactory {
protected final TreeReader valueReader;
protected IntegerReader lengths = null;
- protected MapTreeReader(int fileColumn,
- TypeDescription readerSchema,
- SchemaEvolution treeReaderSchema,
- boolean[] included,
- boolean skipCorrupt) throws IOException {
- super(fileColumn);
- TypeDescription keyType = readerSchema.getChildren().get(0);
- TypeDescription valueType = readerSchema.getChildren().get(1);
- keyReader = createTreeReader(keyType, treeReaderSchema, included, skipCorrupt);
- valueReader = createTreeReader(valueType, treeReaderSchema, included, skipCorrupt);
+ protected MapTreeReader(int columnId,
+ TreeReaderSchema treeReaderSchema,
+ boolean[] included,
+ boolean skipCorrupt) throws IOException {
+ super(columnId);
+ OrcProto.Type type = treeReaderSchema.getSchemaTypes().get(columnId);
+ int keyColumn = type.getSubtypes(0);
+ int valueColumn = type.getSubtypes(1);
+ if (included == null || included[keyColumn]) {
+ keyReader = createTreeReader(keyColumn, treeReaderSchema, included, skipCorrupt);
+ } else {
+ keyReader = null;
+ }
+ if (included == null || included[valueColumn]) {
+ valueReader = createTreeReader(valueColumn, treeReaderSchema, included, skipCorrupt);
+ } else {
+ valueReader = null;
+ }
}
@Override
@@ -2388,28 +2429,9 @@ public class TreeReaderFactory {
}
@Override
- public void nextVector(ColumnVector previous,
- boolean[] isNull,
- int batchSize) throws IOException {
- MapColumnVector result = (MapColumnVector) previous;
- super.nextVector(result, isNull, batchSize);
- if (result.noNulls || !(result.isRepeating && result.isNull[0])) {
- lengths.nextVector(result, result.lengths, batchSize);
- // even with repeating lengths, the map doesn't repeat
- result.isRepeating = false;
- // build the offsets vector and figure out how many children to read
- result.childCount = 0;
- for (int r = 0; r < batchSize; ++r) {
- if (result.noNulls || !result.isNull[r]) {
- result.offsets[r] = result.childCount;
- result.childCount += result.lengths[r];
- }
- }
- result.keys.ensureSize(result.childCount, false);
- result.values.ensureSize(result.childCount, false);
- keyReader.nextVector(result.keys, null, result.childCount);
- valueReader.nextVector(result.values, null, result.childCount);
- }
+ public Object nextVector(Object previous, final int batchSize) throws IOException {
+ throw new UnsupportedOperationException(
+ "NextVector is not supported operation for Map type");
}
@Override
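
With nextVector throwing for LIST, MAP, and UNION, the batch path only handles primitives and structs after this revert, so callers have to decide up front whether a file can be read vectorized at all. A hedged sketch of that gate, with canVectorize as an illustrative name only:

    // Return false if any selected column is a complex type that the
    // reverted reader cannot deliver batch-at-a-time.
    static boolean canVectorize(List<OrcProto.Type> types, boolean[] included) {
      for (int col = 0; col < types.size(); ++col) {
        if (included != null && !included[col]) {
          continue;
        }
        switch (types.get(col).getKind()) {
          case LIST:
          case MAP:
          case UNION:
            return false;
          default:
            break; // primitives and STRUCT stay on the vectorized path
        }
      }
      return true;
    }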
@@ -2449,61 +2471,61 @@ public class TreeReaderFactory {
}
}
- public static TreeReader createTreeReader(TypeDescription readerType,
- SchemaEvolution evolution,
- boolean[] included,
- boolean skipCorrupt
- ) throws IOException {
- TypeDescription fileType = evolution.getFileType(readerType);
- if (fileType == null ||
- (included != null && !included[readerType.getId()])) {
- return new NullTreeReader(0);
- }
- switch (readerType.getCategory()) {
+ public static TreeReader createTreeReader(int columnId,
+ TreeReaderSchema treeReaderSchema,
+ boolean[] included,
+ boolean skipCorrupt
+ ) throws IOException {
+ OrcProto.Type type = treeReaderSchema.getSchemaTypes().get(columnId);
+ switch (type.getKind()) {
case BOOLEAN:
- return new BooleanTreeReader(fileType.getId());
+ return new BooleanTreeReader(columnId);
case BYTE:
- return new ByteTreeReader(fileType.getId());
+ return new ByteTreeReader(columnId);
case DOUBLE:
- return new DoubleTreeReader(fileType.getId());
+ return new DoubleTreeReader(columnId);
case FLOAT:
- return new FloatTreeReader(fileType.getId());
+ return new FloatTreeReader(columnId);
case SHORT:
- return new ShortTreeReader(fileType.getId());
+ return new ShortTreeReader(columnId);
case INT:
- return new IntTreeReader(fileType.getId());
+ return new IntTreeReader(columnId);
case LONG:
- return new LongTreeReader(fileType.getId(), skipCorrupt);
+ return new LongTreeReader(columnId, skipCorrupt);
case STRING:
- return new StringTreeReader(fileType.getId());
+ return new StringTreeReader(columnId);
case CHAR:
- return new CharTreeReader(fileType.getId(), readerType.getMaxLength());
+ if (!type.hasMaximumLength()) {
+ throw new IllegalArgumentException("ORC char type has no length specified");
+ }
+ return new CharTreeReader(columnId, type.getMaximumLength());
case VARCHAR:
- return new VarcharTreeReader(fileType.getId(), readerType.getMaxLength());
+ if (!type.hasMaximumLength()) {
+ throw new IllegalArgumentException("ORC varchar type has no length specified");
+ }
+ return new VarcharTreeReader(columnId, type.getMaximumLength());
case BINARY:
- return new BinaryTreeReader(fileType.getId());
+ return new BinaryTreeReader(columnId);
case TIMESTAMP:
- return new TimestampTreeReader(fileType.getId(), skipCorrupt);
+ return new TimestampTreeReader(columnId, skipCorrupt);
case DATE:
- return new DateTreeReader(fileType.getId());
+ return new DateTreeReader(columnId);
case DECIMAL:
- return new DecimalTreeReader(fileType.getId(), readerType.getPrecision(),
- readerType.getScale());
+ int precision =
+ type.hasPrecision() ? type.getPrecision() : HiveDecimal.SYSTEM_DEFAULT_PRECISION;
+ int scale = type.hasScale() ? type.getScale() : HiveDecimal.SYSTEM_DEFAULT_SCALE;
+ return new DecimalTreeReader(columnId, precision, scale);
case STRUCT:
- return new StructTreeReader(fileType.getId(), readerType,
- evolution, included, skipCorrupt);
+ return new StructTreeReader(columnId, treeReaderSchema, included, skipCorrupt);
case LIST:
- return new ListTreeReader(fileType.getId(), readerType,
- evolution, included, skipCorrupt);
+ return new ListTreeReader(columnId, treeReaderSchema, included, skipCorrupt);
case MAP:
- return new MapTreeReader(fileType.getId(), readerType, evolution,
- included, skipCorrupt);
+ return new MapTreeReader(columnId, treeReaderSchema, included, skipCorrupt);
case UNION:
- return new UnionTreeReader(fileType.getId(), readerType,
- evolution, included, skipCorrupt);
+ return new UnionTreeReader(columnId, treeReaderSchema, included, skipCorrupt);
default:
throw new IllegalArgumentException("Unsupported type " +
- readerType.getCategory());
+ type.getKind());
}
}
}
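
Since the factory keys everything off the flattened column id instead of a TypeDescription, the reader tree for a whole row starts at column 0, which is always the root STRUCT in ORC's numbering. A minimal usage sketch, assuming a treeReaderSchema and included array are already in scope:

    // Build the full reader tree; pass null for `included` to read
    // every column, and false to keep corrupt-data recovery off.
    TreeReader rootReader =
        TreeReaderFactory.createTreeReader(0, treeReaderSchema, included, false);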
http://git-wip-us.apache.org/repos/asf/hive/blob/d559b347/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java
index e4d2e6e..816b52d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java
@@ -71,29 +71,14 @@ public class VectorizedOrcInputFormat extends FileInputFormat<NullWritable, VectorizedRowBatch>
OrcInputFormat.raiseAcidTablesMustBeReadWithAcidReaderException(conf);
}
- rbCtx = Utilities.getVectorizedRowBatchCtx(conf);
/**
* Do we have schema on read in the configuration variables?
*/
- List<OrcProto.Type> types = file.getTypes();
- int dataColumns = rbCtx.getDataColumnCount();
- TypeDescription schema =
- OrcInputFormat.getDesiredRowTypeDescr(conf, false, dataColumns);
- if (schema == null) {
- schema = file.getSchema();
- // Even if the user isn't doing schema evolution, cut the schema
- // to the desired size.
- if (schema.getCategory() == TypeDescription.Category.STRUCT &&
- schema.getChildren().size() > dataColumns) {
- schema = schema.clone();
- List<TypeDescription> children = schema.getChildren();
- for(int c = children.size() - 1; c >= dataColumns; --c) {
- children.remove(c);
- }
- }
- }
- Reader.Options options = new Reader.Options().schema(schema);
+ TypeDescription schema = OrcInputFormat.getDesiredRowTypeDescr(conf, /* isAcidRead */ false);
+ List<OrcProto.Type> types = file.getTypes();
+ Reader.Options options = new Reader.Options();
+ options.schema(schema);
this.offset = fileSplit.getStart();
this.length = fileSplit.getLength();
options.range(offset, length);
@@ -102,6 +87,8 @@ public class VectorizedOrcInputFormat extends FileInputFormat<NullWritable, VectorizedRowBatch>
this.reader = file.rowsOptions(options);
+ rbCtx = Utilities.getVectorizedRowBatchCtx(conf);
+
columnsToIncludeTruncated = rbCtx.getColumnsToIncludeTruncated(conf);
int partitionColumnCount = rbCtx.getPartitionColumnCount();
@@ -116,6 +103,9 @@ public class VectorizedOrcInputFormat extends FileInputFormat<NullWritable, VectorizedRowBatch>
@Override
public boolean next(NullWritable key, VectorizedRowBatch value) throws IOException {
+ if (!reader.hasNext()) {
+ return false;
+ }
try {
// Check and update partition cols if necessary. Ideally, this should be done
// in CreateValue as the partition is constant per split. But since Hive uses
@@ -128,9 +118,7 @@ public class VectorizedOrcInputFormat extends FileInputFormat<NullWritable, VectorizedRowBatch>
}
addPartitionCols = false;
}
- if (!reader.nextBatch(value)) {
- return false;
- }
+ reader.nextBatch(value);
} catch (Exception e) {
throw new RuntimeException(e);
}
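
The restored contract splits iteration in two: hasNext() reports whether another batch exists, and nextBatch(batch) returns the filled batch, reusing or reallocating the one passed in. A minimal consumer sketch of that loop:

    // Drain the reader batch by batch; passing the previous batch back in
    // lets the reader reuse its buffers.
    VectorizedRowBatch batch = null;
    while (reader.hasNext()) {
      batch = reader.nextBatch(batch);
      for (int r = 0; r < batch.size; ++r) {
        // consume row r
      }
    }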
http://git-wip-us.apache.org/repos/asf/hive/blob/d559b347/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
index 8e52907..70fe803 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
@@ -101,6 +101,8 @@ public class WriterImpl extends org.apache.orc.impl.WriterImpl implements Writer
}
}
+ private static final long NANOS_PER_MILLI = 1000000;
+
/**
* Set the value for a given column value within a batch.
* @param rowId the row to set
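
NANOS_PER_MILLI supports splitting a timestamp into its epoch-millisecond part and the sub-millisecond remainder when it is flattened into a single nanosecond count. A sketch of the arithmetic, assuming a non-negative epoch time (java.sql.Timestamp.getTime() already includes the whole milliseconds carried in getNanos()):

    // getNanos() holds the full fractional second (0..999999999); keep
    // only the sub-millisecond part to avoid double-counting.
    long totalNanos = ts.getTime() * NANOS_PER_MILLI
        + (ts.getNanos() % NANOS_PER_MILLI);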
http://git-wip-us.apache.org/repos/asf/hive/blob/d559b347/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestTypeDescription.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestTypeDescription.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestTypeDescription.java
index 96af65a..2a82092 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestTypeDescription.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestTypeDescription.java
@@ -51,11 +51,11 @@ public class TestTypeDescription {
.addField("f4", TypeDescription.createDouble())
.addField("f5", TypeDescription.createBoolean()))
.addField("f6", TypeDescription.createChar().withMaxLength(100));
- assertEquals("struct<f1:uniontype<tinyint,decimal(20,10)>,f2:struct<f3:date,f4:double,f5:boolean>,f6:char(100)>",
+ assertEquals("struct<f1:union<tinyint,decimal(20,10)>,f2:struct<f3:date,f4:double,f5:boolean>,f6:char(100)>",
struct.toString());
assertEquals(
"{\"category\": \"struct\", \"id\": 0, \"max\": 8, \"fields\": [\n" +
- " \"f1\": {\"category\": \"uniontype\", \"id\": 1, \"max\": 3, \"children\": [\n" +
+ " \"f1\": {\"category\": \"union\", \"id\": 1, \"max\": 3, \"children\": [\n" +
" {\"category\": \"tinyint\", \"id\": 2, \"max\": 2},\n" +
" {\"category\": \"decimal\", \"id\": 3, \"max\": 3, \"precision\": 20, \"scale\": 10}]},\n" +
" \"f2\": {\"category\": \"struct\", \"id\": 4, \"max\": 7, \"fields\": [\n" +