You are viewing a plain text version of this content. The canonical link for it was not preserved in this plain text copy.

Posted to commits@orc.apache.org by om...@apache.org on 2016/11/29 00:49:52 UTC

[1/2] orc git commit: HIVE-14089. Complex type support in LLAP is broken. (Sergey Shelukhin, reviewed by Prasanth Jayachandran)

Repository: orc
Updated Branches:
  refs/heads/master fb8aec28d -> c9a6dc1ac


HIVE-14089. Complex type support in LLAP is broken. (Sergey Shelukhin, reviewed by Prasanth Jayachandran)

Signed-off-by: Owen O'Malley <om...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/d09af8d9
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/d09af8d9
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/d09af8d9

Branch: refs/heads/master
Commit: d09af8d965a18921b86da991248a496317892d4a
Parents: fb8aec2
Author: Owen O'Malley <om...@apache.org>
Authored: Mon Nov 28 15:15:45 2016 -0800
Committer: Owen O'Malley <om...@apache.org>
Committed: Mon Nov 28 15:15:45 2016 -0800

----------------------------------------------------------------------
 .../orc/impl/ConvertTreeReaderFactory.java      |   2 +-
 .../org/apache/orc/impl/TreeReaderFactory.java  | 102 +++++++++++++++----
 java/pom.xml                                    |   2 +-
 java/storage-api/pom.xml                        |   2 +-
 .../common/io/encoded/EncodedColumnBatch.java   |  52 ++++++----
 5 files changed, 114 insertions(+), 46 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/orc/blob/d09af8d9/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java b/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
index b414b90..ee9e68e 100644
--- a/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
+++ b/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
@@ -292,7 +292,7 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
     }
 
     @Override
-    void seek(PositionProvider[] index) throws IOException {
+    public void seek(PositionProvider[] index) throws IOException {
      // Pass-thru.
       convertTreeReader.seek(index);
     }

http://git-wip-us.apache.org/repos/asf/orc/blob/d09af8d9/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java b/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
index 1e1ae01..73fe28f 100644
--- a/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
+++ b/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
@@ -126,7 +126,7 @@ public class TreeReaderFactory {
       }
     }
 
-    static IntegerReader createIntegerReader(OrcProto.ColumnEncoding.Kind kind,
+    protected static IntegerReader createIntegerReader(OrcProto.ColumnEncoding.Kind kind,
         InStream in,
         boolean signed,
         Context context) throws IOException {
@@ -162,7 +162,7 @@ public class TreeReaderFactory {
      * @param index the indexes loaded from the file
      * @throws IOException
      */
-    void seek(PositionProvider[] index) throws IOException {
+    public void seek(PositionProvider[] index) throws IOException {
       seek(index[columnId]);
     }
 
@@ -248,6 +248,10 @@ public class TreeReaderFactory {
     public BitFieldReader getPresent() {
       return present;
     }
+
+    public int getColumnId() {
+      return columnId;
+    }
   }
 
   public static class NullTreeReader extends TreeReader {
@@ -309,7 +313,7 @@ public class TreeReaderFactory {
     }
 
     @Override
-    void seek(PositionProvider[] index) throws IOException {
+    public void seek(PositionProvider[] index) throws IOException {
       seek(index[columnId]);
     }
 
@@ -360,7 +364,7 @@ public class TreeReaderFactory {
     }
 
     @Override
-    void seek(PositionProvider[] index) throws IOException {
+    public void seek(PositionProvider[] index) throws IOException {
       seek(index[columnId]);
     }
 
@@ -427,7 +431,7 @@ public class TreeReaderFactory {
     }
 
     @Override
-    void seek(PositionProvider[] index) throws IOException {
+    public void seek(PositionProvider[] index) throws IOException {
       seek(index[columnId]);
     }
 
@@ -494,7 +498,7 @@ public class TreeReaderFactory {
     }
 
     @Override
-    void seek(PositionProvider[] index) throws IOException {
+    public void seek(PositionProvider[] index) throws IOException {
       seek(index[columnId]);
     }
 
@@ -562,7 +566,7 @@ public class TreeReaderFactory {
     }
 
     @Override
-    void seek(PositionProvider[] index) throws IOException {
+    public void seek(PositionProvider[] index) throws IOException {
       seek(index[columnId]);
     }
 
@@ -616,7 +620,7 @@ public class TreeReaderFactory {
     }
 
     @Override
-    void seek(PositionProvider[] index) throws IOException {
+    public void seek(PositionProvider[] index) throws IOException {
       seek(index[columnId]);
     }
 
@@ -710,7 +714,7 @@ public class TreeReaderFactory {
     }
 
     @Override
-    void seek(PositionProvider[] index) throws IOException {
+    public void seek(PositionProvider[] index) throws IOException {
       seek(index[columnId]);
     }
 
@@ -821,7 +825,7 @@ public class TreeReaderFactory {
     }
 
     @Override
-    void seek(PositionProvider[] index) throws IOException {
+    public void seek(PositionProvider[] index) throws IOException {
       seek(index[columnId]);
     }
 
@@ -950,7 +954,7 @@ public class TreeReaderFactory {
     }
 
     @Override
-    void seek(PositionProvider[] index) throws IOException {
+    public void seek(PositionProvider[] index) throws IOException {
       seek(index[columnId]);
     }
 
@@ -1059,7 +1063,7 @@ public class TreeReaderFactory {
     }
 
     @Override
-    void seek(PositionProvider[] index) throws IOException {
+    public void seek(PositionProvider[] index) throws IOException {
       seek(index[columnId]);
     }
 
@@ -1130,7 +1134,7 @@ public class TreeReaderFactory {
     }
 
     @Override
-    void seek(PositionProvider[] index) throws IOException {
+    public void seek(PositionProvider[] index) throws IOException {
       seek(index[columnId]);
     }
 
@@ -1244,7 +1248,7 @@ public class TreeReaderFactory {
     }
 
     @Override
-    void seek(PositionProvider[] index) throws IOException {
+    public void seek(PositionProvider[] index) throws IOException {
       reader.seek(index);
     }
 
@@ -1392,7 +1396,7 @@ public class TreeReaderFactory {
     }
 
     @Override
-    void seek(PositionProvider[] index) throws IOException {
+    public void seek(PositionProvider[] index) throws IOException {
       seek(index[columnId]);
     }
 
@@ -1542,7 +1546,7 @@ public class TreeReaderFactory {
     }
 
     @Override
-    void seek(PositionProvider[] index) throws IOException {
+    public void seek(PositionProvider[] index) throws IOException {
       seek(index[columnId]);
     }
 
@@ -1748,7 +1752,7 @@ public class TreeReaderFactory {
     }
   }
 
-  protected static class StructTreeReader extends TreeReader {
+  public static class StructTreeReader extends TreeReader {
     protected final TreeReader[] fields;
 
     protected StructTreeReader(int columnId,
@@ -1764,8 +1768,23 @@ public class TreeReaderFactory {
       }
     }
 
+    public TreeReader[] getChildReaders() {
+      return fields;
+    }
+
+    protected StructTreeReader(int columnId, InStream present,
+                               Context context,
+                               OrcProto.ColumnEncoding encoding,
+                               TreeReader[] childReaders) throws IOException {
+      super(columnId, present, context);
+      if (encoding != null) {
+        checkEncoding(encoding);
+      }
+      this.fields = childReaders;
+    }
+
     @Override
-    void seek(PositionProvider[] index) throws IOException {
+    public void seek(PositionProvider[] index) throws IOException {
       super.seek(index);
       for (TreeReader kid : fields) {
         if (kid != null) {
@@ -1847,8 +1866,19 @@ public class TreeReaderFactory {
       }
     }
 
+    protected UnionTreeReader(int columnId, InStream present,
+                              Context context,
+                              OrcProto.ColumnEncoding encoding,
+                              TreeReader[] childReaders) throws IOException {
+      super(columnId, present, context);
+      if (encoding != null) {
+        checkEncoding(encoding);
+      }
+      this.fields = childReaders;
+    }
+
     @Override
-    void seek(PositionProvider[] index) throws IOException {
+    public void seek(PositionProvider[] index) throws IOException {
       super.seek(index);
       tags.seek(index[columnId]);
       for (TreeReader kid : fields) {
@@ -1917,8 +1947,22 @@ public class TreeReaderFactory {
       elementReader = createTreeReader(elementType, context);
     }
 
+    protected ListTreeReader(int columnId,
+                             InStream present,
+                             Context context,
+                             InStream data,
+                             OrcProto.ColumnEncoding encoding,
+                             TreeReader elementReader) throws IOException {
+      super(columnId, present, context);
+      if (data != null && encoding != null) {
+        checkEncoding(encoding);
+        this.lengths = createIntegerReader(encoding.getKind(), data, false, context);
+      }
+      this.elementReader = elementReader;
+    }
+
     @Override
-    void seek(PositionProvider[] index) throws IOException {
+    public void seek(PositionProvider[] index) throws IOException {
       super.seek(index);
       lengths.seek(index[columnId]);
       elementReader.seek(index);
@@ -1996,8 +2040,24 @@ public class TreeReaderFactory {
       valueReader = createTreeReader(valueType, context);
     }
 
+    protected MapTreeReader(int columnId,
+                            InStream present,
+                            Context context,
+                            InStream data,
+                            OrcProto.ColumnEncoding encoding,
+                            TreeReader keyReader,
+                            TreeReader valueReader) throws IOException {
+      super(columnId, present, context);
+      if (data != null && encoding != null) {
+        checkEncoding(encoding);
+        this.lengths = createIntegerReader(encoding.getKind(), data, false, context);
+      }
+      this.keyReader = keyReader;
+      this.valueReader = valueReader;
+    }
+
     @Override
-    void seek(PositionProvider[] index) throws IOException {
+    public void seek(PositionProvider[] index) throws IOException {
       super.seek(index);
       lengths.seek(index[columnId]);
       keyReader.seek(index);

http://git-wip-us.apache.org/repos/asf/orc/blob/d09af8d9/java/pom.xml
----------------------------------------------------------------------
diff --git a/java/pom.xml b/java/pom.xml
index 338c2ba..09b45ae 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -70,7 +70,7 @@
     <test.tmp.dir>${project.build.directory}/testing-tmp</test.tmp.dir>
 
     <hadoop.version>2.6.4</hadoop.version>
-    <storage-api.version>2.1.1-pre-orc</storage-api.version>
+    <storage-api.version>2.1.1.2-pre-orc</storage-api.version>
   </properties>
 
   <build>

http://git-wip-us.apache.org/repos/asf/orc/blob/d09af8d9/java/storage-api/pom.xml
----------------------------------------------------------------------
diff --git a/java/storage-api/pom.xml b/java/storage-api/pom.xml
index 85f8060..a8543b4 100644
--- a/java/storage-api/pom.xml
+++ b/java/storage-api/pom.xml
@@ -27,7 +27,7 @@
   <artifactId>hive-storage-api</artifactId>
   <!-- remove our custom version of storage-api once we get the changes
        released as hive 2.1.1 -->
-  <version>2.1.1-pre-orc</version>
+  <version>2.1.1.2-pre-orc</version>
   <packaging>jar</packaging>
   <name>Hive Storage API</name>
 

http://git-wip-us.apache.org/repos/asf/orc/blob/d09af8d9/java/storage-api/src/java/org/apache/hadoop/hive/common/io/encoded/EncodedColumnBatch.java
----------------------------------------------------------------------
diff --git a/java/storage-api/src/java/org/apache/hadoop/hive/common/io/encoded/EncodedColumnBatch.java b/java/storage-api/src/java/org/apache/hadoop/hive/common/io/encoded/EncodedColumnBatch.java
index 907181e..13772c9 100644
--- a/java/storage-api/src/java/org/apache/hadoop/hive/common/io/encoded/EncodedColumnBatch.java
+++ b/java/storage-api/src/java/org/apache/hadoop/hive/common/io/encoded/EncodedColumnBatch.java
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.hive.common.io.encoded;
 
+import java.util.Arrays;
 import java.util.List;
 import java.util.concurrent.atomic.AtomicInteger;
 
@@ -76,14 +77,17 @@ public class EncodedColumnBatch<BatchKey> {
   /** The key that is used to map this batch to source location. */
   protected BatchKey batchKey;
   /**
-   * Stream data for each stream, for each included column.
-   * For each column, streams are indexed by kind, with missing elements being null.
+   * Stream data for each column that has true in the corresponding hasData position.
+   * For each column, streams are indexed by kind (for ORC), with missing elements being null.
    */
   protected ColumnStreamData[][] columnData;
-  /** Column indexes included in the batch. Correspond to columnData elements. */
-  protected int[] columnIxs;
+  /** Indicates which columns have data. Correspond to columnData elements. */
+  protected boolean[] hasData;
 
   public void reset() {
+    if (hasData != null) {
+      Arrays.fill(hasData, false);
+    }
     if (columnData == null) return;
     for (int i = 0; i < columnData.length; ++i) {
       if (columnData[i] == null) continue;
@@ -93,37 +97,37 @@ public class EncodedColumnBatch<BatchKey> {
     }
   }
 
-  public void initColumn(int colIxMod, int colIx, int streamCount) {
-    columnIxs[colIxMod] = colIx;
-    if (columnData[colIxMod] == null || columnData[colIxMod].length != streamCount) {
-      columnData[colIxMod] = new ColumnStreamData[streamCount];
+  public void initColumn(int colIx, int streamCount) {
+    hasData[colIx] = true;
+    if (columnData[colIx] == null || columnData[colIx].length != streamCount) {
+      columnData[colIx] = new ColumnStreamData[streamCount];
     }
   }
 
-  public void setStreamData(int colIxMod, int streamKind, ColumnStreamData csd) {
-    columnData[colIxMod][streamKind] = csd;
-  }
-
-  public void setAllStreamsData(int colIxMod, int colIx, ColumnStreamData[] sbs) {
-    columnIxs[colIxMod] = colIx;
-    columnData[colIxMod] = sbs;
+  public void setStreamData(int colIx, int streamIx, ColumnStreamData csd) {
+    assert hasData[colIx];
+    columnData[colIx][streamIx] = csd;
   }
 
   public BatchKey getBatchKey() {
     return batchKey;
   }
 
-  public ColumnStreamData[][] getColumnData() {
-    return columnData;
+  public ColumnStreamData[] getColumnData(int colIx) {
+    if (!hasData[colIx]) throw new AssertionError("No data for column " + colIx);
+    return columnData[colIx];
   }
 
-  public int[] getColumnIxs() {
-    return columnIxs;
+  public int getTotalColCount() {
+    return columnData.length; // Includes the columns that have no data
   }
 
   protected void resetColumnArrays(int columnCount) {
-    if (columnIxs != null && columnCount == columnIxs.length) return;
-    columnIxs = new int[columnCount];
+    if (hasData != null && columnCount == hasData.length) {
+      Arrays.fill(hasData, false);
+      return;
+    }
+    hasData = new boolean[columnCount];
     ColumnStreamData[][] columnData = new ColumnStreamData[columnCount][];
     if (this.columnData != null) {
       for (int i = 0; i < Math.min(columnData.length, this.columnData.length); ++i) {
@@ -132,4 +136,8 @@ public class EncodedColumnBatch<BatchKey> {
     }
     this.columnData = columnData;
   }
-}
\ No newline at end of file
+
+  public boolean hasData(int colIx) {
+    return hasData[colIx];
+  }
+}

[2/2] orc git commit: HIVE-15124. Fix OrcInputFormat to use reader's schema for include boolean array.

Posted by om...@apache.org.

HIVE-15124. Fix OrcInputFormat to use reader's schema for include
boolean array.

Signed-off-by: Owen O'Malley <om...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/c9a6dc1a
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/c9a6dc1a
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/c9a6dc1a

Branch: refs/heads/master
Commit: c9a6dc1ac4b15a54184203b2151151564f98874b
Parents: d09af8d
Author: Owen O'Malley <om...@apache.org>
Authored: Mon Nov 28 16:48:34 2016 -0800
Committer: Owen O'Malley <om...@apache.org>
Committed: Mon Nov 28 16:48:34 2016 -0800

----------------------------------------------------------------------
 .../java/org/apache/orc/impl/ReaderImpl.java    | 12 ------
 .../org/apache/orc/impl/RecordReaderImpl.java   | 25 ++++++------
 .../org/apache/orc/impl/SchemaEvolution.java    | 40 +++++++++++++++++++-
 .../org/apache/orc/impl/TreeReaderFactory.java  |  3 +-
 4 files changed, 50 insertions(+), 30 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/orc/blob/c9a6dc1a/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/ReaderImpl.java b/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
index 039365c..ad3f8ba 100644
--- a/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
@@ -585,18 +585,6 @@ public class ReaderImpl implements Reader {
   @Override
   public RecordReader rows(Options options) throws IOException {
     LOG.info("Reading ORC rows from " + path + " with " + options);
-    boolean[] include = options.getInclude();
-    // if included columns is null, then include all columns
-    if (include == null) {
-      options = options.clone();
-      TypeDescription readSchema = options.getSchema();
-      if (readSchema == null) {
-        readSchema = schema;
-      }
-      include = new boolean[readSchema.getMaximumId() + 1];
-      Arrays.fill(include, true);
-      options.include(include);
-    }
     return new RecordReaderImpl(this, options);
   }
 

http://git-wip-us.apache.org/repos/asf/orc/blob/c9a6dc1a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
index c7ce2bb..362dd3d 100644
--- a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
@@ -77,8 +77,8 @@ public class RecordReaderImpl implements RecordReader {
   private final List<OrcProto.Type> types;
   private final int bufferSize;
   private final SchemaEvolution evolution;
-  private final boolean[] included;
-  private final boolean[] writerIncluded;
+  // the file included columns indexed by the file's column ids.
+  private final boolean[] fileIncluded;
   private final long rowIndexStride;
   private long rowInStripe = 0;
   private int currentStripe = -1;
@@ -138,9 +138,7 @@ public class RecordReaderImpl implements RecordReader {
 
   protected RecordReaderImpl(ReaderImpl fileReader,
                              Reader.Options options) throws IOException {
-    this.included = options.getInclude();
     this.writerVersion = fileReader.getWriterVersion();
-    included[0] = true;
     if (options.getSchema() == null) {
       if (LOG.isInfoEnabled()) {
         LOG.info("Reader schema not provided -- using file schema " +
@@ -174,7 +172,7 @@ public class RecordReaderImpl implements RecordReader {
     if (sarg != null && rowIndexStride != 0) {
       sargApp = new SargApplier(sarg, options.getColumnNames(),
                                 rowIndexStride,
-                                included.length, evolution,
+                                evolution,
                                 writerVersion);
     } else {
       sargApp = null;
@@ -198,7 +196,7 @@ public class RecordReaderImpl implements RecordReader {
       zeroCopy = OrcConf.USE_ZEROCOPY.getBoolean(fileReader.conf);
     }
     if (options.getDataReader() != null) {
-      this.dataReader = options.getDataReader();
+      this.dataReader = options.getDataReader().clone();
     } else {
       this.dataReader = RecordReaderUtils.createDefaultDataReader(
           DataReaderProperties.builder()
@@ -224,7 +222,7 @@ public class RecordReaderImpl implements RecordReader {
       .skipCorrupt(skipCorrupt);
     reader = TreeReaderFactory.createTreeReader(evolution.getReaderSchema(), readerContext);
 
-    writerIncluded = evolution.getFileIncluded();
+    this.fileIncluded = evolution.getFileIncluded();
     indexes = new OrcProto.RowIndex[types.size()];
     bloomFilterIndices = new OrcProto.BloomFilterIndex[types.size()];
     bloomFilterKind = new OrcProto.Stream.Kind[types.size()];
@@ -733,7 +731,6 @@ public class RecordReaderImpl implements RecordReader {
     public SargApplier(SearchArgument sarg,
                        String[] columnNames,
                        long rowIndexStride,
-                       int includedCount,
                        SchemaEvolution evolution,
                        OrcFile.WriterVersion writerVersion) {
       this.writerVersion = writerVersion;
@@ -744,7 +741,7 @@ public class RecordReaderImpl implements RecordReader {
       this.rowIndexStride = rowIndexStride;
       // included will not be null, row options will fill the array with
       // trues if null
-      sargColumns = new boolean[includedCount];
+      sargColumns = new boolean[evolution.getFileIncluded().length];
       for (int i : filterColumns) {
         // filter columns may have -1 as index which could be partition
         // column in SARG.
@@ -832,7 +829,7 @@ public class RecordReaderImpl implements RecordReader {
     if (sargApp == null) {
       return null;
     }
-    readRowIndex(currentStripe, writerIncluded, sargApp.sargColumns);
+    readRowIndex(currentStripe, fileIncluded, sargApp.sargColumns);
     return sargApp.pickRowGroups(stripes.get(currentStripe), indexes,
         bloomFilterKind, bloomFilterIndices, false);
   }
@@ -890,7 +887,7 @@ public class RecordReaderImpl implements RecordReader {
   }
 
   private boolean isFullRead() {
-    for (boolean isColumnPresent : writerIncluded){
+    for (boolean isColumnPresent : fileIncluded){
       if (!isColumnPresent){
         return false;
       }
@@ -1005,7 +1002,7 @@ public class RecordReaderImpl implements RecordReader {
   private void readPartialDataStreams(StripeInformation stripe) throws IOException {
     List<OrcProto.Stream> streamList = stripeFooter.getStreamsList();
     DiskRangeList toRead = planReadPartialDataStreams(streamList,
-        indexes, writerIncluded, includedRowGroups, codec != null,
+        indexes, fileIncluded, includedRowGroups, codec != null,
         stripeFooter.getColumnsList(), types, bufferSize, true);
     if (LOG.isDebugEnabled()) {
       LOG.debug("chunks = " + RecordReaderUtils.stringifyDiskRanges(toRead));
@@ -1015,7 +1012,7 @@ public class RecordReaderImpl implements RecordReader {
       LOG.debug("merge = " + RecordReaderUtils.stringifyDiskRanges(bufferChunks));
     }
 
-    createStreams(streamList, bufferChunks, writerIncluded, codec, bufferSize, streams);
+    createStreams(streamList, bufferChunks, fileIncluded, codec, bufferSize, streams);
   }
 
   /**
@@ -1227,7 +1224,7 @@ public class RecordReaderImpl implements RecordReader {
       currentStripe = rightStripe;
       readStripe();
     }
-    readRowIndex(currentStripe, writerIncluded, sargApp == null ? null : sargApp.sargColumns);
+    readRowIndex(currentStripe, fileIncluded, sargApp == null ? null : sargApp.sargColumns);
 
     // if we aren't to the right row yet, advance in the stripe.
     advanceToNextRow(reader, rowNumber, true);

http://git-wip-us.apache.org/repos/asf/orc/blob/c9a6dc1a/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java b/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
index 20adfd8..b76a319 100644
--- a/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
+++ b/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
@@ -39,6 +39,8 @@ public class SchemaEvolution {
   private final TypeDescription[] readerFileTypes;
   // indexed by reader column id
   private final boolean[] readerIncluded;
+  // the offset to the first column id ignoring any ACID columns
+  private final int readerColumnOffset;
   // indexed by file column id
   private final boolean[] fileIncluded;
   private final TypeDescription fileSchema;
@@ -76,12 +78,20 @@ public class SchemaEvolution {
     this.hasConversion = false;
     this.fileSchema = fileSchema;
     isAcid = checkAcidSchema(fileSchema);
+    this.readerColumnOffset = isAcid ? acidEventFieldNames.size() : 0;
     if (readerSchema != null) {
       if (isAcid) {
         this.readerSchema = createEventSchema(readerSchema);
       } else {
         this.readerSchema = readerSchema;
       }
+      if (readerIncluded != null &&
+          readerIncluded.length + readerColumnOffset !=
+            this.readerSchema.getMaximumId() + 1) {
+        throw new IllegalArgumentException("Include vector the wrong length: "
+            + this.readerSchema.toJson() + " with include length "
+            + readerIncluded.length);
+      }
       this.readerFileTypes =
         new TypeDescription[this.readerSchema.getMaximumId() + 1];
       int positionalLevels = 0;
@@ -106,6 +116,13 @@ public class SchemaEvolution {
       this.readerSchema = fileSchema;
       this.readerFileTypes =
         new TypeDescription[this.readerSchema.getMaximumId() + 1];
+      if (readerIncluded != null &&
+          readerIncluded.length + readerColumnOffset !=
+            this.readerSchema.getMaximumId() + 1) {
+        throw new IllegalArgumentException("Include vector the wrong length: "
+            + this.readerSchema.toJson() + " with include length "
+            + readerIncluded.length);
+      }
       buildIdentityConversion(this.readerSchema);
     }
     this.ppdSafeConversion = populatePpdSafeConversion();
@@ -170,10 +187,18 @@ public class SchemaEvolution {
     return readerFileTypes[id];
   }
 
+  /**
+   * Get whether each column is included from the reader's point of view.
+   * @return a boolean array indexed by reader column id
+   */
   public boolean[] getReaderIncluded() {
     return readerIncluded;
   }
 
+  /**
+   * Get whether each column is included from the file's point of view.
+   * @return a boolean array indexed by file column id
+   */
   public boolean[] getFileIncluded() {
     return fileIncluded;
   }
@@ -273,6 +298,17 @@ public class SchemaEvolution {
   }
 
   /**
+   * Should we read the given reader column?
+   * @param readerId the id of column in the extended reader schema
+   * @return true if the column should be read
+   */
+  public boolean includeReaderColumn(int readerId) {
+    return readerIncluded == null ||
+        readerId <= readerColumnOffset ||
+        readerIncluded[readerId - readerColumnOffset];
+  }
+
+  /**
    * Build the mapping from the file type to the reader type. For pre-HIVE-4243
    * ORC files, the top level structure is matched using position within the
    * row. Otherwise, structs fields are matched by name.
@@ -287,7 +323,7 @@ public class SchemaEvolution {
                        TypeDescription readerType,
                        int positionalLevels) {
     // if the column isn't included, don't map it
-    if (readerIncluded != null && !readerIncluded[readerType.getId()]) {
+    if (!includeReaderColumn(readerType.getId())) {
       return;
     }
     boolean isOk = true;
@@ -400,7 +436,7 @@ public class SchemaEvolution {
 
   void buildIdentityConversion(TypeDescription readerType) {
     int id = readerType.getId();
-    if (readerIncluded != null && !readerIncluded[id]) {
+    if (!includeReaderColumn(id)) {
       return;
     }
     if (readerFileTypes[id] != null) {

http://git-wip-us.apache.org/repos/asf/orc/blob/c9a6dc1a/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java b/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
index 73fe28f..be8d6b2 100644
--- a/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
+++ b/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
@@ -2132,8 +2132,7 @@ public class TreeReaderFactory {
     final SchemaEvolution evolution = context.getSchemaEvolution();
     final boolean[] included = evolution.getReaderIncluded();
     TypeDescription fileType = evolution.getFileType(readerType);
-    if (fileType == null ||
-        (included != null && !included[readerType.getId()])) {
+    if (fileType == null || !evolution.includeReaderColumn(readerType.getId())){
       return new NullTreeReader(0);
     }
     TypeDescription.Category readerTypeCategory = readerType.getCategory();