You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by om...@apache.org on 2016/05/20 21:23:05 UTC
[27/27] hive git commit: HIVE-11417. Move the ReaderImpl and RowReaderImpl to the ORC module, by making shims for the row by row reader. (omalley reviewed by prasanth_j)

HIVE-11417. Move the ReaderImpl and RowReaderImpl to the ORC module,
by making shims for the row by row reader. (omalley reviewed by prasanth_j)

Fixes #72.


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ffb79509
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ffb79509
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ffb79509

Branch: refs/heads/master
Commit: ffb79509bcaefb9e7f916930edb022371b9d810f
Parents: fd06601
Author: Owen O'Malley <om...@apache.org>
Authored: Fri May 20 14:20:00 2016 -0700
Committer: Owen O'Malley <om...@apache.org>
Committed: Fri May 20 14:21:39 2016 -0700

----------------------------------------------------------------------
 bin/ext/orcfiledump.cmd                         |    2 +-
 bin/ext/orcfiledump.sh                          |    2 +-
 .../hive/hcatalog/streaming/TestStreaming.java  |    9 +-
 .../llap/io/decode/OrcEncodedDataConsumer.java  |    2 +-
 .../llap/io/encoded/OrcEncodedDataReader.java   |   15 +-
 .../hive/llap/io/metadata/OrcFileMetadata.java  |    4 +-
 orc/pom.xml                                     |   27 +
 .../org/apache/orc/FileFormatException.java     |   30 +
 orc/src/java/org/apache/orc/OrcFile.java        |    6 +
 orc/src/java/org/apache/orc/Reader.java         |    2 +-
 .../java/org/apache/orc/TypeDescription.java    |   18 +-
 orc/src/java/org/apache/orc/impl/AcidStats.java |   60 +
 .../orc/impl/ConvertTreeReaderFactory.java      | 2840 +++++++++++++
 .../java/org/apache/orc/impl/HadoopShims.java   |   79 +
 .../org/apache/orc/impl/HadoopShimsCurrent.java |   30 +
 .../org/apache/orc/impl/HadoopShims_2_2.java    |   71 +-
 .../java/org/apache/orc/impl/IntegerReader.java |    3 +-
 .../java/org/apache/orc/impl/OrcAcidUtils.java  |   85 +
 .../java/org/apache/orc/impl/ReaderImpl.java    |  758 ++++
 .../org/apache/orc/impl/RecordReaderImpl.java   | 1215 ++++++
 .../org/apache/orc/impl/RecordReaderUtils.java  |  578 +++
 .../org/apache/orc/impl/SchemaEvolution.java    |  190 +
 .../org/apache/orc/impl/TreeReaderFactory.java  | 2093 ++++++++++
 .../java/org/apache/orc/impl/ZeroCopyShims.java |   89 +
 orc/src/java/org/apache/orc/tools/FileDump.java |  934 +++++
 .../java/org/apache/orc/tools/JsonFileDump.java |  406 ++
 .../org/apache/orc/TestColumnStatistics.java    |  364 ++
 .../org/apache/orc/TestNewIntegerEncoding.java  | 1373 +++++++
 .../org/apache/orc/TestOrcNullOptimization.java |  415 ++
 .../test/org/apache/orc/TestOrcTimezone1.java   |  189 +
 .../test/org/apache/orc/TestOrcTimezone2.java   |  143 +
 .../org/apache/orc/TestStringDictionary.java    |  290 ++
 .../org/apache/orc/TestTypeDescription.java     |   68 +
 .../org/apache/orc/TestUnrolledBitPack.java     |  114 +
 .../test/org/apache/orc/TestVectorOrcFile.java  | 2782 +++++++++++++
 .../org/apache/orc/impl/TestOrcWideTable.java   |   64 +
 orc/src/test/org/apache/orc/impl/TestRLEv2.java |  307 ++
 .../org/apache/orc/impl/TestReaderImpl.java     |  152 +
 .../apache/orc/impl/TestRecordReaderImpl.java   | 1691 ++++++++
 .../org/apache/orc/impl/TestStreamName.java     |   49 +
 .../test/org/apache/orc/tools/TestFileDump.java |  486 +++
 .../org/apache/orc/tools/TestJsonFileDump.java  |  150 +
 orc/src/test/resources/orc-file-11-format.orc   |  Bin 0 -> 373336 bytes
 .../resources/orc-file-dump-bloomfilter.out     |  179 +
 .../resources/orc-file-dump-bloomfilter2.out    |  179 +
 .../orc-file-dump-dictionary-threshold.out      |  190 +
 orc/src/test/resources/orc-file-dump.json       | 1355 +++++++
 orc/src/test/resources/orc-file-dump.out        |  195 +
 orc/src/test/resources/orc-file-has-null.out    |  112 +
 .../expressions/CastDecimalToTimestamp.java     |    8 +-
 .../expressions/CastDoubleToTimestamp.java      |   13 +-
 .../vector/expressions/CastLongToTimestamp.java |    4 +-
 .../CastMillisecondsLongToTimestamp.java        |    7 +-
 .../ql/exec/vector/expressions/StringExpr.java  |  354 --
 .../hive/ql/hooks/PostExecOrcFileDump.java      |    4 +-
 .../hadoop/hive/ql/io/FileFormatException.java  |   30 -
 .../ql/io/orc/ConvertTreeReaderFactory.java     | 3750 ------------------
 .../apache/hadoop/hive/ql/io/orc/FileDump.java  |  884 -----
 .../hadoop/hive/ql/io/orc/JsonFileDump.java     |  401 --
 .../hive/ql/io/orc/OrcRawRecordMerger.java      |   35 +-
 .../hadoop/hive/ql/io/orc/OrcRecordUpdater.java |   71 +-
 .../hadoop/hive/ql/io/orc/ReaderImpl.java       |  509 +--
 .../hadoop/hive/ql/io/orc/RecordReaderImpl.java | 1823 ++++-----
 .../hive/ql/io/orc/RecordReaderUtils.java       |  586 ---
 .../hadoop/hive/ql/io/orc/SchemaEvolution.java  |  190 -
 .../hive/ql/io/orc/TreeReaderFactory.java       | 2525 ------------
 .../ql/io/orc/encoded/EncodedReaderImpl.java    |    2 +-
 .../orc/encoded/EncodedTreeReaderFactory.java   |    2 +-
 .../apache/hadoop/hive/ql/TestTxnCommands.java  |    2 -
 .../TestTimestampWritableAndColumnVector.java   |    7 +-
 .../vector/expressions/TestVectorTypeCasts.java |   10 +-
 .../exec/vector/udf/TestVectorUDFAdaptor.java   |    2 -
 .../hive/ql/io/orc/TestColumnStatistics.java    |  352 --
 .../hadoop/hive/ql/io/orc/TestFileDump.java     |  418 --
 .../hadoop/hive/ql/io/orc/TestJsonFileDump.java |  139 -
 .../hive/ql/io/orc/TestNewIntegerEncoding.java  | 1342 -------
 .../hadoop/hive/ql/io/orc/TestOrcFile.java      |   70 +-
 .../hive/ql/io/orc/TestOrcNullOptimization.java |  400 --
 .../hive/ql/io/orc/TestOrcRecordUpdater.java    |    4 +-
 .../hadoop/hive/ql/io/orc/TestOrcTimezone1.java |  194 -
 .../hadoop/hive/ql/io/orc/TestOrcTimezone2.java |  142 -
 .../hadoop/hive/ql/io/orc/TestOrcWideTable.java |   64 -
 .../apache/hadoop/hive/ql/io/orc/TestRLEv2.java |  297 --
 .../hadoop/hive/ql/io/orc/TestReaderImpl.java   |  151 -
 .../hive/ql/io/orc/TestRecordReaderImpl.java    | 1678 --------
 .../hadoop/hive/ql/io/orc/TestStreamName.java   |   50 -
 .../hive/ql/io/orc/TestStringDictionary.java    |  261 --
 .../hive/ql/io/orc/TestTypeDescription.java     |   68 -
 .../hive/ql/io/orc/TestUnrolledBitPack.java     |  114 -
 .../hive/ql/io/orc/TestVectorOrcFile.java       | 2791 -------------
 .../resources/orc-file-dump-bloomfilter.out     |  179 -
 .../resources/orc-file-dump-bloomfilter2.out    |  179 -
 .../orc-file-dump-dictionary-threshold.out      |  190 -
 ql/src/test/resources/orc-file-dump.json        | 1355 -------
 ql/src/test/resources/orc-file-dump.out         |  195 -
 ql/src/test/resources/orc-file-has-null.out     |  112 -
 .../results/clientpositive/orc_create.q.out     |   12 +-
 .../clientpositive/orc_int_type_promotion.q.out |   12 +-
 ...vol_orc_vec_mapwork_part_all_primitive.q.out |   40 +-
 ...vol_orc_vec_mapwork_part_all_primitive.q.out |   40 +-
 .../clientpositive/vector_complex_all.q.out     |    6 +-
 .../hive/serde2/io/TimestampWritable.java       |  114 +-
 .../PrimitiveObjectInspectorUtils.java          |    7 +-
 .../hive/serde2/io/TestTimestampWritable.java   |   41 +-
 .../apache/hadoop/hive/shims/Hadoop23Shims.java |   63 +-
 .../apache/hadoop/hive/shims/ZeroCopyShims.java |   86 -
 .../apache/hadoop/hive/shims/HadoopShims.java   |   70 -
 .../hadoop/hive/shims/HadoopShimsSecure.java    |   29 -
 .../ql/exec/vector/TimestampColumnVector.java   |    9 +-
 .../ql/exec/vector/expressions/StringExpr.java  |  354 ++
 .../hive/ql/io/sarg/SearchArgumentImpl.java     |   16 +-
 .../hadoop/hive/ql/util/TimestampUtils.java     |   94 +
 112 files changed, 21796 insertions(+), 21556 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/ffb79509/bin/ext/orcfiledump.cmd
----------------------------------------------------------------------
diff --git a/bin/ext/orcfiledump.cmd b/bin/ext/orcfiledump.cmd
index f78ed7f..ff4b410 100644
--- a/bin/ext/orcfiledump.cmd
+++ b/bin/ext/orcfiledump.cmd
@@ -14,7 +14,7 @@
 @rem See the License for the specific language governing permissions and
 @rem limitations under the License.
 
-set CLASS=org.apache.hadoop.hive.ql.io.orc.FileDump
+set CLASS=org.apache.orc.tools.FileDump
 set HIVE_OPTS=
 set HADOOP_CLASSPATH=
 

http://git-wip-us.apache.org/repos/asf/hive/blob/ffb79509/bin/ext/orcfiledump.sh
----------------------------------------------------------------------
diff --git a/bin/ext/orcfiledump.sh b/bin/ext/orcfiledump.sh
index 74f1a1e..c84e61c 100644
--- a/bin/ext/orcfiledump.sh
+++ b/bin/ext/orcfiledump.sh
@@ -17,7 +17,7 @@ THISSERVICE=orcfiledump
 export SERVICE_LIST="${SERVICE_LIST}${THISSERVICE} "
 
 orcfiledump () {
-  CLASS=org.apache.hadoop.hive.ql.io.orc.FileDump
+  CLASS=org.apache.orc.tools.FileDump
   HIVE_OPTS=''
   execHiveCmd $CLASS "$@"
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/ffb79509/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/TestStreaming.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/TestStreaming.java b/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/TestStreaming.java
index 6016425..4d2a2ee 100644
--- a/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/TestStreaming.java
+++ b/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/TestStreaming.java
@@ -57,16 +57,15 @@ import org.apache.hadoop.hive.metastore.api.ShowLocksResponseElement;
 import org.apache.hadoop.hive.metastore.api.TxnAbortedException;
 import org.apache.hadoop.hive.metastore.api.TxnInfo;
 import org.apache.hadoop.hive.metastore.api.TxnState;
-import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
 import org.apache.hadoop.hive.metastore.txn.TxnDbUtil;
 import org.apache.hadoop.hive.ql.CommandNeedRetryException;
 import org.apache.hadoop.hive.ql.Driver;
 import org.apache.hadoop.hive.ql.io.AcidUtils;
 import org.apache.hadoop.hive.ql.io.IOConstants;
-import org.apache.hadoop.hive.ql.io.orc.FileDump;
+import org.apache.orc.impl.OrcAcidUtils;
+import org.apache.orc.tools.FileDump;
 import org.apache.hadoop.hive.ql.io.orc.OrcFile;
 import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
-import org.apache.hadoop.hive.ql.io.orc.OrcRecordUpdater;
 import org.apache.hadoop.hive.ql.io.orc.OrcStruct;
 import org.apache.hadoop.hive.ql.io.orc.Reader;
 import org.apache.hadoop.hive.ql.io.orc.RecordReader;
@@ -1089,7 +1088,7 @@ public class TestStreaming {
     Reader reader = OrcFile.createReader(orcFile,
             OrcFile.readerOptions(conf).filesystem(fs));
 
-    RecordReader rows = reader.rows(null);
+    RecordReader rows = reader.rows();
     StructObjectInspector inspector = (StructObjectInspector) reader
             .getObjectInspector();
 
@@ -1561,7 +1560,7 @@ public class TestStreaming {
       final Map<String, List<Long>> offsetMap, final String key, final int numEntries)
       throws IOException {
     Path dataPath = new Path(file);
-    Path sideFilePath = OrcRecordUpdater.getSideFile(dataPath);
+    Path sideFilePath = OrcAcidUtils.getSideFile(dataPath);
     Path cPath = new Path(sideFilePath.getParent(), sideFilePath.getName() + ".corrupt");
     FileSystem fs = sideFilePath.getFileSystem(conf);
     List<Long> offsets = offsetMap.get(key);

http://git-wip-us.apache.org/repos/asf/hive/blob/ffb79509/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java
----------------------------------------------------------------------
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java
index a689f10..619d1a4 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java
@@ -42,7 +42,7 @@ import org.apache.hadoop.hive.ql.io.orc.encoded.EncodedTreeReaderFactory.Settabl
 import org.apache.hadoop.hive.ql.io.orc.encoded.OrcBatchKey;
 import org.apache.hadoop.hive.ql.io.orc.encoded.Reader.OrcEncodedColumnBatch;
 import org.apache.hadoop.hive.ql.io.orc.RecordReaderImpl;
-import org.apache.hadoop.hive.ql.io.orc.TreeReaderFactory;
+import org.apache.orc.impl.TreeReaderFactory;
 import org.apache.hadoop.hive.ql.io.orc.WriterImpl;
 import org.apache.orc.OrcProto;
 

http://git-wip-us.apache.org/repos/asf/hive/blob/ffb79509/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java
----------------------------------------------------------------------
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java
index 7effe69..69c0647 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java
@@ -67,13 +67,12 @@ import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
 import org.apache.hadoop.hive.ql.io.orc.OrcSplit;
 import org.apache.hadoop.hive.ql.io.orc.encoded.Reader;
 import org.apache.hadoop.hive.ql.io.orc.RecordReaderImpl;
-import org.apache.hadoop.hive.ql.io.orc.RecordReaderImpl.SargApplier;
 import org.apache.hadoop.hive.ql.io.orc.encoded.EncodedOrcFile;
 import org.apache.hadoop.hive.ql.io.orc.encoded.EncodedReader;
 import org.apache.hadoop.hive.ql.io.orc.encoded.OrcBatchKey;
 import org.apache.hadoop.hive.ql.io.orc.encoded.Reader.OrcEncodedColumnBatch;
 import org.apache.hadoop.hive.ql.io.orc.encoded.Reader.PoolFactory;
-import org.apache.hadoop.hive.ql.io.orc.RecordReaderUtils;
+import org.apache.orc.impl.RecordReaderUtils;
 import org.apache.orc.StripeInformation;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
 import org.apache.hadoop.mapred.FileSplit;
@@ -343,7 +342,8 @@ public class OrcEncodedDataReader extends CallableWithNdc<Void>
         // intermediate changes for individual columns will unset values in the array.
         // Skip this case for 0-column read. We could probably special-case it just like we do
         // in EncodedReaderImpl, but for now it's not that important.
-        if (colRgs.length > 0 && colRgs[0] == SargApplier.READ_NO_RGS) continue;
+        if (colRgs.length > 0 && colRgs[0] ==
+            RecordReaderImpl.SargApplier.READ_NO_RGS) continue;
 
         // 6.1. Determine the columns to read (usually the same as requested).
         if (cols == null || cols.size() == colRgs.length) {
@@ -691,12 +691,13 @@ public class OrcEncodedDataReader extends CallableWithNdc<Void>
    */
   private boolean determineRgsToRead(boolean[] globalIncludes, int rowIndexStride,
       ArrayList<OrcStripeMetadata> metadata) throws IOException {
-    SargApplier sargApp = null;
+    RecordReaderImpl.SargApplier sargApp = null;
     if (sarg != null && rowIndexStride != 0) {
       List<OrcProto.Type> types = fileMetadata.getTypes();
       String[] colNamesForSarg = OrcInputFormat.getSargColumnNames(
           columnNames, types, globalIncludes, fileMetadata.isOriginalFormat());
-      sargApp = new SargApplier(sarg, colNamesForSarg, rowIndexStride, types, globalIncludes.length);
+      sargApp = new RecordReaderImpl.SargApplier(sarg, colNamesForSarg,
+          rowIndexStride, types, globalIncludes.length);
     }
     boolean hasAnyData = false;
     // readState should have been initialized by this time with an empty array.
@@ -710,8 +711,8 @@ public class OrcEncodedDataReader extends CallableWithNdc<Void>
         rgsToRead = sargApp.pickRowGroups(stripe, stripeMetadata.getRowIndexes(),
             stripeMetadata.getBloomFilterIndexes(), true);
       }
-      boolean isNone = rgsToRead == SargApplier.READ_NO_RGS,
-          isAll = rgsToRead == SargApplier.READ_ALL_RGS;
+      boolean isNone = rgsToRead == RecordReaderImpl.SargApplier.READ_NO_RGS,
+          isAll = rgsToRead == RecordReaderImpl.SargApplier.READ_ALL_RGS;
       hasAnyData = hasAnyData || !isNone;
       if (LlapIoImpl.ORC_LOGGER.isTraceEnabled()) {
         if (isNone) {

http://git-wip-us.apache.org/repos/asf/hive/blob/ffb79509/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcFileMetadata.java
----------------------------------------------------------------------
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcFileMetadata.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcFileMetadata.java
index 4e42a0f..c9b0a4d 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcFileMetadata.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcFileMetadata.java
@@ -29,11 +29,11 @@ import org.apache.hadoop.hive.llap.cache.LlapCacheableBuffer;
 import org.apache.hadoop.hive.ql.io.SyntheticFileId;
 import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
 import org.apache.hadoop.hive.ql.io.orc.Reader;
-import org.apache.hadoop.hive.ql.io.orc.ReaderImpl.StripeInformationImpl;
 import org.apache.orc.CompressionKind;
 import org.apache.orc.FileMetadata;
 import org.apache.orc.OrcProto;
 import org.apache.orc.StripeInformation;
+import org.apache.orc.impl.ReaderImpl;
 
 /** ORC file metadata. Currently contains some duplicate info due to how different parts
  * of ORC use different info. Ideally we would get rid of protobuf structs in code beyond reading,
@@ -72,7 +72,7 @@ public final class OrcFileMetadata extends LlapCacheableBuffer implements FileMe
   @VisibleForTesting
   public static OrcFileMetadata createDummy(Object fileKey) {
     OrcFileMetadata ofm = new OrcFileMetadata(fileKey);
-    ofm.stripes.add(new StripeInformationImpl(
+    ofm.stripes.add(new ReaderImpl.StripeInformationImpl(
         OrcProto.StripeInformation.getDefaultInstance()));
     ofm.fileStats.add(OrcProto.ColumnStatistics.getDefaultInstance());
     ofm.stripeStats.add(OrcProto.StripeStatistics.newBuilder().addColStats(createStatsDummy()).build());

http://git-wip-us.apache.org/repos/asf/hive/blob/ffb79509/orc/pom.xml
----------------------------------------------------------------------
diff --git a/orc/pom.xml b/orc/pom.xml
index 2d80c97..cc27077 100644
--- a/orc/pom.xml
+++ b/orc/pom.xml
@@ -72,6 +72,33 @@
       </exclusions>
     </dependency>
     <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-hdfs</artifactId>
+      <version>${hadoop.version}</version>
+      <exclusions>
+        <exclusion>
+          <groupId>javax.servlet</groupId>
+          <artifactId>servlet-api</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>javax.servlet.jsp</groupId>
+          <artifactId>jsp-api</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.mortbay.jetty</groupId>
+          <artifactId>jetty</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.mortbay.jetty</groupId>
+          <artifactId>jetty-util</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.apache.avro</groupId>
+          <artifactId>avro</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
       <groupId>org.iq80.snappy</groupId>
       <artifactId>snappy</artifactId>
       <version>${snappy.version}</version>

http://git-wip-us.apache.org/repos/asf/hive/blob/ffb79509/orc/src/java/org/apache/orc/FileFormatException.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/FileFormatException.java b/orc/src/java/org/apache/orc/FileFormatException.java
new file mode 100644
index 0000000..2cebea7
--- /dev/null
+++ b/orc/src/java/org/apache/orc/FileFormatException.java
@@ -0,0 +1,30 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.orc;
+
+import java.io.IOException;
+
+/**
+ * Thrown when an invalid file format is encountered.
+ */
+public class FileFormatException extends IOException {
+
+  public FileFormatException(String errMsg) {
+    super(errMsg);
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/ffb79509/orc/src/java/org/apache/orc/OrcFile.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/OrcFile.java b/orc/src/java/org/apache/orc/OrcFile.java
index 85506ff..7dd7333 100644
--- a/orc/src/java/org/apache/orc/OrcFile.java
+++ b/orc/src/java/org/apache/orc/OrcFile.java
@@ -25,6 +25,7 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.orc.impl.MemoryManager;
+import org.apache.orc.impl.ReaderImpl;
 import org.apache.orc.impl.WriterImpl;
 
 /**
@@ -212,6 +213,11 @@ public class OrcFile {
     return new ReaderOptions(conf);
   }
 
+  public static Reader createReader(Path path,
+                                    ReaderOptions options) throws IOException {
+    return new ReaderImpl(path, options);
+  }
+
   public interface WriterContext {
     Writer getWriter();
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/ffb79509/orc/src/java/org/apache/orc/Reader.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/Reader.java b/orc/src/java/org/apache/orc/Reader.java
index 39de763..87f3293 100644
--- a/orc/src/java/org/apache/orc/Reader.java
+++ b/orc/src/java/org/apache/orc/Reader.java
@@ -334,7 +334,7 @@ public interface Reader {
    * @return a new RecordReader
    * @throws IOException
    */
-  RecordReader rowsOptions(Options options) throws IOException;
+  RecordReader rows(Options options) throws IOException;
 
   /**
    * @return List of integers representing version of the file, in order from major to minor.

http://git-wip-us.apache.org/repos/asf/hive/blob/ffb79509/orc/src/java/org/apache/orc/TypeDescription.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/TypeDescription.java b/orc/src/java/org/apache/orc/TypeDescription.java
index b8e057e..ffe3c1f 100644
--- a/orc/src/java/org/apache/orc/TypeDescription.java
+++ b/orc/src/java/org/apache/orc/TypeDescription.java
@@ -344,25 +344,25 @@ public class TypeDescription {
       case INT:
       case LONG:
       case DATE:
-        return new LongColumnVector();
+        return new LongColumnVector(maxSize);
       case TIMESTAMP:
-        return new TimestampColumnVector();
+        return new TimestampColumnVector(maxSize);
       case FLOAT:
       case DOUBLE:
-        return new DoubleColumnVector();
+        return new DoubleColumnVector(maxSize);
       case DECIMAL:
-        return new DecimalColumnVector(precision, scale);
+        return new DecimalColumnVector(maxSize, precision, scale);
       case STRING:
       case BINARY:
       case CHAR:
       case VARCHAR:
-        return new BytesColumnVector();
+        return new BytesColumnVector(maxSize);
       case STRUCT: {
         ColumnVector[] fieldVector = new ColumnVector[children.size()];
         for(int i=0; i < fieldVector.length; ++i) {
           fieldVector[i] = children.get(i).createColumn(maxSize);
         }
-        return new StructColumnVector(VectorizedRowBatch.DEFAULT_SIZE,
+        return new StructColumnVector(maxSize,
                 fieldVector);
       }
       case UNION: {
@@ -370,14 +370,14 @@ public class TypeDescription {
         for(int i=0; i < fieldVector.length; ++i) {
           fieldVector[i] = children.get(i).createColumn(maxSize);
         }
-        return new UnionColumnVector(VectorizedRowBatch.DEFAULT_SIZE,
+        return new UnionColumnVector(maxSize,
             fieldVector);
       }
       case LIST:
-        return new ListColumnVector(VectorizedRowBatch.DEFAULT_SIZE,
+        return new ListColumnVector(maxSize,
             children.get(0).createColumn(maxSize));
       case MAP:
-        return new MapColumnVector(VectorizedRowBatch.DEFAULT_SIZE,
+        return new MapColumnVector(maxSize,
             children.get(0).createColumn(maxSize),
             children.get(1).createColumn(maxSize));
       default:

http://git-wip-us.apache.org/repos/asf/hive/blob/ffb79509/orc/src/java/org/apache/orc/impl/AcidStats.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/AcidStats.java b/orc/src/java/org/apache/orc/impl/AcidStats.java
new file mode 100644
index 0000000..6657fe9
--- /dev/null
+++ b/orc/src/java/org/apache/orc/impl/AcidStats.java
@@ -0,0 +1,60 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.orc.impl;
+
+/**
+ * Statistics about the ACID operations in an ORC file
+ */
+public class AcidStats {
+  public long inserts;
+  public long updates;
+  public long deletes;
+
+  public AcidStats() {
+    inserts = 0;
+    updates = 0;
+    deletes = 0;
+  }
+
+  public AcidStats(String serialized) {
+    String[] parts = serialized.split(",");
+    inserts = Long.parseLong(parts[0]);
+    updates = Long.parseLong(parts[1]);
+    deletes = Long.parseLong(parts[2]);
+  }
+
+  public String serialize() {
+    StringBuilder builder = new StringBuilder();
+    builder.append(inserts);
+    builder.append(",");
+    builder.append(updates);
+    builder.append(",");
+    builder.append(deletes);
+    return builder.toString();
+  }
+
+  @Override
+  public String toString() {
+    StringBuilder builder = new StringBuilder();
+    builder.append(" inserts: ").append(inserts);
+    builder.append(" updates: ").append(updates);
+    builder.append(" deletes: ").append(deletes);
+    return builder.toString();
+  }
+}