Posted to commits@hive.apache.org by om...@apache.org on 2017/07/19 16:58:24 UTC

[01/37] hive git commit: HIVE-17118. Move the hive-orc source files to make the package names unique.

Repository: hive
Updated Branches:
  refs/heads/branch-2.2 3e426721c -> df8921d85


http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/orc/impl/TestRunLengthIntegerReader.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/orc/impl/TestRunLengthIntegerReader.java b/orc/src/test/org/apache/orc/impl/TestRunLengthIntegerReader.java
deleted file mode 100644
index 28239ba..0000000
--- a/orc/src/test/org/apache/orc/impl/TestRunLengthIntegerReader.java
+++ /dev/null
@@ -1,125 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc.impl;
-
-import static junit.framework.Assert.assertEquals;
-
-import java.nio.ByteBuffer;
-import java.util.Random;
-
-import org.apache.orc.CompressionCodec;
-import org.junit.Test;
-
-public class TestRunLengthIntegerReader {
-
-  public void runSeekTest(CompressionCodec codec) throws Exception {
-    TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
-    RunLengthIntegerWriter out = new RunLengthIntegerWriter(
-        new OutStream("test", 1000, codec, collect), true);
-    TestInStream.PositionCollector[] positions =
-        new TestInStream.PositionCollector[4096];
-    Random random = new Random(99);
-    int[] junk = new int[2048];
-    for(int i=0; i < junk.length; ++i) {
-      junk[i] = random.nextInt();
-    }
-    for(int i=0; i < 4096; ++i) {
-      positions[i] = new TestInStream.PositionCollector();
-      out.getPosition(positions[i]);
-      // test runs, incrementing runs, non-runs
-      if (i < 1024) {
-        out.write(i/4);
-      } else if (i < 2048) {
-        out.write(2*i);
-      } else {
-        out.write(junk[i-2048]);
-      }
-    }
-    out.flush();
-    ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
-    collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
-    inBuf.flip();
-    RunLengthIntegerReader in = new RunLengthIntegerReader(InStream.create
-        ("test", new ByteBuffer[]{inBuf}, new long[]{0}, inBuf.remaining(),
-            codec, 1000), true);
-    for(int i=0; i < 2048; ++i) {
-      int x = (int) in.next();
-      if (i < 1024) {
-        assertEquals(i/4, x);
-      } else if (i < 2048) {
-        assertEquals(2*i, x);
-      } else {
-        assertEquals(junk[i-2048], x);
-      }
-    }
-    for(int i=2047; i >= 0; --i) {
-      in.seek(positions[i]);
-      int x = (int) in.next();
-      if (i < 1024) {
-        assertEquals(i/4, x);
-      } else if (i < 2048) {
-        assertEquals(2*i, x);
-      } else {
-        assertEquals(junk[i-2048], x);
-      }
-    }
-  }
-
-  @Test
-  public void testUncompressedSeek() throws Exception {
-    runSeekTest(null);
-  }
-
-  @Test
-  public void testCompressedSeek() throws Exception {
-    runSeekTest(new ZlibCodec());
-  }
-
-  @Test
-  public void testSkips() throws Exception {
-    TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
-    RunLengthIntegerWriter out = new RunLengthIntegerWriter(
-        new OutStream("test", 100, null, collect), true);
-    for(int i=0; i < 2048; ++i) {
-      if (i < 1024) {
-        out.write(i);
-      } else {
-        out.write(256 * i);
-      }
-    }
-    out.flush();
-    ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
-    collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
-    inBuf.flip();
-    RunLengthIntegerReader in = new RunLengthIntegerReader(InStream.create
-        ("test", new ByteBuffer[]{inBuf}, new long[]{0}, inBuf.remaining(),
-            null, 100), true);
-    for(int i=0; i < 2048; i += 10) {
-      int x = (int) in.next();
-      if (i < 1024) {
-        assertEquals(i, x);
-      } else {
-        assertEquals(256 * i, x);
-      }
-      if (i < 2038) {
-        in.skip(9);
-      }
-      in.skip(0);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/orc/impl/TestSchemaEvolution.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/orc/impl/TestSchemaEvolution.java b/orc/src/test/org/apache/orc/impl/TestSchemaEvolution.java
deleted file mode 100644
index e0ed2ad..0000000
--- a/orc/src/test/org/apache/orc/impl/TestSchemaEvolution.java
+++ /dev/null
@@ -1,480 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc.impl;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
-
-import java.io.File;
-import java.io.IOException;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.orc.OrcFile;
-import org.apache.orc.Reader;
-import org.apache.orc.RecordReader;
-import org.apache.orc.TypeDescription;
-import org.apache.orc.Writer;
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TestName;
-
-public class TestSchemaEvolution {
-
-  @Rule
-  public TestName testCaseName = new TestName();
-
-  Configuration conf;
-  Path testFilePath;
-  FileSystem fs;
-  Path workDir = new Path(System.getProperty("test.tmp.dir",
-      "target" + File.separator + "test" + File.separator + "tmp"));
-
-  @Before
-  public void setup() throws Exception {
-    conf = new Configuration();
-    fs = FileSystem.getLocal(conf);
-    testFilePath = new Path(workDir, "TestOrcFile." +
-        testCaseName.getMethodName() + ".orc");
-    fs.delete(testFilePath, false);
-  }
-
-  @Test
-  public void testDataTypeConversion1() throws IOException {
-    TypeDescription fileStruct1 = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createInt())
-        .addField("f2", TypeDescription.createString())
-        .addField("f3", TypeDescription.createDecimal().withPrecision(38).withScale(10));
-    SchemaEvolution same1 = new SchemaEvolution(fileStruct1, null);
-    assertFalse(same1.hasConversion());
-    TypeDescription readerStruct1 = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createInt())
-        .addField("f2", TypeDescription.createString())
-        .addField("f3", TypeDescription.createDecimal().withPrecision(38).withScale(10));
-    SchemaEvolution both1 = new SchemaEvolution(fileStruct1, readerStruct1, null);
-    assertFalse(both1.hasConversion());
-    TypeDescription readerStruct1diff = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createLong())
-        .addField("f2", TypeDescription.createString())
-        .addField("f3", TypeDescription.createDecimal().withPrecision(38).withScale(10));
-    SchemaEvolution both1diff = new SchemaEvolution(fileStruct1, readerStruct1diff, null);
-    assertTrue(both1diff.hasConversion());
-    TypeDescription readerStruct1diffPrecision = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createInt())
-        .addField("f2", TypeDescription.createString())
-        .addField("f3", TypeDescription.createDecimal().withPrecision(12).withScale(10));
-    SchemaEvolution both1diffPrecision = new SchemaEvolution(fileStruct1, readerStruct1diffPrecision, null);
-    assertTrue(both1diffPrecision.hasConversion());
-  }
-
-  @Test
-  public void testDataTypeConversion2() throws IOException {
-    TypeDescription fileStruct2 = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createUnion()
-            .addUnionChild(TypeDescription.createByte())
-            .addUnionChild(TypeDescription.createDecimal()
-                .withPrecision(20).withScale(10)))
-        .addField("f2", TypeDescription.createStruct()
-            .addField("f3", TypeDescription.createDate())
-            .addField("f4", TypeDescription.createDouble())
-            .addField("f5", TypeDescription.createBoolean()))
-        .addField("f6", TypeDescription.createChar().withMaxLength(100));
-    SchemaEvolution same2 = new SchemaEvolution(fileStruct2, null);
-    assertFalse(same2.hasConversion());
-    TypeDescription readerStruct2 = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createUnion()
-            .addUnionChild(TypeDescription.createByte())
-            .addUnionChild(TypeDescription.createDecimal()
-                .withPrecision(20).withScale(10)))
-        .addField("f2", TypeDescription.createStruct()
-            .addField("f3", TypeDescription.createDate())
-            .addField("f4", TypeDescription.createDouble())
-            .addField("f5", TypeDescription.createBoolean()))
-        .addField("f6", TypeDescription.createChar().withMaxLength(100));
-    SchemaEvolution both2 = new SchemaEvolution(fileStruct2, readerStruct2, null);
-    assertFalse(both2.hasConversion());
-    TypeDescription readerStruct2diff = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createUnion()
-            .addUnionChild(TypeDescription.createByte())
-            .addUnionChild(TypeDescription.createDecimal()
-                .withPrecision(20).withScale(10)))
-        .addField("f2", TypeDescription.createStruct()
-            .addField("f3", TypeDescription.createDate())
-            .addField("f4", TypeDescription.createDouble())
-            .addField("f5", TypeDescription.createByte()))
-        .addField("f6", TypeDescription.createChar().withMaxLength(100));
-    SchemaEvolution both2diff = new SchemaEvolution(fileStruct2, readerStruct2diff, null);
-    assertTrue(both2diff.hasConversion());
-    TypeDescription readerStruct2diffChar = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createUnion()
-            .addUnionChild(TypeDescription.createByte())
-            .addUnionChild(TypeDescription.createDecimal()
-                .withPrecision(20).withScale(10)))
-        .addField("f2", TypeDescription.createStruct()
-            .addField("f3", TypeDescription.createDate())
-            .addField("f4", TypeDescription.createDouble())
-            .addField("f5", TypeDescription.createBoolean()))
-        .addField("f6", TypeDescription.createChar().withMaxLength(80));
-    SchemaEvolution both2diffChar = new SchemaEvolution(fileStruct2, readerStruct2diffChar, null);
-    assertTrue(both2diffChar.hasConversion());
-  }
-
-  @Test
-  public void testFloatToDoubleEvolution() throws Exception {
-    testFilePath = new Path(workDir, "TestOrcFile." +
-        testCaseName.getMethodName() + ".orc");
-    TypeDescription schema = TypeDescription.createFloat();
-    Writer writer = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf).setSchema(schema).stripeSize(100000)
-            .bufferSize(10000));
-    VectorizedRowBatch batch = new VectorizedRowBatch(1, 1024);
-    DoubleColumnVector dcv = new DoubleColumnVector(1024);
-    batch.cols[0] = dcv;
-    batch.reset();
-    batch.size = 1;
-    dcv.vector[0] = 74.72f;
-    writer.addRowBatch(batch);
-    writer.close();
-
-    Reader reader = OrcFile.createReader(testFilePath,
-        OrcFile.readerOptions(conf).filesystem(fs));
-    TypeDescription schemaOnRead = TypeDescription.createDouble();
-    RecordReader rows = reader.rows(new Reader.Options().schema(schemaOnRead));
-    batch = schemaOnRead.createRowBatch();
-    rows.nextBatch(batch);
-    assertEquals(74.72, ((DoubleColumnVector) batch.cols[0]).vector[0], 0.00000000001);
-    rows.close();
-  }
-
-  @Test
-  public void testSafePpdEvaluation() throws IOException {
-    TypeDescription fileStruct1 = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createInt())
-        .addField("f2", TypeDescription.createString())
-        .addField("f3", TypeDescription.createDecimal().withPrecision(38).withScale(10));
-    SchemaEvolution same1 = new SchemaEvolution(fileStruct1, null);
-    assertTrue(same1.isPPDSafeConversion(0));
-    assertFalse(same1.hasConversion());
-    TypeDescription readerStruct1 = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createInt())
-        .addField("f2", TypeDescription.createString())
-        .addField("f3", TypeDescription.createDecimal().withPrecision(38).withScale(10));
-    SchemaEvolution both1 = new SchemaEvolution(fileStruct1, readerStruct1, null);
-    assertFalse(both1.hasConversion());
-    assertTrue(both1.isPPDSafeConversion(0));
-    assertTrue(both1.isPPDSafeConversion(1));
-    assertTrue(both1.isPPDSafeConversion(2));
-    assertTrue(both1.isPPDSafeConversion(3));
-
-    // int -> long
-    TypeDescription readerStruct1diff = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createLong())
-        .addField("f2", TypeDescription.createString())
-        .addField("f3", TypeDescription.createDecimal().withPrecision(38).withScale(10));
-    SchemaEvolution both1diff = new SchemaEvolution(fileStruct1, readerStruct1diff, null);
-    assertTrue(both1diff.hasConversion());
-    assertFalse(both1diff.isPPDSafeConversion(0));
-    assertTrue(both1diff.isPPDSafeConversion(1));
-    assertTrue(both1diff.isPPDSafeConversion(2));
-    assertTrue(both1diff.isPPDSafeConversion(3));
-
-    // decimal(38,10) -> decimal(12, 10)
-    TypeDescription readerStruct1diffPrecision = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createInt())
-        .addField("f2", TypeDescription.createString())
-        .addField("f3", TypeDescription.createDecimal().withPrecision(12).withScale(10));
-    SchemaEvolution both1diffPrecision = new SchemaEvolution(fileStruct1, readerStruct1diffPrecision,
-        new boolean[] {true, false, false, true});
-    assertTrue(both1diffPrecision.hasConversion());
-    assertFalse(both1diffPrecision.isPPDSafeConversion(0));
-    assertFalse(both1diffPrecision.isPPDSafeConversion(1)); // column not included
-    assertFalse(both1diffPrecision.isPPDSafeConversion(2)); // column not included
-    assertFalse(both1diffPrecision.isPPDSafeConversion(3));
-
-    // add columns
-    readerStruct1 = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createInt())
-        .addField("f2", TypeDescription.createString())
-        .addField("f3", TypeDescription.createDecimal().withPrecision(38).withScale(10))
-        .addField("f4", TypeDescription.createBoolean());
-    both1 = new SchemaEvolution(fileStruct1, readerStruct1, null);
-    assertTrue(both1.hasConversion());
-    assertFalse(both1.isPPDSafeConversion(0));
-    assertTrue(both1.isPPDSafeConversion(1));
-    assertTrue(both1.isPPDSafeConversion(2));
-    assertTrue(both1.isPPDSafeConversion(3));
-    assertFalse(both1.isPPDSafeConversion(4));
-  }
-
-  @Test
-  public void testSafePpdEvaluationForInts() throws IOException {
-    // byte -> short -> int -> long
-    TypeDescription fileSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createByte());
-    SchemaEvolution schemaEvolution = new SchemaEvolution(fileSchema, null);
-    assertFalse(schemaEvolution.hasConversion());
-
-    // byte -> short
-    TypeDescription readerSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createShort());
-    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
-    assertTrue(schemaEvolution.hasConversion());
-    assertFalse(schemaEvolution.isPPDSafeConversion(0));
-    assertTrue(schemaEvolution.isPPDSafeConversion(1));
-
-    // byte -> int
-    readerSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createInt());
-    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
-    assertTrue(schemaEvolution.hasConversion());
-    assertFalse(schemaEvolution.isPPDSafeConversion(0));
-    assertTrue(schemaEvolution.isPPDSafeConversion(1));
-
-    // byte -> long
-    readerSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createLong());
-    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
-    assertTrue(schemaEvolution.hasConversion());
-    assertFalse(schemaEvolution.isPPDSafeConversion(0));
-    assertTrue(schemaEvolution.isPPDSafeConversion(1));
-
-    // short -> int -> long
-    fileSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createShort());
-    schemaEvolution = new SchemaEvolution(fileSchema, null);
-    assertFalse(schemaEvolution.hasConversion());
-
-    // unsafe conversion short -> byte
-    readerSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createByte());
-    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
-    assertTrue(schemaEvolution.hasConversion());
-    assertFalse(schemaEvolution.isPPDSafeConversion(0));
-    assertFalse(schemaEvolution.isPPDSafeConversion(1));
-
-    // short -> int
-    readerSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createInt());
-    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
-    assertTrue(schemaEvolution.hasConversion());
-    assertFalse(schemaEvolution.isPPDSafeConversion(0));
-    assertTrue(schemaEvolution.isPPDSafeConversion(1));
-
-    // short -> long
-    readerSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createLong());
-    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
-    assertTrue(schemaEvolution.hasConversion());
-    assertFalse(schemaEvolution.isPPDSafeConversion(0));
-    assertTrue(schemaEvolution.isPPDSafeConversion(1));
-
-    // int -> long
-    fileSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createInt());
-    schemaEvolution = new SchemaEvolution(fileSchema, null);
-    assertFalse(schemaEvolution.hasConversion());
-
-    // unsafe conversion int -> byte
-    readerSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createByte());
-    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
-    assertTrue(schemaEvolution.hasConversion());
-    assertFalse(schemaEvolution.isPPDSafeConversion(0));
-    assertFalse(schemaEvolution.isPPDSafeConversion(1));
-
-    // unsafe conversion int -> short
-    readerSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createShort());
-    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
-    assertTrue(schemaEvolution.hasConversion());
-    assertFalse(schemaEvolution.isPPDSafeConversion(0));
-    assertFalse(schemaEvolution.isPPDSafeConversion(1));
-
-    // int -> long
-    readerSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createLong());
-    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
-    assertTrue(schemaEvolution.hasConversion());
-    assertFalse(schemaEvolution.isPPDSafeConversion(0));
-    assertTrue(schemaEvolution.isPPDSafeConversion(1));
-
-    // long
-    fileSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createLong());
-    schemaEvolution = new SchemaEvolution(fileSchema, null);
-    assertTrue(schemaEvolution.isPPDSafeConversion(0));
-    assertFalse(schemaEvolution.hasConversion());
-
-    // unsafe conversion long -> byte
-    readerSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createByte());
-    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
-    assertTrue(schemaEvolution.hasConversion());
-    assertFalse(schemaEvolution.isPPDSafeConversion(0));
-    assertFalse(schemaEvolution.isPPDSafeConversion(1));
-
-    // unsafe conversion long -> short
-    readerSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createShort());
-    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
-    assertTrue(schemaEvolution.hasConversion());
-    assertFalse(schemaEvolution.isPPDSafeConversion(0));
-    assertFalse(schemaEvolution.isPPDSafeConversion(1));
-
-    // unsafe conversion long -> int
-    readerSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createInt());
-    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
-    assertTrue(schemaEvolution.hasConversion());
-    assertFalse(schemaEvolution.isPPDSafeConversion(0));
-    assertFalse(schemaEvolution.isPPDSafeConversion(1));
-
-    // invalid
-    readerSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createString());
-    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
-    assertTrue(schemaEvolution.hasConversion());
-    assertFalse(schemaEvolution.isPPDSafeConversion(0));
-    assertFalse(schemaEvolution.isPPDSafeConversion(1));
-
-    // invalid
-    readerSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createFloat());
-    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
-    assertTrue(schemaEvolution.hasConversion());
-    assertFalse(schemaEvolution.isPPDSafeConversion(0));
-    assertFalse(schemaEvolution.isPPDSafeConversion(1));
-
-    // invalid
-    readerSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createTimestamp());
-    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
-    assertTrue(schemaEvolution.hasConversion());
-    assertFalse(schemaEvolution.isPPDSafeConversion(0));
-    assertFalse(schemaEvolution.isPPDSafeConversion(1));
-  }
-
-  @Test
-  public void testSafePpdEvaluationForStrings() throws IOException {
-    TypeDescription fileSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createString());
-    SchemaEvolution schemaEvolution = new SchemaEvolution(fileSchema, null);
-    assertTrue(schemaEvolution.isPPDSafeConversion(0));
-    assertFalse(schemaEvolution.hasConversion());
-
-    // string -> char
-    TypeDescription readerSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createChar());
-    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
-    assertTrue(schemaEvolution.hasConversion());
-    assertFalse(schemaEvolution.isPPDSafeConversion(0));
-    assertFalse(schemaEvolution.isPPDSafeConversion(1));
-
-    // string -> varchar
-    readerSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createVarchar());
-    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
-    assertTrue(schemaEvolution.hasConversion());
-    assertFalse(schemaEvolution.isPPDSafeConversion(0));
-    assertTrue(schemaEvolution.isPPDSafeConversion(1));
-
-    fileSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createChar());
-    schemaEvolution = new SchemaEvolution(fileSchema, null);
-    assertTrue(schemaEvolution.isPPDSafeConversion(0));
-    assertFalse(schemaEvolution.hasConversion());
-
-    // char -> string
-    readerSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createString());
-    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
-    assertTrue(schemaEvolution.hasConversion());
-    assertFalse(schemaEvolution.isPPDSafeConversion(0));
-    assertFalse(schemaEvolution.isPPDSafeConversion(1));
-
-    // char -> varchar
-    readerSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createVarchar());
-    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
-    assertTrue(schemaEvolution.hasConversion());
-    assertFalse(schemaEvolution.isPPDSafeConversion(0));
-    assertFalse(schemaEvolution.isPPDSafeConversion(1));
-
-    fileSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createVarchar());
-    schemaEvolution = new SchemaEvolution(fileSchema, null);
-    assertTrue(schemaEvolution.isPPDSafeConversion(0));
-    assertFalse(schemaEvolution.hasConversion());
-
-    // varchar -> string
-    readerSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createString());
-    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
-    assertTrue(schemaEvolution.hasConversion());
-    assertFalse(schemaEvolution.isPPDSafeConversion(0));
-    assertTrue(schemaEvolution.isPPDSafeConversion(1));
-
-    // varchar -> char
-    readerSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createChar());
-    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
-    assertTrue(schemaEvolution.hasConversion());
-    assertFalse(schemaEvolution.isPPDSafeConversion(0));
-    assertFalse(schemaEvolution.isPPDSafeConversion(1));
-
-    // invalid
-    readerSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createDecimal());
-    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
-    assertTrue(schemaEvolution.hasConversion());
-    assertFalse(schemaEvolution.isPPDSafeConversion(0));
-    assertFalse(schemaEvolution.isPPDSafeConversion(1));
-
-    // invalid
-    readerSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createDate());
-    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
-    assertTrue(schemaEvolution.hasConversion());
-    assertFalse(schemaEvolution.isPPDSafeConversion(0));
-    assertFalse(schemaEvolution.isPPDSafeConversion(1));
-
-    // invalid
-    readerSchema = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createInt());
-    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
-    assertTrue(schemaEvolution.hasConversion());
-    assertFalse(schemaEvolution.isPPDSafeConversion(0));
-    assertFalse(schemaEvolution.isPPDSafeConversion(1));
-  }
-
-  @Test
-  public void ensureFileIncluded() throws IOException {
-    TypeDescription file = TypeDescription.fromString("struct<x:int,y:int>");
-    SchemaEvolution evolution = new SchemaEvolution(file, null);
-    boolean[] include = evolution.getFileIncluded();
-    assertEquals(3, include.length);
-    for(int i=0; i < include.length; ++i) {
-      assertTrue("element " + i, include[i]);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/orc/impl/TestSerializationUtils.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/orc/impl/TestSerializationUtils.java b/orc/src/test/org/apache/orc/impl/TestSerializationUtils.java
deleted file mode 100644
index 4a8a0f2..0000000
--- a/orc/src/test/org/apache/orc/impl/TestSerializationUtils.java
+++ /dev/null
@@ -1,201 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc.impl;
-
-import static org.junit.Assert.assertArrayEquals;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.fail;
-
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.InputStream;
-import java.math.BigInteger;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Random;
-
-import org.junit.Test;
-
-import com.google.common.math.LongMath;
-
-public class TestSerializationUtils {
-
-  private InputStream fromBuffer(ByteArrayOutputStream buffer) {
-    return new ByteArrayInputStream(buffer.toByteArray());
-  }
-
-  @Test
-  public void testDoubles() throws Exception {
-    double tolerance = 0.0000000000000001;
-    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
-    SerializationUtils utils = new SerializationUtils();
-    utils.writeDouble(buffer, 1343822337.759);
-    assertEquals(1343822337.759, utils.readDouble(fromBuffer(buffer)), tolerance);
-    buffer = new ByteArrayOutputStream();
-    utils.writeDouble(buffer, 0.8);
-    double got = utils.readDouble(fromBuffer(buffer));
-    assertEquals(0.8, got, tolerance);
-  }
-
-  @Test
-  public void testBigIntegers() throws Exception {
-    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
-    SerializationUtils.writeBigInteger(buffer, BigInteger.valueOf(0));
-    assertArrayEquals(new byte[]{0}, buffer.toByteArray());
-    assertEquals(0L,
-        SerializationUtils.readBigInteger(fromBuffer(buffer)).longValue());
-    buffer.reset();
-    SerializationUtils.writeBigInteger(buffer, BigInteger.valueOf(1));
-    assertArrayEquals(new byte[]{2}, buffer.toByteArray());
-    assertEquals(1L,
-        SerializationUtils.readBigInteger(fromBuffer(buffer)).longValue());
-    buffer.reset();
-    SerializationUtils.writeBigInteger(buffer, BigInteger.valueOf(-1));
-    assertArrayEquals(new byte[]{1}, buffer.toByteArray());
-    assertEquals(-1L,
-        SerializationUtils.readBigInteger(fromBuffer(buffer)).longValue());
-    buffer.reset();
-    SerializationUtils.writeBigInteger(buffer, BigInteger.valueOf(50));
-    assertArrayEquals(new byte[]{100}, buffer.toByteArray());
-    assertEquals(50L,
-        SerializationUtils.readBigInteger(fromBuffer(buffer)).longValue());
-    buffer.reset();
-    SerializationUtils.writeBigInteger(buffer, BigInteger.valueOf(-50));
-    assertArrayEquals(new byte[]{99}, buffer.toByteArray());
-    assertEquals(-50L,
-        SerializationUtils.readBigInteger(fromBuffer(buffer)).longValue());
-    for(int i=-8192; i < 8192; ++i) {
-      buffer.reset();
-        SerializationUtils.writeBigInteger(buffer, BigInteger.valueOf(i));
-      assertEquals("compare length for " + i,
-            i >= -64 && i < 64 ? 1 : 2, buffer.size());
-      assertEquals("compare result for " + i,
-          i, SerializationUtils.readBigInteger(fromBuffer(buffer)).intValue());
-    }
-    buffer.reset();
-    SerializationUtils.writeBigInteger(buffer,
-        new BigInteger("123456789abcdef0",16));
-    assertEquals(new BigInteger("123456789abcdef0",16),
-        SerializationUtils.readBigInteger(fromBuffer(buffer)));
-    buffer.reset();
-    SerializationUtils.writeBigInteger(buffer,
-        new BigInteger("-123456789abcdef0",16));
-    assertEquals(new BigInteger("-123456789abcdef0",16),
-        SerializationUtils.readBigInteger(fromBuffer(buffer)));
-    StringBuilder buf = new StringBuilder();
-    for(int i=0; i < 256; ++i) {
-      String num = Integer.toHexString(i);
-      if (num.length() == 1) {
-        buf.append('0');
-      }
-      buf.append(num);
-    }
-    buffer.reset();
-    SerializationUtils.writeBigInteger(buffer,
-        new BigInteger(buf.toString(),16));
-    assertEquals(new BigInteger(buf.toString(),16),
-        SerializationUtils.readBigInteger(fromBuffer(buffer)));
-    buffer.reset();
-    SerializationUtils.writeBigInteger(buffer,
-        new BigInteger("ff000000000000000000000000000000000000000000ff",16));
-    assertEquals(
-        new BigInteger("ff000000000000000000000000000000000000000000ff",16),
-        SerializationUtils.readBigInteger(fromBuffer(buffer)));
-  }
-
-  @Test
-  public void testSubtractionOverflow() {
-    // cross check results with Guava results below
-    SerializationUtils utils = new SerializationUtils();
-    assertEquals(false, utils.isSafeSubtract(22222222222L, Long.MIN_VALUE));
-    assertEquals(false, utils.isSafeSubtract(-22222222222L, Long.MAX_VALUE));
-    assertEquals(false, utils.isSafeSubtract(Long.MIN_VALUE, Long.MAX_VALUE));
-    assertEquals(true, utils.isSafeSubtract(-1553103058346370095L, 6553103058346370095L));
-    assertEquals(true, utils.isSafeSubtract(0, Long.MAX_VALUE));
-    assertEquals(true, utils.isSafeSubtract(Long.MIN_VALUE, 0));
-  }
-
-  @Test
-  public void testSubtractionOverflowGuava() {
-    try {
-      LongMath.checkedSubtract(22222222222L, Long.MIN_VALUE);
-      fail("expected ArithmeticException for overflow");
-    } catch (ArithmeticException ex) {
-      assertEquals(ex.getMessage(), "overflow");
-    }
-
-    try {
-      LongMath.checkedSubtract(-22222222222L, Long.MAX_VALUE);
-      fail("expected ArithmeticException for overflow");
-    } catch (ArithmeticException ex) {
-      assertEquals(ex.getMessage(), "overflow");
-    }
-
-    try {
-      LongMath.checkedSubtract(Long.MIN_VALUE, Long.MAX_VALUE);
-      fail("expected ArithmeticException for overflow");
-    } catch (ArithmeticException ex) {
-      assertEquals(ex.getMessage(), "overflow");
-    }
-
-    assertEquals(-8106206116692740190L,
-        LongMath.checkedSubtract(-1553103058346370095L, 6553103058346370095L));
-    assertEquals(-Long.MAX_VALUE, LongMath.checkedSubtract(0, Long.MAX_VALUE));
-    assertEquals(Long.MIN_VALUE, LongMath.checkedSubtract(Long.MIN_VALUE, 0));
-  }
-
-  @Test
-  public void testRandomFloats() throws Exception {
-    float tolerance = 0.0000000000000001f;
-    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
-    SerializationUtils utils = new SerializationUtils();
-    Random rand = new Random();
-    int n = 100_000;
-    float[] expected = new float[n];
-    for (int i = 0; i < n; i++) {
-      float f = rand.nextFloat();
-      expected[i] = f;
-      utils.writeFloat(buffer, f);
-    }
-    InputStream newBuffer = fromBuffer(buffer);
-    for (int i = 0; i < n; i++) {
-      float got = utils.readFloat(newBuffer);
-      assertEquals(expected[i], got, tolerance);
-    }
-  }
-
-  @Test
-  public void testRandomDoubles() throws Exception {
-    double tolerance = 0.0000000000000001;
-    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
-    SerializationUtils utils = new SerializationUtils();
-    Random rand = new Random();
-    int n = 100_000;
-    double[] expected = new double[n];
-    for (int i = 0; i < n; i++) {
-      double d = rand.nextDouble();
-      expected[i] = d;
-      utils.writeDouble(buffer, d);
-    }
-    InputStream newBuffer = fromBuffer(buffer);
-    for (int i = 0; i < n; i++) {
-      double got = utils.readDouble(newBuffer);
-      assertEquals(expected[i], got, tolerance);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/orc/impl/TestStreamName.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/orc/impl/TestStreamName.java b/orc/src/test/org/apache/orc/impl/TestStreamName.java
deleted file mode 100644
index be58d4c..0000000
--- a/orc/src/test/org/apache/orc/impl/TestStreamName.java
+++ /dev/null
@@ -1,49 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc.impl;
-
-import org.apache.orc.OrcProto;
-import org.junit.Test;
-
-import static org.junit.Assert.assertEquals;
-
-public class TestStreamName {
-
-  @Test
-  public void test1() throws Exception {
-    StreamName s1 = new StreamName(3, OrcProto.Stream.Kind.DATA);
-    StreamName s2 = new StreamName(3,
-        OrcProto.Stream.Kind.DICTIONARY_DATA);
-    StreamName s3 = new StreamName(5, OrcProto.Stream.Kind.DATA);
-    StreamName s4 = new StreamName(5,
-        OrcProto.Stream.Kind.DICTIONARY_DATA);
-    StreamName s1p = new StreamName(3, OrcProto.Stream.Kind.DATA);
-    assertEquals(true, s1.equals(s1));
-    assertEquals(false, s1.equals(s2));
-    assertEquals(false, s1.equals(s3));
-    assertEquals(true, s1.equals(s1p));
-    assertEquals(true, s1.compareTo(null) < 0);
-    assertEquals(false, s1.equals(null));
-    assertEquals(true, s1.compareTo(s2) < 0);
-    assertEquals(true, s2.compareTo(s3) < 0);
-    assertEquals(true, s3.compareTo(s4) < 0);
-    assertEquals(true, s4.compareTo(s1p) > 0);
-    assertEquals(0, s1p.compareTo(s1));
-  }
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/orc/impl/TestStringRedBlackTree.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/orc/impl/TestStringRedBlackTree.java b/orc/src/test/org/apache/orc/impl/TestStringRedBlackTree.java
deleted file mode 100644
index 3d4612c..0000000
--- a/orc/src/test/org/apache/orc/impl/TestStringRedBlackTree.java
+++ /dev/null
@@ -1,234 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc.impl;
-
-import org.apache.hadoop.io.DataOutputBuffer;
-import org.apache.hadoop.io.IntWritable;
-import org.apache.orc.impl.RedBlackTree;
-import org.apache.orc.impl.StringRedBlackTree;
-import org.junit.Test;
-
-import java.io.IOException;
-
-import static junit.framework.Assert.assertEquals;
-
-/**
- * Test the red-black tree with string keys.
- */
-public class TestStringRedBlackTree {
-
-  /**
-   * Checks the red-black tree rules to make sure that we have correctly built
-   * a valid tree.
-   *
-   * Properties:
-   *   1. Red nodes must have black children
-   *   2. Each node must have the same black height on both sides.
-   *
-   * @param node The id of the root of the subtree to check for the red-black
-   *        tree properties.
-   * @return The black-height of the subtree.
-   */
-  private int checkSubtree(RedBlackTree tree, int node, IntWritable count
-                          ) throws IOException {
-    if (node == RedBlackTree.NULL) {
-      return 1;
-    }
-    count.set(count.get() + 1);
-    boolean is_red = tree.isRed(node);
-    int left = tree.getLeft(node);
-    int right = tree.getRight(node);
-    if (is_red) {
-      if (tree.isRed(left)) {
-        printTree(tree, "", tree.root);
-        throw new IllegalStateException("Left node of " + node + " is " + left +
-          " and both are red.");
-      }
-      if (tree.isRed(right)) {
-        printTree(tree, "", tree.root);
-        throw new IllegalStateException("Right node of " + node + " is " +
-          right + " and both are red.");
-      }
-    }
-    int left_depth = checkSubtree(tree, left, count);
-    int right_depth = checkSubtree(tree, right, count);
-    if (left_depth != right_depth) {
-      printTree(tree, "", tree.root);
-      throw new IllegalStateException("Lopsided tree at node " + node +
-        " with depths " + left_depth + " and " + right_depth);
-    }
-    if (is_red) {
-      return left_depth;
-    } else {
-      return left_depth + 1;
-    }
-  }
-
-  /**
-   * Checks the validity of the entire tree. Also ensures that the number of
-   * nodes visited is the same as the size of the set.
-   */
-  void checkTree(RedBlackTree tree) throws IOException {
-    IntWritable count = new IntWritable(0);
-    if (tree.isRed(tree.root)) {
-      printTree(tree, "", tree.root);
-      throw new IllegalStateException("root is red");
-    }
-    checkSubtree(tree, tree.root, count);
-    if (count.get() != tree.size) {
-      printTree(tree, "", tree.root);
-      throw new IllegalStateException("Broken tree! visited= " + count.get() +
-        " size=" + tree.size);
-    }
-  }
-
-  void printTree(RedBlackTree tree, String indent, int node
-                ) throws IOException {
-    if (node == RedBlackTree.NULL) {
-      System.err.println(indent + "NULL");
-    } else {
-      System.err.println(indent + "Node " + node + " color " +
-        (tree.isRed(node) ? "red" : "black"));
-      printTree(tree, indent + "  ", tree.getLeft(node));
-      printTree(tree, indent + "  ", tree.getRight(node));
-    }
-  }
-
-  private static class MyVisitor implements StringRedBlackTree.Visitor {
-    private final String[] words;
-    private final int[] order;
-    private final DataOutputBuffer buffer = new DataOutputBuffer();
-    int current = 0;
-
-    MyVisitor(String[] args, int[] order) {
-      words = args;
-      this.order = order;
-    }
-
-    @Override
-    public void visit(StringRedBlackTree.VisitorContext context
-                     ) throws IOException {
-      String word = context.getText().toString();
-      assertEquals("in word " + current, words[current], word);
-      assertEquals("in word " + current, order[current],
-        context.getOriginalPosition());
-      buffer.reset();
-      context.writeBytes(buffer);
-      assertEquals(word, new String(buffer.getData(),0,buffer.getLength()));
-      current += 1;
-    }
-  }
-
-  void checkContents(StringRedBlackTree tree, int[] order,
-                     String... params
-                    ) throws IOException {
-    tree.visit(new MyVisitor(params, order));
-  }
-
-  StringRedBlackTree buildTree(String... params) throws IOException {
-    StringRedBlackTree result = new StringRedBlackTree(1000);
-    for(String word: params) {
-      result.add(word);
-      checkTree(result);
-    }
-    return result;
-  }
-
-  @Test
-  public void test1() throws Exception {
-    StringRedBlackTree tree = new StringRedBlackTree(5);
-    assertEquals(0, tree.getSizeInBytes());
-    checkTree(tree);
-    assertEquals(0, tree.add("owen"));
-    checkTree(tree);
-    assertEquals(1, tree.add("ashutosh"));
-    checkTree(tree);
-    assertEquals(0, tree.add("owen"));
-    checkTree(tree);
-    assertEquals(2, tree.add("alan"));
-    checkTree(tree);
-    assertEquals(2, tree.add("alan"));
-    checkTree(tree);
-    assertEquals(1, tree.add("ashutosh"));
-    checkTree(tree);
-    assertEquals(3, tree.add("greg"));
-    checkTree(tree);
-    assertEquals(4, tree.add("eric"));
-    checkTree(tree);
-    assertEquals(5, tree.add("arun"));
-    checkTree(tree);
-    assertEquals(6, tree.size());
-    checkTree(tree);
-    assertEquals(6, tree.add("eric14"));
-    checkTree(tree);
-    assertEquals(7, tree.add("o"));
-    checkTree(tree);
-    assertEquals(8, tree.add("ziggy"));
-    checkTree(tree);
-    assertEquals(9, tree.add("z"));
-    checkTree(tree);
-    checkContents(tree, new int[]{2,5,1,4,6,3,7,0,9,8},
-      "alan", "arun", "ashutosh", "eric", "eric14", "greg",
-      "o", "owen", "z", "ziggy");
-    assertEquals(32888, tree.getSizeInBytes());
-    // check that adding greg again bumps the count
-    assertEquals(3, tree.add("greg"));
-    assertEquals(41, tree.getCharacterSize());
-    // add some more strings to test the different branches of the
-    // rebalancing
-    assertEquals(10, tree.add("zak"));
-    checkTree(tree);
-    assertEquals(11, tree.add("eric1"));
-    checkTree(tree);
-    assertEquals(12, tree.add("ash"));
-    checkTree(tree);
-    assertEquals(13, tree.add("harry"));
-    checkTree(tree);
-    assertEquals(14, tree.add("john"));
-    checkTree(tree);
-    tree.clear();
-    checkTree(tree);
-    assertEquals(0, tree.getSizeInBytes());
-    assertEquals(0, tree.getCharacterSize());
-  }
-
-  @Test
-  public void test2() throws Exception {
-    StringRedBlackTree tree =
-      buildTree("a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l",
-        "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z");
-    assertEquals(26, tree.size());
-    checkContents(tree, new int[]{0,1,2, 3,4,5, 6,7,8, 9,10,11, 12,13,14,
-      15,16,17, 18,19,20, 21,22,23, 24,25},
-      "a", "b", "c", "d", "e", "f", "g", "h", "i", "j","k", "l", "m", "n", "o",
-      "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z");
-  }
-
-  @Test
-  public void test3() throws Exception {
-    StringRedBlackTree tree =
-      buildTree("z", "y", "x", "w", "v", "u", "t", "s", "r", "q", "p", "o", "n",
-        "m", "l", "k", "j", "i", "h", "g", "f", "e", "d", "c", "b", "a");
-    assertEquals(26, tree.size());
-    checkContents(tree, new int[]{25,24,23, 22,21,20, 19,18,17, 16,15,14,
-      13,12,11, 10,9,8, 7,6,5, 4,3,2, 1,0},
-      "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o",
-      "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z");
-  }
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/orc/impl/TestZlib.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/orc/impl/TestZlib.java b/orc/src/test/org/apache/orc/impl/TestZlib.java
deleted file mode 100644
index 327ecfc..0000000
--- a/orc/src/test/org/apache/orc/impl/TestZlib.java
+++ /dev/null
@@ -1,56 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc.impl;
-
-import org.apache.orc.CompressionCodec;
-import org.junit.Test;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-
-import static junit.framework.Assert.assertEquals;
-import static junit.framework.Assert.fail;
-
-public class TestZlib {
-
-  @Test
-  public void testNoOverflow() throws Exception {
-    ByteBuffer in = ByteBuffer.allocate(10);
-    ByteBuffer out = ByteBuffer.allocate(10);
-    in.put(new byte[]{1,2,3,4,5,6,7,10});
-    in.flip();
-    CompressionCodec codec = new ZlibCodec();
-    assertEquals(false, codec.compress(in, out, null));
-  }
-
-  @Test
-  public void testCorrupt() throws Exception {
-    ByteBuffer buf = ByteBuffer.allocate(1000);
-    buf.put(new byte[]{127,-128,0,99,98,-1});
-    buf.flip();
-    CompressionCodec codec = new ZlibCodec();
-    ByteBuffer out = ByteBuffer.allocate(1000);
-    try {
-      codec.decompress(buf, out);
-      fail();
-    } catch (IOException ioe) {
-      // EXPECTED
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/orc/tools/TestFileDump.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/orc/tools/TestFileDump.java b/orc/src/test/org/apache/orc/tools/TestFileDump.java
deleted file mode 100644
index ce3381e..0000000
--- a/orc/src/test/org/apache/orc/tools/TestFileDump.java
+++ /dev/null
@@ -1,486 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc.tools;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNull;
-
-import java.io.BufferedReader;
-import java.io.ByteArrayOutputStream;
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.FileReader;
-import java.io.PrintStream;
-import java.sql.Date;
-import java.sql.Timestamp;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Random;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.hadoop.hive.serde2.io.DateWritable;
-import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
-import org.apache.orc.CompressionKind;
-import org.apache.orc.OrcConf;
-import org.apache.orc.OrcFile;
-import org.apache.orc.TypeDescription;
-import org.apache.orc.Writer;
-import org.junit.Assert;
-import org.junit.Before;
-import org.junit.Test;
-
-public class TestFileDump {
-
-  Path workDir = new Path(System.getProperty("test.tmp.dir"));
-  Configuration conf;
-  FileSystem fs;
-  Path testFilePath;
-
-  @Before
-  public void openFileSystem () throws Exception {
-    conf = new Configuration();
-    fs = FileSystem.getLocal(conf);
-    fs.setWorkingDirectory(workDir);
-    testFilePath = new Path("TestFileDump.testDump.orc");
-    fs.delete(testFilePath, false);
-  }
-
-  static TypeDescription getMyRecordType() {
-    return TypeDescription.createStruct()
-        .addField("i", TypeDescription.createInt())
-        .addField("l", TypeDescription.createLong())
-        .addField("s", TypeDescription.createString());
-  }
-
-  static void appendMyRecord(VectorizedRowBatch batch,
-                             int i,
-                             long l,
-                             String str) {
-    ((LongColumnVector) batch.cols[0]).vector[batch.size] = i;
-    ((LongColumnVector) batch.cols[1]).vector[batch.size] = l;
-    if (str == null) {
-      batch.cols[2].noNulls = false;
-      batch.cols[2].isNull[batch.size] = true;
-    } else {
-      ((BytesColumnVector) batch.cols[2]).setVal(batch.size,
-          str.getBytes());
-    }
-    batch.size += 1;
-  }
-
-  static TypeDescription getAllTypesType() {
-    return TypeDescription.createStruct()
-        .addField("b", TypeDescription.createBoolean())
-        .addField("bt", TypeDescription.createByte())
-        .addField("s", TypeDescription.createShort())
-        .addField("i", TypeDescription.createInt())
-        .addField("l", TypeDescription.createLong())
-        .addField("f", TypeDescription.createFloat())
-        .addField("d", TypeDescription.createDouble())
-        .addField("de", TypeDescription.createDecimal())
-        .addField("t", TypeDescription.createTimestamp())
-        .addField("dt", TypeDescription.createDate())
-        .addField("str", TypeDescription.createString())
-        .addField("c", TypeDescription.createChar().withMaxLength(5))
-        .addField("vc", TypeDescription.createVarchar().withMaxLength(10))
-        .addField("m", TypeDescription.createMap(
-            TypeDescription.createString(),
-            TypeDescription.createString()))
-        .addField("a", TypeDescription.createList(TypeDescription.createInt()))
-        .addField("st", TypeDescription.createStruct()
-                .addField("i", TypeDescription.createInt())
-                .addField("s", TypeDescription.createString()));
-  }
-
-  static void appendAllTypes(VectorizedRowBatch batch,
-                             boolean b,
-                             byte bt,
-                             short s,
-                             int i,
-                             long l,
-                             float f,
-                             double d,
-                             HiveDecimalWritable de,
-                             Timestamp t,
-                             DateWritable dt,
-                             String str,
-                             String c,
-                             String vc,
-                             Map<String, String> m,
-                             List<Integer> a,
-                             int sti,
-                             String sts) {
-    int row = batch.size++;
-    ((LongColumnVector) batch.cols[0]).vector[row] = b ? 1 : 0;
-    ((LongColumnVector) batch.cols[1]).vector[row] = bt;
-    ((LongColumnVector) batch.cols[2]).vector[row] = s;
-    ((LongColumnVector) batch.cols[3]).vector[row] = i;
-    ((LongColumnVector) batch.cols[4]).vector[row] = l;
-    ((DoubleColumnVector) batch.cols[5]).vector[row] = f;
-    ((DoubleColumnVector) batch.cols[6]).vector[row] = d;
-    ((DecimalColumnVector) batch.cols[7]).vector[row].set(de);
-    ((TimestampColumnVector) batch.cols[8]).set(row, t);
-    ((LongColumnVector) batch.cols[9]).vector[row] = dt.getDays();
-    ((BytesColumnVector) batch.cols[10]).setVal(row, str.getBytes());
-    ((BytesColumnVector) batch.cols[11]).setVal(row, c.getBytes());
-    ((BytesColumnVector) batch.cols[12]).setVal(row, vc.getBytes());
-    MapColumnVector map = (MapColumnVector) batch.cols[13];
-    int offset = map.childCount;
-    map.offsets[row] = offset;
-    map.lengths[row] = m.size();
-    map.childCount += map.lengths[row];
-    for(Map.Entry<String, String> entry: m.entrySet()) {
-      ((BytesColumnVector) map.keys).setVal(offset, entry.getKey().getBytes());
-      ((BytesColumnVector) map.values).setVal(offset++,
-          entry.getValue().getBytes());
-    }
-    ListColumnVector list = (ListColumnVector) batch.cols[14];
-    offset = list.childCount;
-    list.offsets[row] = offset;
-    list.lengths[row] = a.size();
-    list.childCount += list.lengths[row];
-    for(int e=0; e < a.size(); ++e) {
-      ((LongColumnVector) list.child).vector[offset + e] = a.get(e);
-    }
-    StructColumnVector struct = (StructColumnVector) batch.cols[15];
-    ((LongColumnVector) struct.fields[0]).vector[row] = sti;
-    ((BytesColumnVector) struct.fields[1]).setVal(row, sts.getBytes());
-  }
-
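The map and list appends above work through a shared child vector: offsets[row] marks where the row's entries begin, lengths[row] says how many there are, and childCount is advanced to reserve the slots. A minimal, self-contained sketch of the same pattern for a list of ints (class and method names are illustrative, not from the patch; capacity checks are omitted):

import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;

public class ListAppendSketch {
  // Append one row's worth of ints: offsets[row] points into the shared child
  // vector, lengths[row] is the element count, childCount reserves the slots.
  static void appendIntList(ListColumnVector list, int row, int[] values) {
    int offset = list.childCount;
    list.offsets[row] = offset;
    list.lengths[row] = values.length;
    list.childCount += values.length;
    LongColumnVector child = (LongColumnVector) list.child;
    for (int e = 0; e < values.length; ++e) {
      child.vector[offset + e] = values[e];
    }
  }
}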
-  public static void checkOutput(String expected,
-                                 String actual) throws Exception {
-    BufferedReader eStream =
-        new BufferedReader(new FileReader
-            (TestJsonFileDump.getFileFromClasspath(expected)));
-    BufferedReader aStream =
-        new BufferedReader(new FileReader(actual));
-    String expectedLine = eStream.readLine().trim();
-    while (expectedLine != null) {
-      String actualLine = aStream.readLine().trim();
-      System.out.println("actual:   " + actualLine);
-      System.out.println("expected: " + expectedLine);
-      Assert.assertEquals(expectedLine, actualLine);
-      expectedLine = eStream.readLine();
-      expectedLine = expectedLine == null ? null : expectedLine.trim();
-    }
-    Assert.assertNull(eStream.readLine());
-    Assert.assertNull(aStream.readLine());
-    eStream.close();
-    aStream.close();
-  }
-
-  @Test
-  public void testDump() throws Exception {
-    TypeDescription schema = getMyRecordType();
-    conf.set(OrcConf.ENCODING_STRATEGY.getAttribute(), "COMPRESSION");
-    Writer writer = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf)
-            .fileSystem(fs)
-            .setSchema(schema)
-            .compress(CompressionKind.ZLIB)
-            .stripeSize(100000)
-            .rowIndexStride(1000));
-    Random r1 = new Random(1);
-    String[] words = new String[]{"It", "was", "the", "best", "of", "times,",
-        "it", "was", "the", "worst", "of", "times,", "it", "was", "the", "age",
-        "of", "wisdom,", "it", "was", "the", "age", "of", "foolishness,", "it",
-        "was", "the", "epoch", "of", "belief,", "it", "was", "the", "epoch",
-        "of", "incredulity,", "it", "was", "the", "season", "of", "Light,",
-        "it", "was", "the", "season", "of", "Darkness,", "it", "was", "the",
-        "spring", "of", "hope,", "it", "was", "the", "winter", "of", "despair,",
-        "we", "had", "everything", "before", "us,", "we", "had", "nothing",
-        "before", "us,", "we", "were", "all", "going", "direct", "to",
-        "Heaven,", "we", "were", "all", "going", "direct", "the", "other",
-        "way"};
-    VectorizedRowBatch batch = schema.createRowBatch(1000);
-    for(int i=0; i < 21000; ++i) {
-      appendMyRecord(batch, r1.nextInt(), r1.nextLong(),
-          words[r1.nextInt(words.length)]);
-      if (batch.size == batch.getMaxSize()) {
-        writer.addRowBatch(batch);
-        batch.reset();
-      }
-    }
-    if (batch.size > 0) {
-      writer.addRowBatch(batch);
-    }
-    writer.close();
-    PrintStream origOut = System.out;
-    String outputFilename = "orc-file-dump.out";
-    FileOutputStream myOut = new FileOutputStream(workDir + File.separator + outputFilename);
-
-    // replace stdout and run command
-    System.setOut(new PrintStream(myOut));
-    FileDump.main(new String[]{testFilePath.toString(), "--rowindex=1,2,3"});
-    System.out.flush();
-    System.setOut(origOut);
-
-
-    checkOutput(outputFilename, workDir + File.separator + outputFilename);
-  }
-
-  @Test
-  public void testDataDump() throws Exception {
-    TypeDescription schema = getAllTypesType();
-    Writer writer = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf)
-            .fileSystem(fs)
-            .setSchema(schema)
-            .stripeSize(100000)
-            .compress(CompressionKind.NONE)
-            .bufferSize(10000)
-            .rowIndexStride(1000));
-    VectorizedRowBatch batch = schema.createRowBatch(1000);
-    Map<String, String> m = new HashMap<String, String>(2);
-    m.put("k1", "v1");
-    appendAllTypes(batch,
-        true,
-        (byte) 10,
-        (short) 100,
-        1000,
-        10000L,
-        4.0f,
-        20.0,
-        new HiveDecimalWritable("4.2222"),
-        new Timestamp(1416967764000L),
-        new DateWritable(new Date(1416967764000L)),
-        "string",
-        "hello",
-        "hello",
-        m,
-        Arrays.asList(100, 200),
-        10, "foo");
-    m.clear();
-    m.put("k3", "v3");
-    appendAllTypes(
-        batch,
-        false,
-        (byte)20,
-        (short)200,
-        2000,
-        20000L,
-        8.0f,
-        40.0,
-        new HiveDecimalWritable("2.2222"),
-        new Timestamp(1416967364000L),
-        new DateWritable(new Date(1411967764000L)),
-        "abcd",
-        "world",
-        "world",
-        m,
-        Arrays.asList(200, 300),
-        20, "bar");
-    writer.addRowBatch(batch);
-
-    writer.close();
-    PrintStream origOut = System.out;
-    ByteArrayOutputStream myOut = new ByteArrayOutputStream();
-
-    // replace stdout and run command
-    System.setOut(new PrintStream(myOut));
-    FileDump.main(new String[]{testFilePath.toString(), "-d"});
-    System.out.flush();
-    System.setOut(origOut);
-    String[] lines = myOut.toString().split("\n");
-    Assert.assertEquals("{\"b\":true,\"bt\":10,\"s\":100,\"i\":1000,\"l\":10000,\"f\":4,\"d\":20,\"de\":\"4.2222\",\"t\":\"2014-11-25 18:09:24.0\",\"dt\":\"2014-11-25\",\"str\":\"string\",\"c\":\"hello\",\"vc\":\"hello\",\"m\":[{\"_key\":\"k1\",\"_value\":\"v1\"}],\"a\":[100,200],\"st\":{\"i\":10,\"s\":\"foo\"}}", lines[0]);
-    Assert.assertEquals("{\"b\":false,\"bt\":20,\"s\":200,\"i\":2000,\"l\":20000,\"f\":8,\"d\":40,\"de\":\"2.2222\",\"t\":\"2014-11-25 18:02:44.0\",\"dt\":\"2014-09-28\",\"str\":\"abcd\",\"c\":\"world\",\"vc\":\"world\",\"m\":[{\"_key\":\"k3\",\"_value\":\"v3\"}],\"a\":[200,300],\"st\":{\"i\":20,\"s\":\"bar\"}}", lines[1]);
-  }
-  
-  // Test that if the fraction of rows that have distinct strings is greater than the configured
-  // threshold, dictionary encoding is turned off.  If dictionary encoding is turned off, the
-  // length of the dictionary stream for the column will be 0 in the ORC file dump.
-  @Test
-  public void testDictionaryThreshold() throws Exception {
-    TypeDescription schema = getMyRecordType();
-    Configuration conf = new Configuration();
-    conf.set(OrcConf.ENCODING_STRATEGY.getAttribute(), "COMPRESSION");
-    conf.setFloat(OrcConf.DICTIONARY_KEY_SIZE_THRESHOLD.getAttribute(), 0.49f);
-    Writer writer = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf)
-            .fileSystem(fs)
-            .setSchema(schema)
-            .stripeSize(100000)
-            .compress(CompressionKind.ZLIB)
-            .rowIndexStride(1000)
-            .bufferSize(10000));
-    VectorizedRowBatch batch = schema.createRowBatch(1000);
-    Random r1 = new Random(1);
-    String[] words = new String[]{"It", "was", "the", "best", "of", "times,",
-        "it", "was", "the", "worst", "of", "times,", "it", "was", "the", "age",
-        "of", "wisdom,", "it", "was", "the", "age", "of", "foolishness,", "it",
-        "was", "the", "epoch", "of", "belief,", "it", "was", "the", "epoch",
-        "of", "incredulity,", "it", "was", "the", "season", "of", "Light,",
-        "it", "was", "the", "season", "of", "Darkness,", "it", "was", "the",
-        "spring", "of", "hope,", "it", "was", "the", "winter", "of", "despair,",
-        "we", "had", "everything", "before", "us,", "we", "had", "nothing",
-        "before", "us,", "we", "were", "all", "going", "direct", "to",
-        "Heaven,", "we", "were", "all", "going", "direct", "the", "other",
-        "way"};
-    int nextInt = 0;
-    for(int i=0; i < 21000; ++i) {
-      // Write out the same string twice, this guarantees the fraction of rows with
-      // distinct strings is 0.5
-      if (i % 2 == 0) {
-        nextInt = r1.nextInt(words.length);
-        // Append the value of i to the word, this guarantees when an index or word is repeated
-        // the actual string is unique.
-        words[nextInt] += "-" + i;
-      }
-      appendMyRecord(batch, r1.nextInt(), r1.nextLong(), words[nextInt]);
-      if (batch.size == batch.getMaxSize()) {
-        writer.addRowBatch(batch);
-        batch.reset();
-      }
-    }
-    if (batch.size != 0) {
-      writer.addRowBatch(batch);
-    }
-    writer.close();
-    PrintStream origOut = System.out;
-    String outputFilename = "orc-file-dump-dictionary-threshold.out";
-    FileOutputStream myOut = new FileOutputStream(workDir + File.separator + outputFilename);
-
-    // replace stdout and run command
-    System.setOut(new PrintStream(myOut));
-    FileDump.main(new String[]{testFilePath.toString(), "--rowindex=1,2,3"});
-    System.out.flush();
-    System.setOut(origOut);
-
-    checkOutput(outputFilename, workDir + File.separator + outputFilename);
-  }
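As the comment above the test notes, writing every word twice keeps the fraction of rows with distinct strings at 0.5, just over the 0.49 value set via OrcConf.DICTIONARY_KEY_SIZE_THRESHOLD, so the writer falls back to direct encoding. A small illustrative sketch of that fraction (not part of the test):

import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class DistinctFractionSketch {
  // Fraction of rows whose string value is distinct; duplicating every value,
  // as the test above does, yields 0.5, which exceeds the 0.49 threshold.
  static double distinctFraction(List<String> rows) {
    Set<String> distinct = new HashSet<>(rows);
    return rows.isEmpty() ? 0.0 : (double) distinct.size() / rows.size();
  }
}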
-
-  @Test
-  public void testBloomFilter() throws Exception {
-    TypeDescription schema = getMyRecordType();
-    conf.set(OrcConf.ENCODING_STRATEGY.getAttribute(), "COMPRESSION");
-    OrcFile.WriterOptions options = OrcFile.writerOptions(conf)
-        .fileSystem(fs)
-        .setSchema(schema)
-        .stripeSize(100000)
-        .compress(CompressionKind.ZLIB)
-        .bufferSize(10000)
-        .rowIndexStride(1000)
-        .bloomFilterColumns("S");
-    Writer writer = OrcFile.createWriter(testFilePath, options);
-    Random r1 = new Random(1);
-    String[] words = new String[]{"It", "was", "the", "best", "of", "times,",
-        "it", "was", "the", "worst", "of", "times,", "it", "was", "the", "age",
-        "of", "wisdom,", "it", "was", "the", "age", "of", "foolishness,", "it",
-        "was", "the", "epoch", "of", "belief,", "it", "was", "the", "epoch",
-        "of", "incredulity,", "it", "was", "the", "season", "of", "Light,",
-        "it", "was", "the", "season", "of", "Darkness,", "it", "was", "the",
-        "spring", "of", "hope,", "it", "was", "the", "winter", "of", "despair,",
-        "we", "had", "everything", "before", "us,", "we", "had", "nothing",
-        "before", "us,", "we", "were", "all", "going", "direct", "to",
-        "Heaven,", "we", "were", "all", "going", "direct", "the", "other",
-        "way"};
-    VectorizedRowBatch batch = schema.createRowBatch(1000);
-    for(int i=0; i < 21000; ++i) {
-      appendMyRecord(batch, r1.nextInt(), r1.nextLong(),
-          words[r1.nextInt(words.length)]);
-      if (batch.size == batch.getMaxSize()) {
-        writer.addRowBatch(batch);
-        batch.reset();
-      }
-    }
-    if (batch.size > 0) {
-      writer.addRowBatch(batch);
-    }
-    writer.close();
-    PrintStream origOut = System.out;
-    String outputFilename = "orc-file-dump-bloomfilter.out";
-    FileOutputStream myOut = new FileOutputStream(workDir + File.separator + outputFilename);
-
-    // replace stdout and run command
-    System.setOut(new PrintStream(myOut));
-    FileDump.main(new String[]{testFilePath.toString(), "--rowindex=3"});
-    System.out.flush();
-    System.setOut(origOut);
-
-
-    checkOutput(outputFilename, workDir + File.separator + outputFilename);
-  }
-
-  @Test
-  public void testBloomFilter2() throws Exception {
-    TypeDescription schema = getMyRecordType();
-    conf.set(OrcConf.ENCODING_STRATEGY.getAttribute(), "COMPRESSION");
-    OrcFile.WriterOptions options = OrcFile.writerOptions(conf)
-        .fileSystem(fs)
-        .setSchema(schema)
-        .stripeSize(100000)
-        .compress(CompressionKind.ZLIB)
-        .bufferSize(10000)
-        .rowIndexStride(1000)
-        .bloomFilterColumns("l")
-        .bloomFilterFpp(0.01);
-    VectorizedRowBatch batch = schema.createRowBatch(1000);
-    Writer writer = OrcFile.createWriter(testFilePath, options);
-    Random r1 = new Random(1);
-    String[] words = new String[]{"It", "was", "the", "best", "of", "times,",
-        "it", "was", "the", "worst", "of", "times,", "it", "was", "the", "age",
-        "of", "wisdom,", "it", "was", "the", "age", "of", "foolishness,", "it",
-        "was", "the", "epoch", "of", "belief,", "it", "was", "the", "epoch",
-        "of", "incredulity,", "it", "was", "the", "season", "of", "Light,",
-        "it", "was", "the", "season", "of", "Darkness,", "it", "was", "the",
-        "spring", "of", "hope,", "it", "was", "the", "winter", "of", "despair,",
-        "we", "had", "everything", "before", "us,", "we", "had", "nothing",
-        "before", "us,", "we", "were", "all", "going", "direct", "to",
-        "Heaven,", "we", "were", "all", "going", "direct", "the", "other",
-        "way"};
-    for(int i=0; i < 21000; ++i) {
-      appendMyRecord(batch, r1.nextInt(), r1.nextLong(),
-          words[r1.nextInt(words.length)]);
-      if (batch.size == batch.getMaxSize()) {
-        writer.addRowBatch(batch);
-        batch.reset();
-      }
-    }
-    if (batch.size > 0) {
-      writer.addRowBatch(batch);
-    }
-    writer.close();
-    PrintStream origOut = System.out;
-    String outputFilename = "orc-file-dump-bloomfilter2.out";
-    FileOutputStream myOut = new FileOutputStream(workDir + File.separator + outputFilename);
-
-    // replace stdout and run command
-    System.setOut(new PrintStream(myOut));
-    FileDump.main(new String[]{testFilePath.toString(), "--rowindex=2"});
-    System.out.flush();
-    System.setOut(origOut);
-
-
-    checkOutput(outputFilename, workDir + File.separator + outputFilename);
-  }
-}
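One caveat in checkOutput above: because each line is trimmed before comparison, an actual file that ends early triggers a NullPointerException instead of a readable assertion failure. A null-safe sketch of the same comparison (the helper name is ours, not from the patch):

import java.io.BufferedReader;
import java.io.FileReader;
import org.junit.Assert;

public class DumpComparisonSketch {
  // Compare two dump files line by line, trimming whitespace, and fail with a
  // clear message if either file ends before the other.
  static void assertFilesMatch(String expectedPath, String actualPath) throws Exception {
    try (BufferedReader expected = new BufferedReader(new FileReader(expectedPath));
         BufferedReader actual = new BufferedReader(new FileReader(actualPath))) {
      String expectedLine;
      while ((expectedLine = expected.readLine()) != null) {
        String actualLine = actual.readLine();
        Assert.assertNotNull("actual output ended early", actualLine);
        Assert.assertEquals(expectedLine.trim(), actualLine.trim());
      }
      Assert.assertNull("actual output has extra lines", actual.readLine());
    }
  }
}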

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/orc/tools/TestJsonFileDump.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/orc/tools/TestJsonFileDump.java b/orc/src/test/org/apache/orc/tools/TestJsonFileDump.java
deleted file mode 100644
index a514824..0000000
--- a/orc/src/test/org/apache/orc/tools/TestJsonFileDump.java
+++ /dev/null
@@ -1,150 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc.tools;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNull;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.FileReader;
-import java.io.PrintStream;
-import java.net.URL;
-import java.util.Random;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.orc.CompressionKind;
-import org.apache.orc.OrcConf;
-import org.apache.orc.OrcFile;
-import org.apache.orc.TypeDescription;
-import org.apache.orc.Writer;
-import org.junit.Before;
-import org.junit.Test;
-
-public class TestJsonFileDump {
-  public static String getFileFromClasspath(String name) {
-    URL url = ClassLoader.getSystemResource(name);
-    if (url == null) {
-      throw new IllegalArgumentException("Could not find " + name);
-    }
-    return url.getPath();
-  }
-
-  Path workDir = new Path(System.getProperty("test.tmp.dir"));
-  Configuration conf;
-  FileSystem fs;
-  Path testFilePath;
-
-  @Before
-  public void openFileSystem () throws Exception {
-    conf = new Configuration();
-    fs = FileSystem.getLocal(conf);
-    fs.setWorkingDirectory(workDir);
-    testFilePath = new Path("TestFileDump.testDump.orc");
-    fs.delete(testFilePath, false);
-  }
-
-  static void checkOutput(String expected,
-                                  String actual) throws Exception {
-    BufferedReader eStream =
-        new BufferedReader(new FileReader(getFileFromClasspath(expected)));
-    BufferedReader aStream =
-        new BufferedReader(new FileReader(actual));
-    String expectedLine = eStream.readLine();
-    while (expectedLine != null) {
-      String actualLine = aStream.readLine();
-      System.out.println("actual:   " + actualLine);
-      System.out.println("expected: " + expectedLine);
-      assertEquals(expectedLine, actualLine);
-      expectedLine = eStream.readLine();
-    }
-    assertNull(eStream.readLine());
-    assertNull(aStream.readLine());
-  }
-
-  @Test
-  public void testJsonDump() throws Exception {
-    TypeDescription schema = TypeDescription.createStruct()
-        .addField("i", TypeDescription.createInt())
-        .addField("l", TypeDescription.createLong())
-        .addField("s", TypeDescription.createString());
-    conf.set(OrcConf.ENCODING_STRATEGY.getAttribute(), "COMPRESSION");
-    OrcFile.WriterOptions options = OrcFile.writerOptions(conf)
-        .fileSystem(fs)
-        .setSchema(schema)
-        .stripeSize(100000)
-        .compress(CompressionKind.ZLIB)
-        .bufferSize(10000)
-        .rowIndexStride(1000)
-        .bloomFilterColumns("s");
-    Writer writer = OrcFile.createWriter(testFilePath, options);
-    Random r1 = new Random(1);
-    String[] words = new String[]{"It", "was", "the", "best", "of", "times,",
-        "it", "was", "the", "worst", "of", "times,", "it", "was", "the", "age",
-        "of", "wisdom,", "it", "was", "the", "age", "of", "foolishness,", "it",
-        "was", "the", "epoch", "of", "belief,", "it", "was", "the", "epoch",
-        "of", "incredulity,", "it", "was", "the", "season", "of", "Light,",
-        "it", "was", "the", "season", "of", "Darkness,", "it", "was", "the",
-        "spring", "of", "hope,", "it", "was", "the", "winter", "of", "despair,",
-        "we", "had", "everything", "before", "us,", "we", "had", "nothing",
-        "before", "us,", "we", "were", "all", "going", "direct", "to",
-        "Heaven,", "we", "were", "all", "going", "direct", "the", "other",
-        "way"};
-    VectorizedRowBatch batch = schema.createRowBatch(1000);
-    for(int i=0; i < 21000; ++i) {
-      ((LongColumnVector) batch.cols[0]).vector[batch.size] = r1.nextInt();
-      ((LongColumnVector) batch.cols[1]).vector[batch.size] = r1.nextLong();
-      if (i % 100 == 0) {
-        batch.cols[2].noNulls = false;
-        batch.cols[2].isNull[batch.size] = true;
-      } else {
-        ((BytesColumnVector) batch.cols[2]).setVal(batch.size,
-            words[r1.nextInt(words.length)].getBytes());
-      }
-      batch.size += 1;
-      if (batch.size == batch.getMaxSize()) {
-        writer.addRowBatch(batch);
-        batch.reset();
-      }
-    }
-    if (batch.size > 0) {
-      writer.addRowBatch(batch);
-    }
-
-    writer.close();
-    PrintStream origOut = System.out;
-    String outputFilename = "orc-file-dump.json";
-    FileOutputStream myOut = new FileOutputStream(workDir + File.separator + outputFilename);
-
-    // replace stdout and run command
-    System.setOut(new PrintStream(myOut));
-    FileDump.main(new String[]{testFilePath.toString(), "-j", "-p", "--rowindex=3"});
-    System.out.flush();
-    System.setOut(origOut);
-
-
-    checkOutput(outputFilename, workDir + File.separator + outputFilename);
-  }
-}
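Every dump test above follows the same capture pattern: swap System.out for a buffer or file, run FileDump.main, flush, and restore the original stream. A reusable sketch of that pattern (the helper is illustrative; if FileDump.main throws a checked exception, as the tests' throws clauses suggest, a Runnable-based caller would need to wrap the call):

import java.io.ByteArrayOutputStream;
import java.io.PrintStream;

public class StdoutCaptureSketch {
  // Run a command with System.out redirected into a buffer and return what it
  // printed, restoring the original stream even if the command throws.
  static String captureStdout(Runnable command) {
    PrintStream original = System.out;
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    System.setOut(new PrintStream(buffer));
    try {
      command.run();
      System.out.flush();
    } finally {
      System.setOut(original);
    }
    return buffer.toString();
  }
}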


[26/37] hive git commit: HIVE-17118. Move the hive-orc source files to make the package names unique.

Posted by om...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/impl/TreeReaderFactory.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/impl/TreeReaderFactory.java b/orc/src/java/org/apache/hive/orc/impl/TreeReaderFactory.java
new file mode 100644
index 0000000..d53ef34
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/impl/TreeReaderFactory.java
@@ -0,0 +1,2162 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc.impl;
+
+import java.io.EOFException;
+import java.io.IOException;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.TimeZone;
+
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.UnionColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hive.orc.TypeDescription;
+import org.apache.hive.orc.OrcProto;
+
+/**
+ * Factory for creating ORC tree readers.
+ */
+public class TreeReaderFactory {
+
+  public abstract static class TreeReader {
+    protected final int columnId;
+    protected BitFieldReader present = null;
+    protected boolean valuePresent = false;
+    protected int vectorColumnCount;
+
+    TreeReader(int columnId) throws IOException {
+      this(columnId, null);
+    }
+
+    protected TreeReader(int columnId, InStream in) throws IOException {
+      this.columnId = columnId;
+      if (in == null) {
+        present = null;
+        valuePresent = true;
+      } else {
+        present = new BitFieldReader(in, 1);
+      }
+      vectorColumnCount = -1;
+    }
+
+    void setVectorColumnCount(int vectorColumnCount) {
+      this.vectorColumnCount = vectorColumnCount;
+    }
+
+    void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
+      if (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) {
+        throw new IOException("Unknown encoding " + encoding + " in column " +
+            columnId);
+      }
+    }
+
+    protected static IntegerReader createIntegerReader(OrcProto.ColumnEncoding.Kind kind,
+        InStream in,
+        boolean signed, boolean skipCorrupt) throws IOException {
+      switch (kind) {
+        case DIRECT_V2:
+        case DICTIONARY_V2:
+          return new RunLengthIntegerReaderV2(in, signed, skipCorrupt);
+        case DIRECT:
+        case DICTIONARY:
+          return new RunLengthIntegerReader(in, signed);
+        default:
+          throw new IllegalArgumentException("Unknown encoding " + kind);
+      }
+    }
+
+    void startStripe(Map<StreamName, InStream> streams,
+        OrcProto.StripeFooter stripeFooter
+    ) throws IOException {
+      checkEncoding(stripeFooter.getColumnsList().get(columnId));
+      InStream in = streams.get(new StreamName(columnId,
+          OrcProto.Stream.Kind.PRESENT));
+      if (in == null) {
+        present = null;
+        valuePresent = true;
+      } else {
+        present = new BitFieldReader(in, 1);
+      }
+    }
+
+    /**
+     * Seek to the given position.
+     *
+     * @param index the indexes loaded from the file
+     * @throws IOException
+     */
+    public void seek(PositionProvider[] index) throws IOException {
+      seek(index[columnId]);
+    }
+
+    public void seek(PositionProvider index) throws IOException {
+      if (present != null) {
+        present.seek(index);
+      }
+    }
+
+    protected long countNonNulls(long rows) throws IOException {
+      if (present != null) {
+        long result = 0;
+        for (long c = 0; c < rows; ++c) {
+          if (present.next() == 1) {
+            result += 1;
+          }
+        }
+        return result;
+      } else {
+        return rows;
+      }
+    }
+
+    abstract void skipRows(long rows) throws IOException;
+
+    /**
+     * Called at the top level to read into the given batch.
+     * @param batch the batch to read into
+     * @param batchSize the number of rows to read
+     * @throws IOException
+     */
+    public void nextBatch(VectorizedRowBatch batch,
+                          int batchSize) throws IOException {
+      batch.cols[0].reset();
+      batch.cols[0].ensureSize(batchSize, false);
+      nextVector(batch.cols[0], null, batchSize);
+    }
+
+    /**
+     * Populates the isNull vector array in the previousVector object based on
+     * the present stream values. This function is called from all the child
+     * readers, and they all set the values based on isNull field value.
+     *
+     * @param previous The columnVector object whose isNull value is populated
+     * @param isNull Whether each value was null at a higher level. If
+     *               isNull is null, all values are non-null.
+     * @param batchSize      Size of the column vector
+     * @throws IOException
+     */
+    // TODO: it looks like isNull is never used; it's always null. Remove/deprecate?
+    public void nextVector(ColumnVector previous,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      if (present != null || isNull != null) {
+        // Set noNulls and isNull vector of the ColumnVector based on
+        // present stream
+        previous.noNulls = true;
+        boolean allNull = true;
+        for (int i = 0; i < batchSize; i++) {
+          if (isNull == null || !isNull[i]) {
+            if (present != null && present.next() != 1) {
+              previous.noNulls = false;
+              previous.isNull[i] = true;
+            } else {
+              previous.isNull[i] = false;
+              allNull = false;
+            }
+          } else {
+            previous.noNulls = false;
+            previous.isNull[i] = true;
+          }
+        }
+        previous.isRepeating = !previous.noNulls && allNull;
+      } else {
+        // There is no present stream, this means that all the values are
+        // present.
+        previous.noNulls = true;
+        for (int i = 0; i < batchSize; i++) {
+          previous.isNull[i] = false;
+        }
+      }
+    }
+
+    public BitFieldReader getPresent() {
+      return present;
+    }
+
+    public int getColumnId() {
+      return columnId;
+    }
+  }
+
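The nextVector logic above maps the PRESENT bit stream onto the ColumnVector's null flags: a 1 bit means the value is there, a 0 bit marks the row null, and the batch is flagged isRepeating only when every row came out null. A standalone sketch of that mapping, with a boolean[] standing in for the BitFieldReader and the outer isNull[] pass-through omitted:

public class PresentStreamSketch {
  static class NullInfo { boolean noNulls; boolean isRepeating; }

  static NullInfo populateNulls(boolean[] presentBits, boolean[] isNull, int batchSize) {
    NullInfo info = new NullInfo();
    info.noNulls = true;
    boolean allNull = true;
    for (int i = 0; i < batchSize; i++) {
      if (presentBits[i]) {          // bit == 1 in the PRESENT stream: value exists
        isNull[i] = false;
        allNull = false;
      } else {                       // bit == 0: this row is null
        isNull[i] = true;
        info.noNulls = false;
      }
    }
    // mirrors previous.isRepeating = !previous.noNulls && allNull in the reader
    info.isRepeating = !info.noNulls && allNull;
    return info;
  }
}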
+  public static class NullTreeReader extends TreeReader {
+
+    public NullTreeReader(int columnId) throws IOException {
+      super(columnId);
+    }
+
+    @Override
+    public void startStripe(Map<StreamName, InStream> streams,
+                            OrcProto.StripeFooter footer) {
+      // PASS
+    }
+
+    @Override
+    void skipRows(long rows) {
+      // PASS
+    }
+
+    @Override
+    public void seek(PositionProvider position) {
+      // PASS
+    }
+
+    @Override
+    public void seek(PositionProvider[] position) {
+      // PASS
+    }
+
+    @Override
+    public void nextVector(ColumnVector vector, boolean[] isNull, int size) {
+      vector.noNulls = false;
+      vector.isNull[0] = true;
+      vector.isRepeating = true;
+    }
+  }
+
+  public static class BooleanTreeReader extends TreeReader {
+    protected BitFieldReader reader = null;
+
+    BooleanTreeReader(int columnId) throws IOException {
+      this(columnId, null, null);
+    }
+
+    protected BooleanTreeReader(int columnId, InStream present, InStream data) throws IOException {
+      super(columnId, present);
+      if (data != null) {
+        reader = new BitFieldReader(data, 1);
+      }
+    }
+
+    @Override
+    void startStripe(Map<StreamName, InStream> streams,
+        OrcProto.StripeFooter stripeFooter
+    ) throws IOException {
+      super.startStripe(streams, stripeFooter);
+      reader = new BitFieldReader(streams.get(new StreamName(columnId,
+          OrcProto.Stream.Kind.DATA)), 1);
+    }
+
+    @Override
+    public void seek(PositionProvider[] index) throws IOException {
+      seek(index[columnId]);
+    }
+
+    @Override
+    public void seek(PositionProvider index) throws IOException {
+      super.seek(index);
+      reader.seek(index);
+    }
+
+    @Override
+    void skipRows(long items) throws IOException {
+      reader.skip(countNonNulls(items));
+    }
+
+    @Override
+    public void nextVector(ColumnVector previousVector,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      LongColumnVector result = (LongColumnVector) previousVector;
+
+      // Read present/isNull stream
+      super.nextVector(result, isNull, batchSize);
+
+      // Read value entries based on isNull entries
+      reader.nextVector(result, batchSize);
+    }
+  }
+
+  public static class ByteTreeReader extends TreeReader {
+    protected RunLengthByteReader reader = null;
+
+    ByteTreeReader(int columnId) throws IOException {
+      this(columnId, null, null);
+    }
+
+    protected ByteTreeReader(int columnId, InStream present, InStream data) throws IOException {
+      super(columnId, present);
+      this.reader = new RunLengthByteReader(data);
+    }
+
+    @Override
+    void startStripe(Map<StreamName, InStream> streams,
+        OrcProto.StripeFooter stripeFooter
+    ) throws IOException {
+      super.startStripe(streams, stripeFooter);
+      reader = new RunLengthByteReader(streams.get(new StreamName(columnId,
+          OrcProto.Stream.Kind.DATA)));
+    }
+
+    @Override
+    public void seek(PositionProvider[] index) throws IOException {
+      seek(index[columnId]);
+    }
+
+    @Override
+    public void seek(PositionProvider index) throws IOException {
+      super.seek(index);
+      reader.seek(index);
+    }
+
+    @Override
+    public void nextVector(ColumnVector previousVector,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      final LongColumnVector result = (LongColumnVector) previousVector;
+
+      // Read present/isNull stream
+      super.nextVector(result, isNull, batchSize);
+
+      // Read value entries based on isNull entries
+      reader.nextVector(result, result.vector, batchSize);
+    }
+
+    @Override
+    void skipRows(long items) throws IOException {
+      reader.skip(countNonNulls(items));
+    }
+  }
+
+  public static class ShortTreeReader extends TreeReader {
+    protected IntegerReader reader = null;
+
+    ShortTreeReader(int columnId) throws IOException {
+      this(columnId, null, null, null);
+    }
+
+    protected ShortTreeReader(int columnId, InStream present, InStream data,
+        OrcProto.ColumnEncoding encoding)
+        throws IOException {
+      super(columnId, present);
+      if (data != null && encoding != null) {
+        checkEncoding(encoding);
+        this.reader = createIntegerReader(encoding.getKind(), data, true, false);
+      }
+    }
+
+    @Override
+    void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
+      if ((encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) &&
+          (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2)) {
+        throw new IOException("Unknown encoding " + encoding + " in column " +
+            columnId);
+      }
+    }
+
+    @Override
+    void startStripe(Map<StreamName, InStream> streams,
+        OrcProto.StripeFooter stripeFooter
+    ) throws IOException {
+      super.startStripe(streams, stripeFooter);
+      StreamName name = new StreamName(columnId,
+          OrcProto.Stream.Kind.DATA);
+      reader = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(),
+          streams.get(name), true, false);
+    }
+
+    @Override
+    public void seek(PositionProvider[] index) throws IOException {
+      seek(index[columnId]);
+    }
+
+    @Override
+    public void seek(PositionProvider index) throws IOException {
+      super.seek(index);
+      reader.seek(index);
+    }
+
+    @Override
+    public void nextVector(ColumnVector previousVector,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      final LongColumnVector result = (LongColumnVector) previousVector;
+
+      // Read present/isNull stream
+      super.nextVector(result, isNull, batchSize);
+
+      // Read value entries based on isNull entries
+      reader.nextVector(result, result.vector, batchSize);
+    }
+
+    @Override
+    void skipRows(long items) throws IOException {
+      reader.skip(countNonNulls(items));
+    }
+  }
+
+  public static class IntTreeReader extends TreeReader {
+    protected IntegerReader reader = null;
+
+    IntTreeReader(int columnId) throws IOException {
+      this(columnId, null, null, null);
+    }
+
+    protected IntTreeReader(int columnId, InStream present, InStream data,
+        OrcProto.ColumnEncoding encoding)
+        throws IOException {
+      super(columnId, present);
+      if (data != null && encoding != null) {
+        checkEncoding(encoding);
+        this.reader = createIntegerReader(encoding.getKind(), data, true, false);
+      }
+    }
+
+    @Override
+    void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
+      if ((encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) &&
+          (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2)) {
+        throw new IOException("Unknown encoding " + encoding + " in column " +
+            columnId);
+      }
+    }
+
+    @Override
+    void startStripe(Map<StreamName, InStream> streams,
+        OrcProto.StripeFooter stripeFooter
+    ) throws IOException {
+      super.startStripe(streams, stripeFooter);
+      StreamName name = new StreamName(columnId,
+          OrcProto.Stream.Kind.DATA);
+      reader = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(),
+          streams.get(name), true, false);
+    }
+
+    @Override
+    public void seek(PositionProvider[] index) throws IOException {
+      seek(index[columnId]);
+    }
+
+    @Override
+    public void seek(PositionProvider index) throws IOException {
+      super.seek(index);
+      reader.seek(index);
+    }
+
+    @Override
+    public void nextVector(ColumnVector previousVector,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      final LongColumnVector result = (LongColumnVector) previousVector;
+
+      // Read present/isNull stream
+      super.nextVector(result, isNull, batchSize);
+
+      // Read value entries based on isNull entries
+      reader.nextVector(result, result.vector, batchSize);
+    }
+
+    @Override
+    void skipRows(long items) throws IOException {
+      reader.skip(countNonNulls(items));
+    }
+  }
+
+  public static class LongTreeReader extends TreeReader {
+    protected IntegerReader reader = null;
+
+    LongTreeReader(int columnId, boolean skipCorrupt) throws IOException {
+      this(columnId, null, null, null, skipCorrupt);
+    }
+
+    protected LongTreeReader(int columnId, InStream present, InStream data,
+        OrcProto.ColumnEncoding encoding,
+        boolean skipCorrupt)
+        throws IOException {
+      super(columnId, present);
+      if (data != null && encoding != null) {
+        checkEncoding(encoding);
+        this.reader = createIntegerReader(encoding.getKind(), data, true, skipCorrupt);
+      }
+    }
+
+    @Override
+    void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
+      if ((encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) &&
+          (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2)) {
+        throw new IOException("Unknown encoding " + encoding + " in column " +
+            columnId);
+      }
+    }
+
+    @Override
+    void startStripe(Map<StreamName, InStream> streams,
+        OrcProto.StripeFooter stripeFooter
+    ) throws IOException {
+      super.startStripe(streams, stripeFooter);
+      StreamName name = new StreamName(columnId,
+          OrcProto.Stream.Kind.DATA);
+      reader = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(),
+          streams.get(name), true, false);
+    }
+
+    @Override
+    public void seek(PositionProvider[] index) throws IOException {
+      seek(index[columnId]);
+    }
+
+    @Override
+    public void seek(PositionProvider index) throws IOException {
+      super.seek(index);
+      reader.seek(index);
+    }
+
+    @Override
+    public void nextVector(ColumnVector previousVector,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      final LongColumnVector result = (LongColumnVector) previousVector;
+
+      // Read present/isNull stream
+      super.nextVector(result, isNull, batchSize);
+
+      // Read value entries based on isNull entries
+      reader.nextVector(result, result.vector, batchSize);
+    }
+
+    @Override
+    void skipRows(long items) throws IOException {
+      reader.skip(countNonNulls(items));
+    }
+  }
+
+  public static class FloatTreeReader extends TreeReader {
+    protected InStream stream;
+    private final SerializationUtils utils;
+
+    FloatTreeReader(int columnId) throws IOException {
+      this(columnId, null, null);
+    }
+
+    protected FloatTreeReader(int columnId, InStream present, InStream data) throws IOException {
+      super(columnId, present);
+      this.utils = new SerializationUtils();
+      this.stream = data;
+    }
+
+    @Override
+    void startStripe(Map<StreamName, InStream> streams,
+        OrcProto.StripeFooter stripeFooter
+    ) throws IOException {
+      super.startStripe(streams, stripeFooter);
+      StreamName name = new StreamName(columnId,
+          OrcProto.Stream.Kind.DATA);
+      stream = streams.get(name);
+    }
+
+    @Override
+    public void seek(PositionProvider[] index) throws IOException {
+      seek(index[columnId]);
+    }
+
+    @Override
+    public void seek(PositionProvider index) throws IOException {
+      super.seek(index);
+      stream.seek(index);
+    }
+
+    @Override
+    public void nextVector(ColumnVector previousVector,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      final DoubleColumnVector result = (DoubleColumnVector) previousVector;
+
+      // Read present/isNull stream
+      super.nextVector(result, isNull, batchSize);
+
+      final boolean hasNulls = !result.noNulls;
+      boolean allNulls = hasNulls;
+
+      if (hasNulls) {
+        // conditions to ensure bounds checks skips
+        for (int i = 0; batchSize <= result.isNull.length && i < batchSize; i++) {
+          allNulls = allNulls & result.isNull[i];
+        }
+        if (allNulls) {
+          result.vector[0] = Double.NaN;
+          result.isRepeating = true;
+        } else {
+          // some nulls
+          result.isRepeating = false;
+          // conditions to ensure bounds checks skips
+          for (int i = 0; batchSize <= result.isNull.length
+              && batchSize <= result.vector.length && i < batchSize; i++) {
+            if (!result.isNull[i]) {
+              result.vector[i] = utils.readFloat(stream);
+            } else {
+              // If the value is not present then set NaN
+              result.vector[i] = Double.NaN;
+            }
+          }
+        }
+      } else {
+        // no nulls & > 1 row (check repeating)
+        boolean repeating = (batchSize > 1);
+        final float f1 = utils.readFloat(stream);
+        result.vector[0] = f1;
+        // conditions to ensure bounds checks skips
+        for (int i = 1; i < batchSize && batchSize <= result.vector.length; i++) {
+          final float f2 = utils.readFloat(stream);
+          repeating = repeating && (f1 == f2);
+          result.vector[i] = f2;
+        }
+        result.isRepeating = repeating;
+      }
+    }
+
+    @Override
+    protected void skipRows(long items) throws IOException {
+      items = countNonNulls(items);
+      for (int i = 0; i < items; ++i) {
+        utils.readFloat(stream);
+      }
+    }
+  }
+
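FloatTreeReader stores its values widened into a DoubleColumnVector, uses NaN for rows that are null, and in the no-nulls path compares every value against the first to decide isRepeating (a single-row batch is never marked repeating). A small sketch of that repetition check, detached from the stream handling:

public class RepeatingCheckSketch {
  // Mirrors the no-nulls path above: isRepeating holds only if there is more
  // than one row and every value equals the first one read (assumes batchSize >= 1).
  static boolean isRepeating(double[] values, int batchSize) {
    boolean repeating = (batchSize > 1);
    double first = values[0];
    for (int i = 1; i < batchSize; i++) {
      repeating = repeating && (first == values[i]);
    }
    return repeating;
  }
}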
+  public static class DoubleTreeReader extends TreeReader {
+    protected InStream stream;
+    private final SerializationUtils utils;
+
+    DoubleTreeReader(int columnId) throws IOException {
+      this(columnId, null, null);
+    }
+
+    protected DoubleTreeReader(int columnId, InStream present, InStream data) throws IOException {
+      super(columnId, present);
+      this.utils = new SerializationUtils();
+      this.stream = data;
+    }
+
+    @Override
+    void startStripe(Map<StreamName, InStream> streams,
+        OrcProto.StripeFooter stripeFooter
+    ) throws IOException {
+      super.startStripe(streams, stripeFooter);
+      StreamName name =
+          new StreamName(columnId,
+              OrcProto.Stream.Kind.DATA);
+      stream = streams.get(name);
+    }
+
+    @Override
+    public void seek(PositionProvider[] index) throws IOException {
+      seek(index[columnId]);
+    }
+
+    @Override
+    public void seek(PositionProvider index) throws IOException {
+      super.seek(index);
+      stream.seek(index);
+    }
+
+    @Override
+    public void nextVector(ColumnVector previousVector,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      final DoubleColumnVector result = (DoubleColumnVector) previousVector;
+
+      // Read present/isNull stream
+      super.nextVector(result, isNull, batchSize);
+
+      final boolean hasNulls = !result.noNulls;
+      boolean allNulls = hasNulls;
+
+      if (hasNulls) {
+        // conditions to ensure bounds checks skips
+        for (int i = 0; i < batchSize && batchSize <= result.isNull.length; i++) {
+          allNulls = allNulls & result.isNull[i];
+        }
+        if (allNulls) {
+          result.vector[0] = Double.NaN;
+          result.isRepeating = true;
+        } else {
+          // some nulls
+          result.isRepeating = false;
+          // conditions to ensure bounds checks skips
+          for (int i = 0; batchSize <= result.isNull.length
+              && batchSize <= result.vector.length && i < batchSize; i++) {
+            if (!result.isNull[i]) {
+              result.vector[i] = utils.readDouble(stream);
+            } else {
+              // If the value is not present then set NaN
+              result.vector[i] = Double.NaN;
+            }
+          }
+        }
+      } else {
+        // no nulls
+        boolean repeating = (batchSize > 1);
+        final double d1 = utils.readDouble(stream);
+        result.vector[0] = d1;
+        // conditions to ensure bounds checks skips
+        for (int i = 1; i < batchSize && batchSize <= result.vector.length; i++) {
+          final double d2 = utils.readDouble(stream);
+          repeating = repeating && (d1 == d2);
+          result.vector[i] = d2;
+        }
+        result.isRepeating = repeating;
+      }
+    }
+
+    @Override
+    void skipRows(long items) throws IOException {
+      items = countNonNulls(items);
+      long len = items * 8;
+      while (len > 0) {
+        len -= stream.skip(len);
+      }
+    }
+  }
+
+  public static class BinaryTreeReader extends TreeReader {
+    protected InStream stream;
+    protected IntegerReader lengths = null;
+    protected final LongColumnVector scratchlcv;
+
+    BinaryTreeReader(int columnId) throws IOException {
+      this(columnId, null, null, null, null);
+    }
+
+    protected BinaryTreeReader(int columnId, InStream present, InStream data, InStream length,
+        OrcProto.ColumnEncoding encoding) throws IOException {
+      super(columnId, present);
+      scratchlcv = new LongColumnVector();
+      this.stream = data;
+      if (length != null && encoding != null) {
+        checkEncoding(encoding);
+        this.lengths = createIntegerReader(encoding.getKind(), length, false, false);
+      }
+    }
+
+    @Override
+    void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
+      if ((encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) &&
+          (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2)) {
+        throw new IOException("Unknown encoding " + encoding + " in column " +
+            columnId);
+      }
+    }
+
+    @Override
+    void startStripe(Map<StreamName, InStream> streams,
+        OrcProto.StripeFooter stripeFooter
+    ) throws IOException {
+      super.startStripe(streams, stripeFooter);
+      StreamName name = new StreamName(columnId,
+          OrcProto.Stream.Kind.DATA);
+      stream = streams.get(name);
+      lengths = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(),
+          streams.get(new StreamName(columnId, OrcProto.Stream.Kind.LENGTH)), false, false);
+    }
+
+    @Override
+    public void seek(PositionProvider[] index) throws IOException {
+      seek(index[columnId]);
+    }
+
+    @Override
+    public void seek(PositionProvider index) throws IOException {
+      super.seek(index);
+      stream.seek(index);
+      lengths.seek(index);
+    }
+
+    @Override
+    public void nextVector(ColumnVector previousVector,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      final BytesColumnVector result = (BytesColumnVector) previousVector;
+
+      // Read present/isNull stream
+      super.nextVector(result, isNull, batchSize);
+
+      BytesColumnVectorUtil.readOrcByteArrays(stream, lengths, scratchlcv, result, batchSize);
+    }
+
+    @Override
+    void skipRows(long items) throws IOException {
+      items = countNonNulls(items);
+      long lengthToSkip = 0;
+      for (int i = 0; i < items; ++i) {
+        lengthToSkip += lengths.next();
+      }
+      while (lengthToSkip > 0) {
+        lengthToSkip -= stream.skip(lengthToSkip);
+      }
+    }
+  }
+
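skipRows in BinaryTreeReader totals the per-row LENGTH entries and then skips that many bytes of the DATA stream in a loop, since a single skip() call may cover fewer bytes than requested. An illustrative version of the same pattern over a plain java.io.InputStream, with an added guard against a skip() that makes no progress (the guard is ours, not in the patch):

import java.io.IOException;
import java.io.InputStream;

public class SkipBytesSketch {
  // Sum the per-row lengths, then keep calling skip() until all bytes are consumed.
  static void skipFully(InputStream data, long[] rowLengths, int rows) throws IOException {
    long toSkip = 0;
    for (int i = 0; i < rows; ++i) {
      toSkip += rowLengths[i];
    }
    while (toSkip > 0) {
      long skipped = data.skip(toSkip);
      if (skipped <= 0) {
        throw new IOException("Unable to skip remaining " + toSkip + " bytes");
      }
      toSkip -= skipped;
    }
  }
}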
+  public static class TimestampTreeReader extends TreeReader {
+    protected IntegerReader data = null;
+    protected IntegerReader nanos = null;
+    private final boolean skipCorrupt;
+    private Map<String, Long> baseTimestampMap;
+    protected long base_timestamp;
+    private final TimeZone readerTimeZone;
+    private TimeZone writerTimeZone;
+    private boolean hasSameTZRules;
+
+    TimestampTreeReader(int columnId, boolean skipCorrupt) throws IOException {
+      this(columnId, null, null, null, null, skipCorrupt, null);
+    }
+
+    protected TimestampTreeReader(int columnId, InStream presentStream, InStream dataStream,
+        InStream nanosStream, OrcProto.ColumnEncoding encoding, boolean skipCorrupt, String writerTimezone)
+        throws IOException {
+      super(columnId, presentStream);
+      this.skipCorrupt = skipCorrupt;
+      this.baseTimestampMap = new HashMap<>();
+      this.readerTimeZone = TimeZone.getDefault();
+      this.writerTimeZone = readerTimeZone;
+      this.hasSameTZRules = writerTimeZone.hasSameRules(readerTimeZone);
+      this.base_timestamp = getBaseTimestamp(readerTimeZone.getID());
+      if (encoding != null) {
+        checkEncoding(encoding);
+
+        if (dataStream != null) {
+          this.data = createIntegerReader(encoding.getKind(), dataStream, true, skipCorrupt);
+        }
+
+        if (nanosStream != null) {
+          this.nanos = createIntegerReader(encoding.getKind(), nanosStream, false, skipCorrupt);
+        }
+        base_timestamp = getBaseTimestamp(writerTimezone);
+      }
+    }
+
+    @Override
+    void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
+      if ((encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) &&
+          (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2)) {
+        throw new IOException("Unknown encoding " + encoding + " in column " +
+            columnId);
+      }
+    }
+
+    @Override
+    void startStripe(Map<StreamName, InStream> streams,
+        OrcProto.StripeFooter stripeFooter
+    ) throws IOException {
+      super.startStripe(streams, stripeFooter);
+      data = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(),
+          streams.get(new StreamName(columnId,
+              OrcProto.Stream.Kind.DATA)), true, skipCorrupt);
+      nanos = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(),
+          streams.get(new StreamName(columnId,
+              OrcProto.Stream.Kind.SECONDARY)), false, skipCorrupt);
+      base_timestamp = getBaseTimestamp(stripeFooter.getWriterTimezone());
+    }
+
+    protected long getBaseTimestamp(String timeZoneId) throws IOException {
+      // to make sure new readers read old files in the same way
+      if (timeZoneId == null || timeZoneId.isEmpty()) {
+        timeZoneId = readerTimeZone.getID();
+      }
+
+      if (!baseTimestampMap.containsKey(timeZoneId)) {
+        writerTimeZone = TimeZone.getTimeZone(timeZoneId);
+        hasSameTZRules = writerTimeZone.hasSameRules(readerTimeZone);
+        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+        sdf.setTimeZone(writerTimeZone);
+        try {
+          long epoch =
+              sdf.parse(WriterImpl.BASE_TIMESTAMP_STRING).getTime() / WriterImpl.MILLIS_PER_SECOND;
+          baseTimestampMap.put(timeZoneId, epoch);
+          return epoch;
+        } catch (ParseException e) {
+          throw new IOException("Unable to create base timestamp", e);
+        } finally {
+          sdf.setTimeZone(readerTimeZone);
+        }
+      }
+
+      return baseTimestampMap.get(timeZoneId);
+    }
+
+    @Override
+    public void seek(PositionProvider[] index) throws IOException {
+      seek(index[columnId]);
+    }
+
+    @Override
+    public void seek(PositionProvider index) throws IOException {
+      super.seek(index);
+      data.seek(index);
+      nanos.seek(index);
+    }
+
+    @Override
+    public void nextVector(ColumnVector previousVector,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      TimestampColumnVector result = (TimestampColumnVector) previousVector;
+      super.nextVector(previousVector, isNull, batchSize);
+
+      for (int i = 0; i < batchSize; i++) {
+        if (result.noNulls || !result.isNull[i]) {
+          long millis = data.next() + base_timestamp;
+          int newNanos = parseNanos(nanos.next());
+          if (millis < 0 && newNanos != 0) {
+            millis -= 1;
+          }
+          millis *= WriterImpl.MILLIS_PER_SECOND;
+          long offset = 0;
+          // If reader and writer time zones have different rules, adjust the timezone difference
+          // between reader and writer taking day light savings into account.
+          if (!hasSameTZRules) {
+            offset = writerTimeZone.getOffset(millis) - readerTimeZone.getOffset(millis);
+          }
+          long adjustedMillis = millis + offset;
+          // Sometimes the reader timezone might have changed after adding the adjustedMillis.
+          // To account for that change, check for any difference in reader timezone after
+          // adding adjustedMillis. If so use the new offset (offset at adjustedMillis point of time).
+          if (!hasSameTZRules &&
+              (readerTimeZone.getOffset(millis) != readerTimeZone.getOffset(adjustedMillis))) {
+            long newOffset =
+                writerTimeZone.getOffset(millis) - readerTimeZone.getOffset(adjustedMillis);
+            adjustedMillis = millis + newOffset;
+          }
+          result.time[i] = adjustedMillis;
+          result.nanos[i] = newNanos;
+          if (result.isRepeating && i != 0 &&
+              (result.time[0] != result.time[i] ||
+                  result.nanos[0] != result.nanos[i])) {
+            result.isRepeating = false;
+          }
+        }
+      }
+    }
+
+    private static int parseNanos(long serialized) {
+      int zeros = 7 & (int) serialized;
+      int result = (int) (serialized >>> 3);
+      if (zeros != 0) {
+        for (int i = 0; i <= zeros; ++i) {
+          result *= 10;
+        }
+      }
+      return result;
+    }
+
+    @Override
+    void skipRows(long items) throws IOException {
+      items = countNonNulls(items);
+      data.skip(items);
+      nanos.skip(items);
+    }
+  }
+
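parseNanos above undoes the ORC nanosecond packing: the low three bits record how many trailing decimal zeros were stripped minus one (zero meaning the value is stored as-is), and the remaining bits hold the significant digits. A worked example using the same decoding logic (the sample inputs are illustrative):

public class ParseNanosSketch {
  // Same decoding as parseNanos above: low 3 bits = (stripped trailing zeros - 1),
  // or 0 if nothing was stripped; remaining bits = the significant digits.
  static int parseNanos(long serialized) {
    int zeros = 7 & (int) serialized;
    int result = (int) (serialized >>> 3);
    if (zeros != 0) {
      for (int i = 0; i <= zeros; ++i) {
        result *= 10;
      }
    }
    return result;
  }

  public static void main(String[] args) {
    System.out.println(parseNanos(0x0a));  // (1 << 3) | 2  ->         1000
    System.out.println(parseNanos(0x0c));  // (1 << 3) | 4  ->       100000
    System.out.println(parseNanos(0x2f));  // (5 << 3) | 7  ->    500000000
  }
}

The seconds themselves come from the DATA stream relative to the base timestamp derived from WriterImpl.BASE_TIMESTAMP_STRING in the writer's time zone, which is why startStripe re-resolves base_timestamp from the stripe footer's writer timezone.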
+  public static class DateTreeReader extends TreeReader {
+    protected IntegerReader reader = null;
+
+    DateTreeReader(int columnId) throws IOException {
+      this(columnId, null, null, null);
+    }
+
+    protected DateTreeReader(int columnId, InStream present, InStream data,
+        OrcProto.ColumnEncoding encoding) throws IOException {
+      super(columnId, present);
+      if (data != null && encoding != null) {
+        checkEncoding(encoding);
+        reader = createIntegerReader(encoding.getKind(), data, true, false);
+      }
+    }
+
+    @Override
+    void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
+      if ((encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) &&
+          (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2)) {
+        throw new IOException("Unknown encoding " + encoding + " in column " +
+            columnId);
+      }
+    }
+
+    @Override
+    void startStripe(Map<StreamName, InStream> streams,
+        OrcProto.StripeFooter stripeFooter
+    ) throws IOException {
+      super.startStripe(streams, stripeFooter);
+      StreamName name = new StreamName(columnId,
+          OrcProto.Stream.Kind.DATA);
+      reader = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(),
+          streams.get(name), true, false);
+    }
+
+    @Override
+    public void seek(PositionProvider[] index) throws IOException {
+      seek(index[columnId]);
+    }
+
+    @Override
+    public void seek(PositionProvider index) throws IOException {
+      super.seek(index);
+      reader.seek(index);
+    }
+
+    @Override
+    public void nextVector(ColumnVector previousVector,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      final LongColumnVector result = (LongColumnVector) previousVector;
+
+      // Read present/isNull stream
+      super.nextVector(result, isNull, batchSize);
+
+      // Read value entries based on isNull entries
+      reader.nextVector(result, result.vector, batchSize);
+    }
+
+    @Override
+    void skipRows(long items) throws IOException {
+      reader.skip(countNonNulls(items));
+    }
+  }
+
+  public static class DecimalTreeReader extends TreeReader {
+    protected InStream valueStream;
+    protected IntegerReader scaleReader = null;
+    private int[] scratchScaleVector;
+    private byte[] scratchBytes;
+
+    private final int precision;
+    private final int scale;
+
+    DecimalTreeReader(int columnId, int precision, int scale) throws IOException {
+      this(columnId, precision, scale, null, null, null, null);
+    }
+
+    protected DecimalTreeReader(int columnId, int precision, int scale, InStream present,
+        InStream valueStream, InStream scaleStream, OrcProto.ColumnEncoding encoding)
+        throws IOException {
+      super(columnId, present);
+      this.precision = precision;
+      this.scale = scale;
+      this.scratchScaleVector = new int[VectorizedRowBatch.DEFAULT_SIZE];
+      this.valueStream = valueStream;
+      this.scratchBytes = new byte[HiveDecimal.SCRATCH_BUFFER_LEN_SERIALIZATION_UTILS_READ];
+      if (scaleStream != null && encoding != null) {
+        checkEncoding(encoding);
+        this.scaleReader = createIntegerReader(encoding.getKind(), scaleStream, true, false);
+      }
+    }
+
+    @Override
+    void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
+      if ((encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) &&
+          (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2)) {
+        throw new IOException("Unknown encoding " + encoding + " in column " +
+            columnId);
+      }
+    }
+
+    @Override
+    void startStripe(Map<StreamName, InStream> streams,
+        OrcProto.StripeFooter stripeFooter
+    ) throws IOException {
+      super.startStripe(streams, stripeFooter);
+      valueStream = streams.get(new StreamName(columnId,
+          OrcProto.Stream.Kind.DATA));
+      scaleReader = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(),
+          streams.get(new StreamName(columnId, OrcProto.Stream.Kind.SECONDARY)), true, false);
+    }
+
+    @Override
+    public void seek(PositionProvider[] index) throws IOException {
+      seek(index[columnId]);
+    }
+
+    @Override
+    public void seek(PositionProvider index) throws IOException {
+      super.seek(index);
+      valueStream.seek(index);
+      scaleReader.seek(index);
+    }
+
+    @Override
+    public void nextVector(ColumnVector previousVector,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      final DecimalColumnVector result = (DecimalColumnVector) previousVector;
+      // Read present/isNull stream
+      super.nextVector(result, isNull, batchSize);
+
+      if (batchSize > scratchScaleVector.length) {
+        scratchScaleVector = new int[(int) batchSize];
+      }
+      // read the scales
+      scaleReader.nextVector(result, scratchScaleVector, batchSize);
+      // Read value entries based on isNull entries
+      // Use the fast ORC deserialization method that emulates SerializationUtils.readBigInteger
+      // provided by HiveDecimalWritable.
+      HiveDecimalWritable[] vector = result.vector;
+      HiveDecimalWritable decWritable;
+      if (result.noNulls) {
+        for (int r=0; r < batchSize; ++r) {
+          decWritable = vector[r];
+          if (!decWritable.serializationUtilsRead(
+              valueStream, scratchScaleVector[r],
+              scratchBytes)) {
+            result.isNull[r] = true;
+            result.noNulls = false;
+          }
+        }
+      } else if (!result.isRepeating || !result.isNull[0]) {
+        for (int r=0; r < batchSize; ++r) {
+          if (!result.isNull[r]) {
+            decWritable = vector[r];
+            if (!decWritable.serializationUtilsRead(
+                valueStream, scratchScaleVector[r],
+                scratchBytes)) {
+              result.isNull[r] = true;
+              result.noNulls = false;
+            }
+          }
+        }
+      }
+    }
+
+    @Override
+    void skipRows(long items) throws IOException {
+      items = countNonNulls(items);
+      HiveDecimalWritable scratchDecWritable = new HiveDecimalWritable();
+      for (int i = 0; i < items; i++) {
+        scratchDecWritable.serializationUtilsRead(valueStream, 0, scratchBytes);
+      }
+      scaleReader.skip(items);
+    }
+  }
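The decimal reader pairs two streams: DATA carries the serialized unscaled values that serializationUtilsRead consumes, and SECONDARY carries one scale per value, read into scratchScaleVector first. The convention is the familiar unscaled-value/scale split; a minimal sketch with invented numbers, using java.math.BigDecimal only as an analogy:

    java.math.BigDecimal d =
        new java.math.BigDecimal(java.math.BigInteger.valueOf(12345), 2);
    // d.toPlainString() == "123.45": unscaled value 12345 (DATA), scale 2 (SECONDARY)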
+
+  /**
+   * A tree reader that will read string columns. At the start of the
+   * stripe, it creates an internal reader based on whether a direct or
+   * dictionary encoding was used.
+   */
+  public static class StringTreeReader extends TreeReader {
+    protected TreeReader reader;
+
+    StringTreeReader(int columnId) throws IOException {
+      super(columnId);
+    }
+
+    protected StringTreeReader(int columnId, InStream present, InStream data, InStream length,
+        InStream dictionary, OrcProto.ColumnEncoding encoding) throws IOException {
+      super(columnId, present);
+      if (encoding != null) {
+        switch (encoding.getKind()) {
+          case DIRECT:
+          case DIRECT_V2:
+            reader = new StringDirectTreeReader(columnId, present, data, length,
+                encoding.getKind());
+            break;
+          case DICTIONARY:
+          case DICTIONARY_V2:
+            reader = new StringDictionaryTreeReader(columnId, present, data, length, dictionary,
+                encoding);
+            break;
+          default:
+            throw new IllegalArgumentException("Unsupported encoding " +
+                encoding.getKind());
+        }
+      }
+    }
+
+    @Override
+    void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
+      reader.checkEncoding(encoding);
+    }
+
+    @Override
+    void startStripe(Map<StreamName, InStream> streams,
+        OrcProto.StripeFooter stripeFooter
+    ) throws IOException {
+      // For each stripe, checks the encoding and initializes the appropriate
+      // reader
+      switch (stripeFooter.getColumnsList().get(columnId).getKind()) {
+        case DIRECT:
+        case DIRECT_V2:
+          reader = new StringDirectTreeReader(columnId);
+          break;
+        case DICTIONARY:
+        case DICTIONARY_V2:
+          reader = new StringDictionaryTreeReader(columnId);
+          break;
+        default:
+          throw new IllegalArgumentException("Unsupported encoding " +
+              stripeFooter.getColumnsList().get(columnId).getKind());
+      }
+      reader.startStripe(streams, stripeFooter);
+    }
+
+    @Override
+    public void seek(PositionProvider[] index) throws IOException {
+      reader.seek(index);
+    }
+
+    @Override
+    public void seek(PositionProvider index) throws IOException {
+      reader.seek(index);
+    }
+
+    @Override
+    public void nextVector(ColumnVector previousVector,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      reader.nextVector(previousVector, isNull, batchSize);
+    }
+
+    @Override
+    void skipRows(long items) throws IOException {
+      reader.skipRows(items);
+    }
+  }
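Note that the encoding choice is per stripe, not per file: startStripe above rebuilds the delegate from that stripe's column encoding, so a file whose writer used DICTIONARY_V2 for one stripe and fell back to DIRECT_V2 for another (for example, when the dictionary did not pay off) still reads correctly.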
+
+  private static org.slf4j.Logger LOG = org.slf4j.LoggerFactory.getLogger(TreeReaderFactory.class);
+  // This class collects together very similar methods for reading an ORC vector of byte arrays and
+  // creating the BytesColumnVector.
+  //
+  public static class BytesColumnVectorUtil {
+
+    private static byte[] commonReadByteArrays(InStream stream, IntegerReader lengths,
+        LongColumnVector scratchlcv,
+        BytesColumnVector result, final int batchSize) throws IOException {
+      // Read lengths
+      scratchlcv.isNull = result.isNull;  // Notice we are replacing the isNull vector here...
+      scratchlcv.ensureSize(batchSize, false);
+      lengths.nextVector(scratchlcv, scratchlcv.vector, batchSize);
+      int totalLength = 0;
+      if (!scratchlcv.isRepeating) {
+        for (int i = 0; i < batchSize; i++) {
+          if (!scratchlcv.isNull[i]) {
+            totalLength += (int) scratchlcv.vector[i];
+          }
+        }
+      } else {
+        if (!scratchlcv.isNull[0]) {
+          totalLength = (int) (batchSize * scratchlcv.vector[0]);
+        }
+      }
+
+      // Read all the strings for this batch
+      byte[] allBytes = new byte[totalLength];
+      int offset = 0;
+      int len = totalLength;
+      while (len > 0) {
+        int bytesRead = stream.read(allBytes, offset, len);
+        if (bytesRead < 0) {
+          throw new EOFException("Can't finish byte read from " + stream);
+        }
+        len -= bytesRead;
+        offset += bytesRead;
+      }
+
+      return allBytes;
+    }
+
+    // This method has the common code for reading in bytes into a BytesColumnVector.
+    public static void readOrcByteArrays(InStream stream,
+                                         IntegerReader lengths,
+                                         LongColumnVector scratchlcv,
+                                         BytesColumnVector result,
+                                         final int batchSize) throws IOException {
+      if (result.noNulls || !(result.isRepeating && result.isNull[0])) {
+        byte[] allBytes = commonReadByteArrays(stream, lengths, scratchlcv,
+            result, (int) batchSize);
+
+        // Too expensive to figure out 'repeating' by comparisons.
+        result.isRepeating = false;
+        int offset = 0;
+        if (!scratchlcv.isRepeating) {
+          for (int i = 0; i < batchSize; i++) {
+            if (!scratchlcv.isNull[i]) {
+              result.setRef(i, allBytes, offset, (int) scratchlcv.vector[i]);
+              offset += scratchlcv.vector[i];
+            } else {
+              result.setRef(i, allBytes, 0, 0);
+            }
+          }
+        } else {
+          for (int i = 0; i < batchSize; i++) {
+            if (!scratchlcv.isNull[i]) {
+              result.setRef(i, allBytes, offset, (int) scratchlcv.vector[0]);
+              offset += scratchlcv.vector[0];
+            } else {
+              result.setRef(i, allBytes, 0, 0);
+            }
+          }
+        }
+      }
+    }
+  }
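The design choice here is to avoid one allocation and copy per string: commonReadByteArrays sums the batch's lengths, reads the whole batch into a single allBytes array, and readOrcByteArrays then hands each row a (bytes, offset, length) reference via setRef instead of copying. With invented lengths [1, 2, 3], totalLength is 6, the DATA stream yields allBytes = "abbccc", and the rows end up referencing (0, 1) = "a", (1, 2) = "bb", and (3, 3) = "ccc", while null rows reference (0, 0).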
+
+  /**
+   * A reader for string columns that are direct encoded in the current
+   * stripe.
+   */
+  public static class StringDirectTreeReader extends TreeReader {
+    private static final HadoopShims SHIMS = HadoopShims.Factory.get();
+    protected InStream stream;
+    protected HadoopShims.TextReaderShim data;
+    protected IntegerReader lengths;
+    private final LongColumnVector scratchlcv;
+
+    StringDirectTreeReader(int columnId) throws IOException {
+      this(columnId, null, null, null, null);
+    }
+
+    protected StringDirectTreeReader(int columnId, InStream present, InStream data,
+        InStream length, OrcProto.ColumnEncoding.Kind encoding) throws IOException {
+      super(columnId, present);
+      this.scratchlcv = new LongColumnVector();
+      this.stream = data;
+      if (length != null && encoding != null) {
+        this.lengths = createIntegerReader(encoding, length, false, false);
+        this.data = SHIMS.getTextReaderShim(this.stream);
+      }
+    }
+
+    @Override
+    void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
+      if (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT &&
+          encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2) {
+        throw new IOException("Unknown encoding " + encoding + " in column " +
+            columnId);
+      }
+    }
+
+    @Override
+    void startStripe(Map<StreamName, InStream> streams,
+        OrcProto.StripeFooter stripeFooter
+    ) throws IOException {
+      super.startStripe(streams, stripeFooter);
+      StreamName name = new StreamName(columnId,
+          OrcProto.Stream.Kind.DATA);
+      stream = streams.get(name);
+      data = SHIMS.getTextReaderShim(this.stream);
+      lengths = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(),
+          streams.get(new StreamName(columnId, OrcProto.Stream.Kind.LENGTH)),
+          false, false);
+    }
+
+    @Override
+    public void seek(PositionProvider[] index) throws IOException {
+      seek(index[columnId]);
+    }
+
+    @Override
+    public void seek(PositionProvider index) throws IOException {
+      super.seek(index);
+      stream.seek(index);
+      // the TextReaderShim (data) needs no separate seek; it reads from the stream repositioned above
+      lengths.seek(index);
+    }
+
+    @Override
+    public void nextVector(ColumnVector previousVector,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      final BytesColumnVector result = (BytesColumnVector) previousVector;
+
+      // Read present/isNull stream
+      super.nextVector(result, isNull, batchSize);
+
+      BytesColumnVectorUtil.readOrcByteArrays(stream, lengths, scratchlcv,
+          result, batchSize);
+    }
+
+    @Override
+    void skipRows(long items) throws IOException {
+      items = countNonNulls(items);
+      long lengthToSkip = 0;
+      for (int i = 0; i < items; ++i) {
+        lengthToSkip += lengths.next();
+      }
+
+      while (lengthToSkip > 0) {
+        lengthToSkip -= stream.skip(lengthToSkip);
+      }
+    }
+
+    public IntegerReader getLengths() {
+      return lengths;
+    }
+
+    public InStream getStream() {
+      return stream;
+    }
+  }
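For the direct encoding, skipping is length-driven as well: skipRows above sums the next countNonNulls(items) entries of the LENGTH stream and skips exactly that many bytes of DATA. With invented lengths 4, 0 and 10 for three non-null rows, the data stream advances by 14 bytes; the loop around stream.skip covers the case where skip returns fewer bytes than requested.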
+
+  /**
+   * A reader for string columns that are dictionary encoded in the current
+   * stripe.
+   */
+  public static class StringDictionaryTreeReader extends TreeReader {
+    private static final byte[] EMPTY_BYTE_ARRAY = new byte[0];
+    private DynamicByteArray dictionaryBuffer;
+    private int[] dictionaryOffsets;
+    protected IntegerReader reader;
+
+    private byte[] dictionaryBufferInBytesCache = null;
+    private final LongColumnVector scratchlcv;
+
+    StringDictionaryTreeReader(int columnId) throws IOException {
+      this(columnId, null, null, null, null, null);
+    }
+
+    protected StringDictionaryTreeReader(int columnId, InStream present, InStream data,
+        InStream length, InStream dictionary, OrcProto.ColumnEncoding encoding)
+        throws IOException {
+      super(columnId, present);
+      scratchlcv = new LongColumnVector();
+      if (data != null && encoding != null) {
+        this.reader = createIntegerReader(encoding.getKind(), data, false, false);
+      }
+
+      if (dictionary != null && encoding != null) {
+        readDictionaryStream(dictionary);
+      }
+
+      if (length != null && encoding != null) {
+        readDictionaryLengthStream(length, encoding);
+      }
+    }
+
+    @Override
+    void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
+      if (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DICTIONARY &&
+          encoding.getKind() != OrcProto.ColumnEncoding.Kind.DICTIONARY_V2) {
+        throw new IOException("Unknown encoding " + encoding + " in column " +
+            columnId);
+      }
+    }
+
+    @Override
+    void startStripe(Map<StreamName, InStream> streams,
+        OrcProto.StripeFooter stripeFooter
+    ) throws IOException {
+      super.startStripe(streams, stripeFooter);
+
+      // read the dictionary blob
+      StreamName name = new StreamName(columnId,
+          OrcProto.Stream.Kind.DICTIONARY_DATA);
+      InStream in = streams.get(name);
+      readDictionaryStream(in);
+
+      // read the lengths
+      name = new StreamName(columnId, OrcProto.Stream.Kind.LENGTH);
+      in = streams.get(name);
+      readDictionaryLengthStream(in, stripeFooter.getColumnsList().get(columnId));
+
+      // set up the row reader
+      name = new StreamName(columnId, OrcProto.Stream.Kind.DATA);
+      reader = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(),
+          streams.get(name), false, false);
+    }
+
+    private void readDictionaryLengthStream(InStream in, OrcProto.ColumnEncoding encoding)
+        throws IOException {
+      int dictionarySize = encoding.getDictionarySize();
+      if (in != null) { // Guard against empty LENGTH stream.
+        IntegerReader lenReader = createIntegerReader(encoding.getKind(), in, false, false);
+        int offset = 0;
+        if (dictionaryOffsets == null ||
+            dictionaryOffsets.length < dictionarySize + 1) {
+          dictionaryOffsets = new int[dictionarySize + 1];
+        }
+        for (int i = 0; i < dictionarySize; ++i) {
+          dictionaryOffsets[i] = offset;
+          offset += (int) lenReader.next();
+        }
+        dictionaryOffsets[dictionarySize] = offset;
+        in.close();
+      }
+
+    }
+
+    private void readDictionaryStream(InStream in) throws IOException {
+      if (in != null) { // Guard against empty dictionary stream.
+        if (in.available() > 0) {
+          dictionaryBuffer = new DynamicByteArray(64, in.available());
+          dictionaryBuffer.readAll(in);
+          // Since this is the start of a stripe, invalidate the cache.
+          dictionaryBufferInBytesCache = null;
+        }
+        in.close();
+      } else {
+        dictionaryBuffer = null;
+      }
+    }
+
+    @Override
+    public void seek(PositionProvider[] index) throws IOException {
+      seek(index[columnId]);
+    }
+
+    @Override
+    public void seek(PositionProvider index) throws IOException {
+      super.seek(index);
+      reader.seek(index);
+    }
+
+    @Override
+    public void nextVector(ColumnVector previousVector,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      final BytesColumnVector result = (BytesColumnVector) previousVector;
+      int offset;
+      int length;
+
+      // Read present/isNull stream
+      super.nextVector(result, isNull, batchSize);
+
+      if (dictionaryBuffer != null) {
+
+        // Load dictionaryBuffer into cache.
+        if (dictionaryBufferInBytesCache == null) {
+          dictionaryBufferInBytesCache = dictionaryBuffer.get();
+        }
+
+        // Read string offsets
+        scratchlcv.isNull = result.isNull;
+        scratchlcv.ensureSize((int) batchSize, false);
+        reader.nextVector(scratchlcv, scratchlcv.vector, batchSize);
+        if (!scratchlcv.isRepeating) {
+
+          // The vector has non-repeating strings. Iterate through the batch
+          // and set the strings one by one.
+          for (int i = 0; i < batchSize; i++) {
+            if (!scratchlcv.isNull[i]) {
+              offset = dictionaryOffsets[(int) scratchlcv.vector[i]];
+              length = getDictionaryEntryLength((int) scratchlcv.vector[i], offset);
+              result.setRef(i, dictionaryBufferInBytesCache, offset, length);
+            } else {
+              // If the value is null then set offset and length to zero (null string)
+              result.setRef(i, dictionaryBufferInBytesCache, 0, 0);
+            }
+          }
+        } else {
+          // If the value is repeating, just set the first value in the
+          // vector and set the isRepeating flag to true. No need to iterate through
+          // and set all the elements to the same value.
+          offset = dictionaryOffsets[(int) scratchlcv.vector[0]];
+          length = getDictionaryEntryLength((int) scratchlcv.vector[0], offset);
+          result.setRef(0, dictionaryBufferInBytesCache, offset, length);
+        }
+        result.isRepeating = scratchlcv.isRepeating;
+      } else {
+        if (dictionaryOffsets == null) {
+          // Entire stripe contains null strings.
+          result.isRepeating = true;
+          result.noNulls = false;
+          result.isNull[0] = true;
+          result.setRef(0, EMPTY_BYTE_ARRAY, 0, 0);
+        } else {
+          // stripe contains nulls and empty strings
+          for (int i = 0; i < batchSize; i++) {
+            if (!result.isNull[i]) {
+              result.setRef(i, EMPTY_BYTE_ARRAY, 0, 0);
+            }
+          }
+        }
+      }
+    }
+
+    int getDictionaryEntryLength(int entry, int offset) {
+      final int length;
+      // if it isn't the last entry, subtract the offsets otherwise use
+      // the buffer length.
+      if (entry < dictionaryOffsets.length - 1) {
+        length = dictionaryOffsets[entry + 1] - offset;
+      } else {
+        length = dictionaryBuffer.size() - offset;
+      }
+      return length;
+    }
+
+    @Override
+    void skipRows(long items) throws IOException {
+      reader.skip(countNonNulls(items));
+    }
+
+    public IntegerReader getReader() {
+      return reader;
+    }
+  }
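A small end-to-end sketch of the dictionary path with invented contents: DICTIONARY_DATA holds the bytes "applefig" and the LENGTH stream holds [5, 3], so readDictionaryLengthStream builds dictionaryOffsets = [0, 5, 8]; a DATA stream of row indexes [1, 0, 1] then resolves, via getDictionaryEntryLength, to "fig", "apple", "fig", each set as a reference into the cached dictionary bytes rather than a copy.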
+
+  public static class CharTreeReader extends StringTreeReader {
+    int maxLength;
+
+    CharTreeReader(int columnId, int maxLength) throws IOException {
+      this(columnId, maxLength, null, null, null, null, null);
+    }
+
+    protected CharTreeReader(int columnId, int maxLength, InStream present, InStream data,
+        InStream length, InStream dictionary, OrcProto.ColumnEncoding encoding) throws IOException {
+      super(columnId, present, data, length, dictionary, encoding);
+      this.maxLength = maxLength;
+    }
+
+    @Override
+    public void nextVector(ColumnVector previousVector,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      // Get the vector of strings from StringTreeReader, then make a 2nd pass to
+      // adjust down the length (right trim and truncate) if necessary.
+      super.nextVector(previousVector, isNull, batchSize);
+      BytesColumnVector result = (BytesColumnVector) previousVector;
+      int adjustedDownLen;
+      if (result.isRepeating) {
+        if (result.noNulls || !result.isNull[0]) {
+          adjustedDownLen = StringExpr
+              .rightTrimAndTruncate(result.vector[0], result.start[0], result.length[0], maxLength);
+          if (adjustedDownLen < result.length[0]) {
+            result.setRef(0, result.vector[0], result.start[0], adjustedDownLen);
+          }
+        }
+      } else {
+        if (result.noNulls) {
+          for (int i = 0; i < batchSize; i++) {
+            adjustedDownLen = StringExpr
+                .rightTrimAndTruncate(result.vector[i], result.start[i], result.length[i],
+                    maxLength);
+            if (adjustedDownLen < result.length[i]) {
+              result.setRef(i, result.vector[i], result.start[i], adjustedDownLen);
+            }
+          }
+        } else {
+          for (int i = 0; i < batchSize; i++) {
+            if (!result.isNull[i]) {
+              adjustedDownLen = StringExpr
+                  .rightTrimAndTruncate(result.vector[i], result.start[i], result.length[i],
+                      maxLength);
+              if (adjustedDownLen < result.length[i]) {
+                result.setRef(i, result.vector[i], result.start[i], adjustedDownLen);
+              }
+            }
+          }
+        }
+      }
+    }
+  }
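The second pass only ever shrinks the (start, length) reference produced by StringTreeReader; the bytes themselves are untouched. With maxLength = 3 and invented stored values, "abcd " comes back with length 3 ("abc") and "ab  " with length 2 ("ab"): trailing blanks are trimmed and anything past the declared CHAR length is truncated.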
+
+  public static class VarcharTreeReader extends StringTreeReader {
+    int maxLength;
+
+    VarcharTreeReader(int columnId, int maxLength) throws IOException {
+      this(columnId, maxLength, null, null, null, null, null);
+    }
+
+    protected VarcharTreeReader(int columnId, int maxLength, InStream present, InStream data,
+        InStream length, InStream dictionary, OrcProto.ColumnEncoding encoding) throws IOException {
+      super(columnId, present, data, length, dictionary, encoding);
+      this.maxLength = maxLength;
+    }
+
+    @Override
+    public void nextVector(ColumnVector previousVector,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      // Get the vector of strings from StringTreeReader, then make a 2nd pass to
+      // adjust down the length (truncate) if necessary.
+      super.nextVector(previousVector, isNull, batchSize);
+      BytesColumnVector result = (BytesColumnVector) previousVector;
+
+      int adjustedDownLen;
+      if (result.isRepeating) {
+        if (result.noNulls || !result.isNull[0]) {
+          adjustedDownLen = StringExpr
+              .truncate(result.vector[0], result.start[0], result.length[0], maxLength);
+          if (adjustedDownLen < result.length[0]) {
+            result.setRef(0, result.vector[0], result.start[0], adjustedDownLen);
+          }
+        }
+      } else {
+        if (result.noNulls) {
+          for (int i = 0; i < batchSize; i++) {
+            adjustedDownLen = StringExpr
+                .truncate(result.vector[i], result.start[i], result.length[i], maxLength);
+            if (adjustedDownLen < result.length[i]) {
+              result.setRef(i, result.vector[i], result.start[i], adjustedDownLen);
+            }
+          }
+        } else {
+          for (int i = 0; i < batchSize; i++) {
+            if (!result.isNull[i]) {
+              adjustedDownLen = StringExpr
+                  .truncate(result.vector[i], result.start[i], result.length[i], maxLength);
+              if (adjustedDownLen < result.length[i]) {
+                result.setRef(i, result.vector[i], result.start[i], adjustedDownLen);
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+
+  public static class StructTreeReader extends TreeReader {
+    protected final TreeReader[] fields;
+
+    protected StructTreeReader(int columnId,
+                               TypeDescription readerSchema,
+                               SchemaEvolution evolution,
+                               boolean[] included,
+                               boolean skipCorrupt) throws IOException {
+      super(columnId);
+
+      List<TypeDescription> childrenTypes = readerSchema.getChildren();
+      this.fields = new TreeReader[childrenTypes.size()];
+      for (int i = 0; i < fields.length; ++i) {
+        TypeDescription subtype = childrenTypes.get(i);
+        this.fields[i] = createTreeReader(subtype, evolution, included, skipCorrupt);
+      }
+    }
+
+    public TreeReader[] getChildReaders() {
+      return fields;
+    }
+
+    protected StructTreeReader(int columnId, InStream present,
+        OrcProto.ColumnEncoding encoding, TreeReader[] childReaders) throws IOException {
+      super(columnId, present);
+      if (encoding != null) {
+        checkEncoding(encoding);
+      }
+      this.fields = childReaders;
+    }
+
+    @Override
+    public void seek(PositionProvider[] index) throws IOException {
+      super.seek(index);
+      for (TreeReader kid : fields) {
+        if (kid != null) {
+          kid.seek(index);
+        }
+      }
+    }
+
+    @Override
+    public void nextBatch(VectorizedRowBatch batch,
+                          int batchSize) throws IOException {
+      for(int i=0; i < fields.length &&
+          (vectorColumnCount == -1 || i < vectorColumnCount); ++i) {
+        ColumnVector colVector = batch.cols[i];
+        if (colVector != null) {
+          colVector.reset();
+          colVector.ensureSize((int) batchSize, false);
+          fields[i].nextVector(colVector, null, batchSize);
+        }
+      }
+    }
+
+    @Override
+    public void nextVector(ColumnVector previousVector,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      super.nextVector(previousVector, isNull, batchSize);
+      StructColumnVector result = (StructColumnVector) previousVector;
+      if (result.noNulls || !(result.isRepeating && result.isNull[0])) {
+        result.isRepeating = false;
+
+        // Read all the members of struct as column vectors
+        boolean[] mask = result.noNulls ? null : result.isNull;
+        for (int f = 0; f < fields.length; f++) {
+          if (fields[f] != null) {
+            fields[f].nextVector(result.fields[f], mask, batchSize);
+          }
+        }
+      }
+    }
+
+    @Override
+    void startStripe(Map<StreamName, InStream> streams,
+        OrcProto.StripeFooter stripeFooter
+    ) throws IOException {
+      super.startStripe(streams, stripeFooter);
+      for (TreeReader field : fields) {
+        if (field != null) {
+          field.startStripe(streams, stripeFooter);
+        }
+      }
+    }
+
+    @Override
+    void skipRows(long items) throws IOException {
+      items = countNonNulls(items);
+      for (TreeReader field : fields) {
+        if (field != null) {
+          field.skipRows(items);
+        }
+      }
+    }
+  }
+
+  public static class UnionTreeReader extends TreeReader {
+    protected final TreeReader[] fields;
+    protected RunLengthByteReader tags;
+
+    protected UnionTreeReader(int fileColumn,
+                              TypeDescription readerSchema,
+                              SchemaEvolution evolution,
+                              boolean[] included,
+                              boolean skipCorrupt) throws IOException {
+      super(fileColumn);
+      List<TypeDescription> childrenTypes = readerSchema.getChildren();
+      int fieldCount = childrenTypes.size();
+      this.fields = new TreeReader[fieldCount];
+      for (int i = 0; i < fieldCount; ++i) {
+        TypeDescription subtype = childrenTypes.get(i);
+        this.fields[i] = createTreeReader(subtype, evolution, included, skipCorrupt);
+      }
+    }
+
+    protected UnionTreeReader(int columnId, InStream present,
+        OrcProto.ColumnEncoding encoding, TreeReader[] childReaders) throws IOException {
+      super(columnId, present);
+      if (encoding != null) {
+        checkEncoding(encoding);
+      }
+      this.fields = childReaders;
+    }
+
+    @Override
+    public void seek(PositionProvider[] index) throws IOException {
+      super.seek(index);
+      tags.seek(index[columnId]);
+      for (TreeReader kid : fields) {
+        kid.seek(index);
+      }
+    }
+
+    @Override
+    public void nextVector(ColumnVector previousVector,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      UnionColumnVector result = (UnionColumnVector) previousVector;
+      super.nextVector(result, isNull, batchSize);
+      if (result.noNulls || !(result.isRepeating && result.isNull[0])) {
+        result.isRepeating = false;
+        tags.nextVector(result.noNulls ? null : result.isNull, result.tags,
+            batchSize);
+        boolean[] ignore = new boolean[(int) batchSize];
+        for (int f = 0; f < result.fields.length; ++f) {
+          // build the ignore list for this tag
+          for (int r = 0; r < batchSize; ++r) {
+            ignore[r] = (!result.noNulls && result.isNull[r]) ||
+                result.tags[r] != f;
+          }
+          fields[f].nextVector(result.fields[f], ignore, batchSize);
+        }
+      }
+    }
+
+    @Override
+    void startStripe(Map<StreamName, InStream> streams,
+        OrcProto.StripeFooter stripeFooter
+    ) throws IOException {
+      super.startStripe(streams, stripeFooter);
+      tags = new RunLengthByteReader(streams.get(new StreamName(columnId,
+          OrcProto.Stream.Kind.DATA)));
+      for (TreeReader field : fields) {
+        if (field != null) {
+          field.startStripe(streams, stripeFooter);
+        }
+      }
+    }
+
+    @Override
+    void skipRows(long items) throws IOException {
+      items = countNonNulls(items);
+      long[] counts = new long[fields.length];
+      for (int i = 0; i < items; ++i) {
+        counts[tags.next()] += 1;
+      }
+      for (int i = 0; i < counts.length; ++i) {
+        fields[i].skipRows(counts[i]);
+      }
+    }
+  }
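The per-tag loop above effectively demultiplexes the batch. With invented tags [0, 1, 0] for a three-row batch, field 0 is read with ignore = [false, true, false] and field 1 with ignore = [true, false, true]; each child is asked for batchSize positions, but the ignore mask (passed as isNull) keeps it from consuming values at positions that belong to the other tag. skipRows mirrors this by counting tags and skipping each child by its own count.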
+
+  public static class ListTreeReader extends TreeReader {
+    protected final TreeReader elementReader;
+    protected IntegerReader lengths = null;
+
+    protected ListTreeReader(int fileColumn,
+                             TypeDescription readerSchema,
+                             SchemaEvolution evolution,
+                             boolean[] included,
+                             boolean skipCorrupt) throws IOException {
+      super(fileColumn);
+      TypeDescription elementType = readerSchema.getChildren().get(0);
+      elementReader = createTreeReader(elementType, evolution, included,
+          skipCorrupt);
+    }
+
+    protected ListTreeReader(int columnId, InStream present, InStream data,
+        OrcProto.ColumnEncoding encoding, TreeReader elementReader) throws IOException {
+      super(columnId, present);
+      if (data != null && encoding != null) {
+        checkEncoding(encoding);
+        this.lengths = createIntegerReader(encoding.getKind(), data, false, false);
+      }
+      this.elementReader = elementReader;
+    }
+
+    @Override
+    public void seek(PositionProvider[] index) throws IOException {
+      super.seek(index);
+      lengths.seek(index[columnId]);
+      elementReader.seek(index);
+    }
+
+    @Override
+    public void nextVector(ColumnVector previous,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      ListColumnVector result = (ListColumnVector) previous;
+      super.nextVector(result, isNull, batchSize);
+      // if we have some non-null values, then read them
+      if (result.noNulls || !(result.isRepeating && result.isNull[0])) {
+        lengths.nextVector(result, result.lengths, batchSize);
+        // even with repeating lengths, the list doesn't repeat
+        result.isRepeating = false;
+        // build the offsets vector and figure out how many children to read
+        result.childCount = 0;
+        for (int r = 0; r < batchSize; ++r) {
+          if (result.noNulls || !result.isNull[r]) {
+            result.offsets[r] = result.childCount;
+            result.childCount += result.lengths[r];
+          }
+        }
+        result.child.ensureSize(result.childCount, false);
+        elementReader.nextVector(result.child, null, result.childCount);
+      }
+    }
+
+    @Override
+    void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
+      if ((encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) &&
+          (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2)) {
+        throw new IOException("Unknown encoding " + encoding + " in column " +
+            columnId);
+      }
+    }
+
+    @Override
+    void startStripe(Map<StreamName, InStream> streams,
+        OrcProto.StripeFooter stripeFooter
+    ) throws IOException {
+      super.startStripe(streams, stripeFooter);
+      lengths = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(),
+          streams.get(new StreamName(columnId,
+              OrcProto.Stream.Kind.LENGTH)), false, false);
+      if (elementReader != null) {
+        elementReader.startStripe(streams, stripeFooter);
+      }
+    }
+
+    @Override
+    void skipRows(long items) throws IOException {
+      items = countNonNulls(items);
+      long childSkip = 0;
+      for (long i = 0; i < items; ++i) {
+        childSkip += lengths.next();
+      }
+      elementReader.skipRows(childSkip);
+    }
+  }
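The offsets construction is worth tracing with invented lengths: for a three-row batch with lengths [2, 0, 3], the loop produces offsets [0, 2, 2] and childCount 5, result.child is grown to at least 5 entries, and elementReader.nextVector is called once for all 5 child values; null list rows simply contribute nothing to childCount.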
+
+  public static class MapTreeReader extends TreeReader {
+    protected final TreeReader keyReader;
+    protected final TreeReader valueReader;
+    protected IntegerReader lengths = null;
+
+    protected MapTreeReader(int fileColumn,
+                            TypeDescription readerSchema,
+                            SchemaEvolution evolution,
+                            boolean[] included,
+                            boolean skipCorrupt) throws IOException {
+      super(fileColumn);
+      TypeDescription keyType = readerSchema.getChildren().get(0);
+      TypeDescription valueType = readerSchema.getChildren().get(1);
+      keyReader = createTreeReader(keyType, evolution, included, skipCorrupt);
+      valueReader = createTreeReader(valueType, evolution, included, skipCorrupt);
+    }
+
+    protected MapTreeReader(int columnId, InStream present, InStream data,
+        OrcProto.ColumnEncoding encoding, TreeReader keyReader, TreeReader valueReader)
+        throws IOException {
+      super(columnId, present);
+      if (data != null && encoding != null) {
+        checkEncoding(encoding);
+        this.lengths = createIntegerReader(encoding.getKind(), data, false, false);
+      }
+      this.keyReader = keyReader;
+      this.valueReader = valueReader;
+    }
+
+    @Override
+    public void seek(PositionProvider[] index) throws IOException {
+      super.seek(index);
+      lengths.seek(index[columnId]);
+      keyReader.seek(index);
+      valueReader.seek(index);
+    }
+
+    @Override
+    public void nextVector(ColumnVector previous,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      MapColumnVector result = (MapColumnVector) previous;
+      super.nextVector(result, isNull, batchSize);
+      if (result.noNulls || !(result.isRepeating && result.isNull[0])) {
+        lengths.nextVector(result, result.lengths, batchSize);
+        // even with repeating lengths, the map doesn't repeat
+        result.isRepeating = false;
+        // build the offsets vector and figure out how many children to read
+        result.childCount = 0;
+        for (int r = 0; r < batchSize; ++r) {
+          if (result.noNulls || !result.isNull[r]) {
+            result.offsets[r] = result.childCount;
+            result.childCount += result.lengths[r];
+          }
+        }
+        result.keys.ensureSize(result.childCount, false);
+        result.values.ensureSize(result.childCount, false);
+        keyReader.nextVector(result.keys, null, result.childCount);
+        valueReader.nextVector(result.values, null, result.childCount);
+      }
+    }
+
+    @Override
+    void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
+      if ((encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) &&
+          (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2)) {
+        throw new IOException("Unknown encoding " + encoding + " in column " +
+            columnId);
+      }
+    }
+
+    @Override
+    void startStripe(Map<StreamName, InStream> streams,
+        OrcProto.StripeFooter stripeFooter
+    ) throws IOException {
+      super.startStripe(streams, stripeFooter);
+      lengths = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(),
+          streams.get(new StreamName(columnId,
+              OrcProto.Stream.Kind.LENGTH)), false, false);
+      if (keyReader != null) {
+        keyReader.startStripe(streams, stripeFooter);
+      }
+      if (valueReader != null) {
+        valueReader.startStripe(streams, stripeFooter);
+      }
+    }
+
+    @Override
+    void skipRows(long items) throws IOException {
+      items = countNonNulls(items);
+      long childSkip = 0;
+      for (long i = 0; i < items; ++i) {
+        childSkip += lengths.next();
+      }
+      keyReader.skipRows(childSkip);
+      valueReader.skipRows(childSkip);
+    }
+  }
+
+  public static TreeReader createTreeReader(TypeDescription readerType,
+                                            SchemaEvolution evolution,
+                                            boolean[] included,
+                                            boolean skipCorrupt
+                                            ) throws IOException {
+    TypeDescription fileType = evolution.getFileType(readerType);
+    if (fileType == null || !evolution.includeReaderColumn(readerType.getId())){
+      return new NullTreeReader(0);
+    }
+    TypeDescription.Category readerTypeCategory = readerType.getCategory();
+    if (!fileType.equals(readerType) &&
+        (readerTypeCategory != TypeDescription.Category.STRUCT &&
+         readerTypeCategory != TypeDescription.Category.MAP &&
+         readerTypeCategory != TypeDescription.Category.LIST &&
+         readerTypeCategory != TypeDescription.Category.UNION)) {
+      // We only convert complex children.
+      return ConvertTreeReaderFactory.createConvertTreeReader(readerType, evolution,
+          included, skipCorrupt);
+    }
+    switch (readerTypeCategory) {
+      case BOOLEAN:
+        return new BooleanTreeReader(fileType.getId());
+      case BYTE:
+        return new ByteTreeReader(fileType.getId());
+      case DOUBLE:
+        return new DoubleTreeReader(fileType.getId());
+      case FLOAT:
+        return new FloatTreeReader(fileType.getId());
+      case SHORT:
+        return new ShortTreeReader(fileType.getId());
+      case INT:
+        return new IntTreeReader(fileType.getId());
+      case LONG:
+        return new LongTreeReader(fileType.getId(), skipCorrupt);
+      case STRING:
+        return new StringTreeReader(fileType.getId());
+      case CHAR:
+        return new CharTreeReader(fileType.getId(), readerType.getMaxLength());
+      case VARCHAR:
+        return new VarcharTreeReader(fileType.getId(), readerType.getMaxLength());
+      case BINARY:
+        return new BinaryTreeReader(fileType.getId());
+      case TIMESTAMP:
+        return new TimestampTreeReader(fileType.getId(), skipCorrupt);
+      case DATE:
+        return new DateTreeReader(fileType.getId());
+      case DECIMAL:
+        return new DecimalTreeReader(fileType.getId(), readerType.getPrecision(),
+            readerType.getScale());
+      case STRUCT:
+        return new StructTreeReader(fileType.getId(), readerType,
+            evolution, included, skipCorrupt);
+      case LIST:
+        return new ListTreeReader(fileType.getId(), readerType,
+            evolution, included, skipCorrupt);
+      case MAP:
+        return new MapTreeReader(fileType.getId(), readerType, evolution,
+            included, skipCorrupt);
+      case UNION:
+        return new UnionTreeReader(fileType.getId(), readerType,
+            evolution, included, skipCorrupt);
+      default:
+        throw new IllegalArgumentException("Unsupported type " +
+            readerTypeCategory);
+    }
+  }
+}
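Two dispatch rules above are easy to miss: a reader column with no file counterpart, or one that is not included, gets a NullTreeReader(0) that just fills the vector with nulls; and when the file and reader types differ for a primitive column (say the file stores a bigint while the reader schema asks for a string), a ConvertTreeReader is returned instead, while mismatched complex types still get the regular STRUCT/LIST/MAP/UNION readers so that only their children are converted.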


[15/37] hive git commit: HIVE-17118. Move the hive-orc source files to make the package names unique.

Posted by om...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/impl/TreeReaderFactory.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/TreeReaderFactory.java b/orc/src/java/org/apache/orc/impl/TreeReaderFactory.java
deleted file mode 100644
index eceba17..0000000
--- a/orc/src/java/org/apache/orc/impl/TreeReaderFactory.java
+++ /dev/null
@@ -1,2163 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc.impl;
-
-import java.io.EOFException;
-import java.io.IOException;
-import java.math.BigInteger;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.TimeZone;
-
-import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.UnionColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
-import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
-import org.apache.orc.TypeDescription;
-import org.apache.orc.OrcProto;
-
-/**
- * Factory for creating ORC tree readers.
- */
-public class TreeReaderFactory {
-
-  public abstract static class TreeReader {
-    protected final int columnId;
-    protected BitFieldReader present = null;
-    protected boolean valuePresent = false;
-    protected int vectorColumnCount;
-
-    TreeReader(int columnId) throws IOException {
-      this(columnId, null);
-    }
-
-    protected TreeReader(int columnId, InStream in) throws IOException {
-      this.columnId = columnId;
-      if (in == null) {
-        present = null;
-        valuePresent = true;
-      } else {
-        present = new BitFieldReader(in, 1);
-      }
-      vectorColumnCount = -1;
-    }
-
-    void setVectorColumnCount(int vectorColumnCount) {
-      this.vectorColumnCount = vectorColumnCount;
-    }
-
-    void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
-      if (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) {
-        throw new IOException("Unknown encoding " + encoding + " in column " +
-            columnId);
-      }
-    }
-
-    protected static IntegerReader createIntegerReader(OrcProto.ColumnEncoding.Kind kind,
-        InStream in,
-        boolean signed, boolean skipCorrupt) throws IOException {
-      switch (kind) {
-        case DIRECT_V2:
-        case DICTIONARY_V2:
-          return new RunLengthIntegerReaderV2(in, signed, skipCorrupt);
-        case DIRECT:
-        case DICTIONARY:
-          return new RunLengthIntegerReader(in, signed);
-        default:
-          throw new IllegalArgumentException("Unknown encoding " + kind);
-      }
-    }
-
-    void startStripe(Map<StreamName, InStream> streams,
-        OrcProto.StripeFooter stripeFooter
-    ) throws IOException {
-      checkEncoding(stripeFooter.getColumnsList().get(columnId));
-      InStream in = streams.get(new StreamName(columnId,
-          OrcProto.Stream.Kind.PRESENT));
-      if (in == null) {
-        present = null;
-        valuePresent = true;
-      } else {
-        present = new BitFieldReader(in, 1);
-      }
-    }
-
-    /**
-     * Seek to the given position.
-     *
-     * @param index the indexes loaded from the file
-     * @throws IOException
-     */
-    public void seek(PositionProvider[] index) throws IOException {
-      seek(index[columnId]);
-    }
-
-    public void seek(PositionProvider index) throws IOException {
-      if (present != null) {
-        present.seek(index);
-      }
-    }
-
-    protected long countNonNulls(long rows) throws IOException {
-      if (present != null) {
-        long result = 0;
-        for (long c = 0; c < rows; ++c) {
-          if (present.next() == 1) {
-            result += 1;
-          }
-        }
-        return result;
-      } else {
-        return rows;
-      }
-    }
-
-    abstract void skipRows(long rows) throws IOException;
-
-    /**
-     * Called at the top level to read into the given batch.
-     * @param batch the batch to read into
-     * @param batchSize the number of rows to read
-     * @throws IOException
-     */
-    public void nextBatch(VectorizedRowBatch batch,
-                          int batchSize) throws IOException {
-      batch.cols[0].reset();
-      batch.cols[0].ensureSize(batchSize, false);
-      nextVector(batch.cols[0], null, batchSize);
-    }
-
-    /**
-     * Populates the isNull vector array in the previousVector object based on
-     * the present stream values. This function is called from all the child
-     * readers, and they all set the values based on isNull field value.
-     *
-     * @param previous The columnVector object whose isNull value is populated
-     * @param isNull Whether each value was null at a higher level. If
-     *               isNull is null, all values are non-null.
-     * @param batchSize      Size of the column vector
-     * @throws IOException
-     */
-    // TODO: it looks like isNull is never used; it's always null. Remove/deprecate?
-    public void nextVector(ColumnVector previous,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      if (present != null || isNull != null) {
-        // Set noNulls and isNull vector of the ColumnVector based on
-        // present stream
-        previous.noNulls = true;
-        boolean allNull = true;
-        for (int i = 0; i < batchSize; i++) {
-          if (isNull == null || !isNull[i]) {
-            if (present != null && present.next() != 1) {
-              previous.noNulls = false;
-              previous.isNull[i] = true;
-            } else {
-              previous.isNull[i] = false;
-              allNull = false;
-            }
-          } else {
-            previous.noNulls = false;
-            previous.isNull[i] = true;
-          }
-        }
-        previous.isRepeating = !previous.noNulls && allNull;
-      } else {
-        // There is no present stream, this means that all the values are
-        // present.
-        previous.noNulls = true;
-        for (int i = 0; i < batchSize; i++) {
-          previous.isNull[i] = false;
-        }
-      }
-    }
-
-    public BitFieldReader getPresent() {
-      return present;
-    }
-
-    public int getColumnId() {
-      return columnId;
-    }
-  }
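A short trace of the null bookkeeping every subclass relies on, with invented bits: a PRESENT stream of [1, 0, 1] for a three-row batch gives isNull = [false, true, false] and noNulls = false in nextVector above, and countNonNulls(3) returns 2, which is why the subclasses' skipRows and value reads consume only two entries from their data streams for those three rows.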
-
-  public static class NullTreeReader extends TreeReader {
-
-    public NullTreeReader(int columnId) throws IOException {
-      super(columnId);
-    }
-
-    @Override
-    public void startStripe(Map<StreamName, InStream> streams,
-                            OrcProto.StripeFooter footer) {
-      // PASS
-    }
-
-    @Override
-    void skipRows(long rows) {
-      // PASS
-    }
-
-    @Override
-    public void seek(PositionProvider position) {
-      // PASS
-    }
-
-    @Override
-    public void seek(PositionProvider[] position) {
-      // PASS
-    }
-
-    @Override
-    public void nextVector(ColumnVector vector, boolean[] isNull, int size) {
-      vector.noNulls = false;
-      vector.isNull[0] = true;
-      vector.isRepeating = true;
-    }
-  }
-
-  public static class BooleanTreeReader extends TreeReader {
-    protected BitFieldReader reader = null;
-
-    BooleanTreeReader(int columnId) throws IOException {
-      this(columnId, null, null);
-    }
-
-    protected BooleanTreeReader(int columnId, InStream present, InStream data) throws IOException {
-      super(columnId, present);
-      if (data != null) {
-        reader = new BitFieldReader(data, 1);
-      }
-    }
-
-    @Override
-    void startStripe(Map<StreamName, InStream> streams,
-        OrcProto.StripeFooter stripeFooter
-    ) throws IOException {
-      super.startStripe(streams, stripeFooter);
-      reader = new BitFieldReader(streams.get(new StreamName(columnId,
-          OrcProto.Stream.Kind.DATA)), 1);
-    }
-
-    @Override
-    public void seek(PositionProvider[] index) throws IOException {
-      seek(index[columnId]);
-    }
-
-    @Override
-    public void seek(PositionProvider index) throws IOException {
-      super.seek(index);
-      reader.seek(index);
-    }
-
-    @Override
-    void skipRows(long items) throws IOException {
-      reader.skip(countNonNulls(items));
-    }
-
-    @Override
-    public void nextVector(ColumnVector previousVector,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      LongColumnVector result = (LongColumnVector) previousVector;
-
-      // Read present/isNull stream
-      super.nextVector(result, isNull, batchSize);
-
-      // Read value entries based on isNull entries
-      reader.nextVector(result, batchSize);
-    }
-  }
-
-  public static class ByteTreeReader extends TreeReader {
-    protected RunLengthByteReader reader = null;
-
-    ByteTreeReader(int columnId) throws IOException {
-      this(columnId, null, null);
-    }
-
-    protected ByteTreeReader(int columnId, InStream present, InStream data) throws IOException {
-      super(columnId, present);
-      this.reader = new RunLengthByteReader(data);
-    }
-
-    @Override
-    void startStripe(Map<StreamName, InStream> streams,
-        OrcProto.StripeFooter stripeFooter
-    ) throws IOException {
-      super.startStripe(streams, stripeFooter);
-      reader = new RunLengthByteReader(streams.get(new StreamName(columnId,
-          OrcProto.Stream.Kind.DATA)));
-    }
-
-    @Override
-    public void seek(PositionProvider[] index) throws IOException {
-      seek(index[columnId]);
-    }
-
-    @Override
-    public void seek(PositionProvider index) throws IOException {
-      super.seek(index);
-      reader.seek(index);
-    }
-
-    @Override
-    public void nextVector(ColumnVector previousVector,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      final LongColumnVector result = (LongColumnVector) previousVector;
-
-      // Read present/isNull stream
-      super.nextVector(result, isNull, batchSize);
-
-      // Read value entries based on isNull entries
-      reader.nextVector(result, result.vector, batchSize);
-    }
-
-    @Override
-    void skipRows(long items) throws IOException {
-      reader.skip(countNonNulls(items));
-    }
-  }
-
-  public static class ShortTreeReader extends TreeReader {
-    protected IntegerReader reader = null;
-
-    ShortTreeReader(int columnId) throws IOException {
-      this(columnId, null, null, null);
-    }
-
-    protected ShortTreeReader(int columnId, InStream present, InStream data,
-        OrcProto.ColumnEncoding encoding)
-        throws IOException {
-      super(columnId, present);
-      if (data != null && encoding != null) {
-        checkEncoding(encoding);
-        this.reader = createIntegerReader(encoding.getKind(), data, true, false);
-      }
-    }
-
-    @Override
-    void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
-      if ((encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) &&
-          (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2)) {
-        throw new IOException("Unknown encoding " + encoding + " in column " +
-            columnId);
-      }
-    }
-
-    @Override
-    void startStripe(Map<StreamName, InStream> streams,
-        OrcProto.StripeFooter stripeFooter
-    ) throws IOException {
-      super.startStripe(streams, stripeFooter);
-      StreamName name = new StreamName(columnId,
-          OrcProto.Stream.Kind.DATA);
-      reader = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(),
-          streams.get(name), true, false);
-    }
-
-    @Override
-    public void seek(PositionProvider[] index) throws IOException {
-      seek(index[columnId]);
-    }
-
-    @Override
-    public void seek(PositionProvider index) throws IOException {
-      super.seek(index);
-      reader.seek(index);
-    }
-
-    @Override
-    public void nextVector(ColumnVector previousVector,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      final LongColumnVector result = (LongColumnVector) previousVector;
-
-      // Read present/isNull stream
-      super.nextVector(result, isNull, batchSize);
-
-      // Read value entries based on isNull entries
-      reader.nextVector(result, result.vector, batchSize);
-    }
-
-    @Override
-    void skipRows(long items) throws IOException {
-      reader.skip(countNonNulls(items));
-    }
-  }
-
-  public static class IntTreeReader extends TreeReader {
-    protected IntegerReader reader = null;
-
-    IntTreeReader(int columnId) throws IOException {
-      this(columnId, null, null, null);
-    }
-
-    protected IntTreeReader(int columnId, InStream present, InStream data,
-        OrcProto.ColumnEncoding encoding)
-        throws IOException {
-      super(columnId, present);
-      if (data != null && encoding != null) {
-        checkEncoding(encoding);
-        this.reader = createIntegerReader(encoding.getKind(), data, true, false);
-      }
-    }
-
-    @Override
-    void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
-      if ((encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) &&
-          (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2)) {
-        throw new IOException("Unknown encoding " + encoding + " in column " +
-            columnId);
-      }
-    }
-
-    @Override
-    void startStripe(Map<StreamName, InStream> streams,
-        OrcProto.StripeFooter stripeFooter
-    ) throws IOException {
-      super.startStripe(streams, stripeFooter);
-      StreamName name = new StreamName(columnId,
-          OrcProto.Stream.Kind.DATA);
-      reader = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(),
-          streams.get(name), true, false);
-    }
-
-    @Override
-    public void seek(PositionProvider[] index) throws IOException {
-      seek(index[columnId]);
-    }
-
-    @Override
-    public void seek(PositionProvider index) throws IOException {
-      super.seek(index);
-      reader.seek(index);
-    }
-
-    @Override
-    public void nextVector(ColumnVector previousVector,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      final LongColumnVector result = (LongColumnVector) previousVector;
-
-      // Read present/isNull stream
-      super.nextVector(result, isNull, batchSize);
-
-      // Read value entries based on isNull entries
-      reader.nextVector(result, result.vector, batchSize);
-    }
-
-    @Override
-    void skipRows(long items) throws IOException {
-      reader.skip(countNonNulls(items));
-    }
-  }
-
-  public static class LongTreeReader extends TreeReader {
-    protected IntegerReader reader = null;
-
-    LongTreeReader(int columnId, boolean skipCorrupt) throws IOException {
-      this(columnId, null, null, null, skipCorrupt);
-    }
-
-    protected LongTreeReader(int columnId, InStream present, InStream data,
-        OrcProto.ColumnEncoding encoding,
-        boolean skipCorrupt)
-        throws IOException {
-      super(columnId, present);
-      if (data != null && encoding != null) {
-        checkEncoding(encoding);
-        this.reader = createIntegerReader(encoding.getKind(), data, true, skipCorrupt);
-      }
-    }
-
-    @Override
-    void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
-      if ((encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) &&
-          (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2)) {
-        throw new IOException("Unknown encoding " + encoding + " in column " +
-            columnId);
-      }
-    }
-
-    @Override
-    void startStripe(Map<StreamName, InStream> streams,
-        OrcProto.StripeFooter stripeFooter
-    ) throws IOException {
-      super.startStripe(streams, stripeFooter);
-      StreamName name = new StreamName(columnId,
-          OrcProto.Stream.Kind.DATA);
-      reader = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(),
-          streams.get(name), true, false);
-    }
-
-    @Override
-    public void seek(PositionProvider[] index) throws IOException {
-      seek(index[columnId]);
-    }
-
-    @Override
-    public void seek(PositionProvider index) throws IOException {
-      super.seek(index);
-      reader.seek(index);
-    }
-
-    @Override
-    public void nextVector(ColumnVector previousVector,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      final LongColumnVector result = (LongColumnVector) previousVector;
-
-      // Read present/isNull stream
-      super.nextVector(result, isNull, batchSize);
-
-      // Read value entries based on isNull entries
-      reader.nextVector(result, result.vector, batchSize);
-    }
-
-    @Override
-    void skipRows(long items) throws IOException {
-      reader.skip(countNonNulls(items));
-    }
-  }
-
-  public static class FloatTreeReader extends TreeReader {
-    protected InStream stream;
-    private final SerializationUtils utils;
-
-    FloatTreeReader(int columnId) throws IOException {
-      this(columnId, null, null);
-    }
-
-    protected FloatTreeReader(int columnId, InStream present, InStream data) throws IOException {
-      super(columnId, present);
-      this.utils = new SerializationUtils();
-      this.stream = data;
-    }
-
-    @Override
-    void startStripe(Map<StreamName, InStream> streams,
-        OrcProto.StripeFooter stripeFooter
-    ) throws IOException {
-      super.startStripe(streams, stripeFooter);
-      StreamName name = new StreamName(columnId,
-          OrcProto.Stream.Kind.DATA);
-      stream = streams.get(name);
-    }
-
-    @Override
-    public void seek(PositionProvider[] index) throws IOException {
-      seek(index[columnId]);
-    }
-
-    @Override
-    public void seek(PositionProvider index) throws IOException {
-      super.seek(index);
-      stream.seek(index);
-    }
-
-    @Override
-    public void nextVector(ColumnVector previousVector,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      final DoubleColumnVector result = (DoubleColumnVector) previousVector;
-
-      // Read present/isNull stream
-      super.nextVector(result, isNull, batchSize);
-
-      final boolean hasNulls = !result.noNulls;
-      boolean allNulls = hasNulls;
-
-      if (hasNulls) {
-        // loop condition arranged so the JIT can eliminate array bounds checks
-        for (int i = 0; batchSize <= result.isNull.length && i < batchSize; i++) {
-          allNulls = allNulls & result.isNull[i];
-        }
-        if (allNulls) {
-          result.vector[0] = Double.NaN;
-          result.isRepeating = true;
-        } else {
-          // some nulls
-          result.isRepeating = false;
-          // loop condition arranged so the JIT can eliminate array bounds checks
-          for (int i = 0; batchSize <= result.isNull.length
-              && batchSize <= result.vector.length && i < batchSize; i++) {
-            if (!result.isNull[i]) {
-              result.vector[i] = utils.readFloat(stream);
-            } else {
-              // If the value is not present then set NaN
-              result.vector[i] = Double.NaN;
-            }
-          }
-        }
-      } else {
-        // no nulls & > 1 row (check repeating)
-        boolean repeating = (batchSize > 1);
-        final float f1 = utils.readFloat(stream);
-        result.vector[0] = f1;
-        // loop condition arranged so the JIT can eliminate array bounds checks
-        for (int i = 1; i < batchSize && batchSize <= result.vector.length; i++) {
-          final float f2 = utils.readFloat(stream);
-          repeating = repeating && (f1 == f2);
-          result.vector[i] = f2;
-        }
-        result.isRepeating = repeating;
-      }
-    }
-
-    @Override
-    protected void skipRows(long items) throws IOException {
-      items = countNonNulls(items);
-      for (int i = 0; i < items; ++i) {
-        utils.readFloat(stream);
-      }
-    }
-  }
-
-  public static class DoubleTreeReader extends TreeReader {
-    protected InStream stream;
-    private final SerializationUtils utils;
-
-    DoubleTreeReader(int columnId) throws IOException {
-      this(columnId, null, null);
-    }
-
-    protected DoubleTreeReader(int columnId, InStream present, InStream data) throws IOException {
-      super(columnId, present);
-      this.utils = new SerializationUtils();
-      this.stream = data;
-    }
-
-    @Override
-    void startStripe(Map<StreamName, InStream> streams,
-        OrcProto.StripeFooter stripeFooter
-    ) throws IOException {
-      super.startStripe(streams, stripeFooter);
-      StreamName name =
-          new StreamName(columnId,
-              OrcProto.Stream.Kind.DATA);
-      stream = streams.get(name);
-    }
-
-    @Override
-    public void seek(PositionProvider[] index) throws IOException {
-      seek(index[columnId]);
-    }
-
-    @Override
-    public void seek(PositionProvider index) throws IOException {
-      super.seek(index);
-      stream.seek(index);
-    }
-
-    @Override
-    public void nextVector(ColumnVector previousVector,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      final DoubleColumnVector result = (DoubleColumnVector) previousVector;
-
-      // Read present/isNull stream
-      super.nextVector(result, isNull, batchSize);
-
-      final boolean hasNulls = !result.noNulls;
-      boolean allNulls = hasNulls;
-
-      if (hasNulls) {
-        // loop condition arranged so the JIT can eliminate array bounds checks
-        for (int i = 0; i < batchSize && batchSize <= result.isNull.length; i++) {
-          allNulls = allNulls & result.isNull[i];
-        }
-        if (allNulls) {
-          result.vector[0] = Double.NaN;
-          result.isRepeating = true;
-        } else {
-          // some nulls
-          result.isRepeating = false;
-          // loop condition arranged so the JIT can eliminate array bounds checks
-          for (int i = 0; batchSize <= result.isNull.length
-              && batchSize <= result.vector.length && i < batchSize; i++) {
-            if (!result.isNull[i]) {
-              result.vector[i] = utils.readDouble(stream);
-            } else {
-              // If the value is not present then set NaN
-              result.vector[i] = Double.NaN;
-            }
-          }
-        }
-      } else {
-        // no nulls
-        boolean repeating = (batchSize > 1);
-        final double d1 = utils.readDouble(stream);
-        result.vector[0] = d1;
-        // loop condition arranged so the JIT can eliminate array bounds checks
-        for (int i = 1; i < batchSize && batchSize <= result.vector.length; i++) {
-          final double d2 = utils.readDouble(stream);
-          repeating = repeating && (d1 == d2);
-          result.vector[i] = d2;
-        }
-        result.isRepeating = repeating;
-      }
-    }
-
-    @Override
-    void skipRows(long items) throws IOException {
-      items = countNonNulls(items);
-      long len = items * 8;
-      while (len > 0) {
-        len -= stream.skip(len);
-      }
-    }
-  }
-
-  public static class BinaryTreeReader extends TreeReader {
-    protected InStream stream;
-    protected IntegerReader lengths = null;
-    protected final LongColumnVector scratchlcv;
-
-    BinaryTreeReader(int columnId) throws IOException {
-      this(columnId, null, null, null, null);
-    }
-
-    protected BinaryTreeReader(int columnId, InStream present, InStream data, InStream length,
-        OrcProto.ColumnEncoding encoding) throws IOException {
-      super(columnId, present);
-      scratchlcv = new LongColumnVector();
-      this.stream = data;
-      if (length != null && encoding != null) {
-        checkEncoding(encoding);
-        this.lengths = createIntegerReader(encoding.getKind(), length, false, false);
-      }
-    }
-
-    @Override
-    void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
-      if ((encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) &&
-          (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2)) {
-        throw new IOException("Unknown encoding " + encoding + " in column " +
-            columnId);
-      }
-    }
-
-    @Override
-    void startStripe(Map<StreamName, InStream> streams,
-        OrcProto.StripeFooter stripeFooter
-    ) throws IOException {
-      super.startStripe(streams, stripeFooter);
-      StreamName name = new StreamName(columnId,
-          OrcProto.Stream.Kind.DATA);
-      stream = streams.get(name);
-      lengths = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(),
-          streams.get(new StreamName(columnId, OrcProto.Stream.Kind.LENGTH)), false, false);
-    }
-
-    @Override
-    public void seek(PositionProvider[] index) throws IOException {
-      seek(index[columnId]);
-    }
-
-    @Override
-    public void seek(PositionProvider index) throws IOException {
-      super.seek(index);
-      stream.seek(index);
-      lengths.seek(index);
-    }
-
-    @Override
-    public void nextVector(ColumnVector previousVector,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      final BytesColumnVector result = (BytesColumnVector) previousVector;
-
-      // Read present/isNull stream
-      super.nextVector(result, isNull, batchSize);
-
-      BytesColumnVectorUtil.readOrcByteArrays(stream, lengths, scratchlcv, result, batchSize);
-    }
-
-    @Override
-    void skipRows(long items) throws IOException {
-      items = countNonNulls(items);
-      long lengthToSkip = 0;
-      for (int i = 0; i < items; ++i) {
-        lengthToSkip += lengths.next();
-      }
-      while (lengthToSkip > 0) {
-        lengthToSkip -= stream.skip(lengthToSkip);
-      }
-    }
-  }
-
-  public static class TimestampTreeReader extends TreeReader {
-    protected IntegerReader data = null;
-    protected IntegerReader nanos = null;
-    private final boolean skipCorrupt;
-    private Map<String, Long> baseTimestampMap;
-    protected long base_timestamp;
-    private final TimeZone readerTimeZone;
-    private TimeZone writerTimeZone;
-    private boolean hasSameTZRules;
-
-    TimestampTreeReader(int columnId, boolean skipCorrupt) throws IOException {
-      this(columnId, null, null, null, null, skipCorrupt, null);
-    }
-
-    protected TimestampTreeReader(int columnId, InStream presentStream, InStream dataStream,
-        InStream nanosStream, OrcProto.ColumnEncoding encoding, boolean skipCorrupt, String writerTimezone)
-        throws IOException {
-      super(columnId, presentStream);
-      this.skipCorrupt = skipCorrupt;
-      this.baseTimestampMap = new HashMap<>();
-      this.readerTimeZone = TimeZone.getDefault();
-      this.writerTimeZone = readerTimeZone;
-      this.hasSameTZRules = writerTimeZone.hasSameRules(readerTimeZone);
-      this.base_timestamp = getBaseTimestamp(readerTimeZone.getID());
-      if (encoding != null) {
-        checkEncoding(encoding);
-
-        if (dataStream != null) {
-          this.data = createIntegerReader(encoding.getKind(), dataStream, true, skipCorrupt);
-        }
-
-        if (nanosStream != null) {
-          this.nanos = createIntegerReader(encoding.getKind(), nanosStream, false, skipCorrupt);
-        }
-        base_timestamp = getBaseTimestamp(writerTimezone);
-      }
-    }
-
-    @Override
-    void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
-      if ((encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) &&
-          (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2)) {
-        throw new IOException("Unknown encoding " + encoding + " in column " +
-            columnId);
-      }
-    }
-
-    @Override
-    void startStripe(Map<StreamName, InStream> streams,
-        OrcProto.StripeFooter stripeFooter
-    ) throws IOException {
-      super.startStripe(streams, stripeFooter);
-      data = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(),
-          streams.get(new StreamName(columnId,
-              OrcProto.Stream.Kind.DATA)), true, skipCorrupt);
-      nanos = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(),
-          streams.get(new StreamName(columnId,
-              OrcProto.Stream.Kind.SECONDARY)), false, skipCorrupt);
-      base_timestamp = getBaseTimestamp(stripeFooter.getWriterTimezone());
-    }
-
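-    /**
-     * Returns the epoch second of WriterImpl.BASE_TIMESTAMP_STRING evaluated in
-     * the given time zone, caching one value per zone id; it also updates
-     * writerTimeZone and hasSameTZRules for the new zone. The seconds read from
-     * the DATA stream are stored relative to this base.
-     */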
-    protected long getBaseTimestamp(String timeZoneId) throws IOException {
-      // to make sure new readers read old files in the same way
-      if (timeZoneId == null || timeZoneId.isEmpty()) {
-        timeZoneId = readerTimeZone.getID();
-      }
-
-      if (!baseTimestampMap.containsKey(timeZoneId)) {
-        writerTimeZone = TimeZone.getTimeZone(timeZoneId);
-        hasSameTZRules = writerTimeZone.hasSameRules(readerTimeZone);
-        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
-        sdf.setTimeZone(writerTimeZone);
-        try {
-          long epoch =
-              sdf.parse(WriterImpl.BASE_TIMESTAMP_STRING).getTime() / WriterImpl.MILLIS_PER_SECOND;
-          baseTimestampMap.put(timeZoneId, epoch);
-          return epoch;
-        } catch (ParseException e) {
-          throw new IOException("Unable to create base timestamp", e);
-        } finally {
-          sdf.setTimeZone(readerTimeZone);
-        }
-      }
-
-      return baseTimestampMap.get(timeZoneId);
-    }
-
-    @Override
-    public void seek(PositionProvider[] index) throws IOException {
-      seek(index[columnId]);
-    }
-
-    @Override
-    public void seek(PositionProvider index) throws IOException {
-      super.seek(index);
-      data.seek(index);
-      nanos.seek(index);
-    }
-
-    @Override
-    public void nextVector(ColumnVector previousVector,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      TimestampColumnVector result = (TimestampColumnVector) previousVector;
-      super.nextVector(previousVector, isNull, batchSize);
-
-      for (int i = 0; i < batchSize; i++) {
-        if (result.noNulls || !result.isNull[i]) {
-          long millis = data.next() + base_timestamp;
-          int newNanos = parseNanos(nanos.next());
-          if (millis < 0 && newNanos != 0) {
-            millis -= 1;
-          }
-          millis *= WriterImpl.MILLIS_PER_SECOND;
-          long offset = 0;
-          // If the reader and writer time zones have different rules, adjust for the
-          // offset difference between them, taking daylight saving time into account.
-          if (!hasSameTZRules) {
-            offset = writerTimeZone.getOffset(millis) - readerTimeZone.getOffset(millis);
-          }
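-          // e.g. with a writer offset of +01:00 and a reader offset of +02:00 at
-          // this instant (hypothetical zones), offset = 3600000 - 7200000 = -3600000 ms.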
-          long adjustedMillis = millis + offset;
-          // The reader time zone's offset may differ at the adjusted instant (for
-          // example, when the adjustment crosses a DST transition). If it does,
-          // recompute the adjustment using the reader's offset at adjustedMillis.
-          if (!hasSameTZRules &&
-              (readerTimeZone.getOffset(millis) != readerTimeZone.getOffset(adjustedMillis))) {
-            long newOffset =
-                writerTimeZone.getOffset(millis) - readerTimeZone.getOffset(adjustedMillis);
-            adjustedMillis = millis + newOffset;
-          }
-          result.time[i] = adjustedMillis;
-          result.nanos[i] = newNanos;
-          if (result.isRepeating && i != 0 &&
-              (result.time[0] != result.time[i] ||
-                  result.nanos[0] != result.nanos[i])) {
-            result.isRepeating = false;
-          }
-        }
-      }
-    }
-
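-    /**
-     * Decodes the packed nanosecond value from the SECONDARY stream: when the
-     * low three bits hold a non-zero value z, the remaining high bits are
-     * scaled by 10^(z+1) to restore the trailing zeros stripped by the writer.
-     * For example, a serialized value of 0b1010 decodes to 1 with z = 2,
-     * i.e. 1 * 10^3 = 1000 nanoseconds.
-     */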
-    private static int parseNanos(long serialized) {
-      int zeros = 7 & (int) serialized;
-      int result = (int) (serialized >>> 3);
-      if (zeros != 0) {
-        for (int i = 0; i <= zeros; ++i) {
-          result *= 10;
-        }
-      }
-      return result;
-    }
-
-    @Override
-    void skipRows(long items) throws IOException {
-      items = countNonNulls(items);
-      data.skip(items);
-      nanos.skip(items);
-    }
-  }
-
-  public static class DateTreeReader extends TreeReader {
-    protected IntegerReader reader = null;
-
-    DateTreeReader(int columnId) throws IOException {
-      this(columnId, null, null, null);
-    }
-
-    protected DateTreeReader(int columnId, InStream present, InStream data,
-        OrcProto.ColumnEncoding encoding) throws IOException {
-      super(columnId, present);
-      if (data != null && encoding != null) {
-        checkEncoding(encoding);
-        reader = createIntegerReader(encoding.getKind(), data, true, false);
-      }
-    }
-
-    @Override
-    void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
-      if ((encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) &&
-          (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2)) {
-        throw new IOException("Unknown encoding " + encoding + " in column " +
-            columnId);
-      }
-    }
-
-    @Override
-    void startStripe(Map<StreamName, InStream> streams,
-        OrcProto.StripeFooter stripeFooter
-    ) throws IOException {
-      super.startStripe(streams, stripeFooter);
-      StreamName name = new StreamName(columnId,
-          OrcProto.Stream.Kind.DATA);
-      reader = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(),
-          streams.get(name), true, false);
-    }
-
-    @Override
-    public void seek(PositionProvider[] index) throws IOException {
-      seek(index[columnId]);
-    }
-
-    @Override
-    public void seek(PositionProvider index) throws IOException {
-      super.seek(index);
-      reader.seek(index);
-    }
-
-    @Override
-    public void nextVector(ColumnVector previousVector,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      final LongColumnVector result = (LongColumnVector) previousVector;
-
-      // Read present/isNull stream
-      super.nextVector(result, isNull, batchSize);
-
-      // Read value entries based on isNull entries
-      reader.nextVector(result, result.vector, batchSize);
-    }
-
-    @Override
-    void skipRows(long items) throws IOException {
-      reader.skip(countNonNulls(items));
-    }
-  }
-
-  public static class DecimalTreeReader extends TreeReader {
-    protected InStream valueStream;
-    protected IntegerReader scaleReader = null;
-    private int[] scratchScaleVector;
-    private byte[] scratchBytes;
-
-    private final int precision;
-    private final int scale;
-
-    DecimalTreeReader(int columnId, int precision, int scale) throws IOException {
-      this(columnId, precision, scale, null, null, null, null);
-    }
-
-    protected DecimalTreeReader(int columnId, int precision, int scale, InStream present,
-        InStream valueStream, InStream scaleStream, OrcProto.ColumnEncoding encoding)
-        throws IOException {
-      super(columnId, present);
-      this.precision = precision;
-      this.scale = scale;
-      this.scratchScaleVector = new int[VectorizedRowBatch.DEFAULT_SIZE];
-      this.valueStream = valueStream;
-      this.scratchBytes = new byte[HiveDecimal.SCRATCH_BUFFER_LEN_SERIALIZATION_UTILS_READ];
-      if (scaleStream != null && encoding != null) {
-        checkEncoding(encoding);
-        this.scaleReader = createIntegerReader(encoding.getKind(), scaleStream, true, false);
-      }
-    }
-
-    @Override
-    void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
-      if ((encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) &&
-          (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2)) {
-        throw new IOException("Unknown encoding " + encoding + " in column " +
-            columnId);
-      }
-    }
-
-    @Override
-    void startStripe(Map<StreamName, InStream> streams,
-        OrcProto.StripeFooter stripeFooter
-    ) throws IOException {
-      super.startStripe(streams, stripeFooter);
-      valueStream = streams.get(new StreamName(columnId,
-          OrcProto.Stream.Kind.DATA));
-      scaleReader = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(),
-          streams.get(new StreamName(columnId, OrcProto.Stream.Kind.SECONDARY)), true, false);
-    }
-
-    @Override
-    public void seek(PositionProvider[] index) throws IOException {
-      seek(index[columnId]);
-    }
-
-    @Override
-    public void seek(PositionProvider index) throws IOException {
-      super.seek(index);
-      valueStream.seek(index);
-      scaleReader.seek(index);
-    }
-
-    @Override
-    public void nextVector(ColumnVector previousVector,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      final DecimalColumnVector result = (DecimalColumnVector) previousVector;
-      // Read present/isNull stream
-      super.nextVector(result, isNull, batchSize);
-
-      if (batchSize > scratchScaleVector.length) {
-        scratchScaleVector = new int[(int) batchSize];
-      }
-      // read the scales
-      scaleReader.nextVector(result, scratchScaleVector, batchSize);
-      // Read value entries based on isNull entries
-      // Use HiveDecimalWritable's fast deserialization method, which emulates
-      // SerializationUtils.readBigInteger.
-      HiveDecimalWritable[] vector = result.vector;
-      HiveDecimalWritable decWritable;
-      if (result.noNulls) {
-        for (int r=0; r < batchSize; ++r) {
-          decWritable = vector[r];
-          if (!decWritable.serializationUtilsRead(
-              valueStream, scratchScaleVector[r],
-              scratchBytes)) {
-            result.isNull[r] = true;
-            result.noNulls = false;
-          }
-        }
-      } else if (!result.isRepeating || !result.isNull[0]) {
-        for (int r=0; r < batchSize; ++r) {
-          if (!result.isNull[r]) {
-            decWritable = vector[r];
-            if (!decWritable.serializationUtilsRead(
-                valueStream, scratchScaleVector[r],
-                scratchBytes)) {
-              result.isNull[r] = true;
-              result.noNulls = false;
-            }
-          }
-        }
-      }
-    }
-
-    @Override
-    void skipRows(long items) throws IOException {
-      items = countNonNulls(items);
-      HiveDecimalWritable scratchDecWritable = new HiveDecimalWritable();
-      for (int i = 0; i < items; i++) {
-        scratchDecWritable.serializationUtilsRead(valueStream, 0, scratchBytes);
-      }
-      scaleReader.skip(items);
-    }
-  }
-
-  /**
-   * A tree reader that will read string columns. At the start of the
-   * stripe, it creates an internal reader based on whether a direct or
-   * dictionary encoding was used.
-   */
-  public static class StringTreeReader extends TreeReader {
-    protected TreeReader reader;
-
-    StringTreeReader(int columnId) throws IOException {
-      super(columnId);
-    }
-
-    protected StringTreeReader(int columnId, InStream present, InStream data, InStream length,
-        InStream dictionary, OrcProto.ColumnEncoding encoding) throws IOException {
-      super(columnId, present);
-      if (encoding != null) {
-        switch (encoding.getKind()) {
-          case DIRECT:
-          case DIRECT_V2:
-            reader = new StringDirectTreeReader(columnId, present, data, length,
-                encoding.getKind());
-            break;
-          case DICTIONARY:
-          case DICTIONARY_V2:
-            reader = new StringDictionaryTreeReader(columnId, present, data, length, dictionary,
-                encoding);
-            break;
-          default:
-            throw new IllegalArgumentException("Unsupported encoding " +
-                encoding.getKind());
-        }
-      }
-    }
-
-    @Override
-    void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
-      reader.checkEncoding(encoding);
-    }
-
-    @Override
-    void startStripe(Map<StreamName, InStream> streams,
-        OrcProto.StripeFooter stripeFooter
-    ) throws IOException {
-      // For each stripe, checks the encoding and initializes the appropriate
-      // reader
-      switch (stripeFooter.getColumnsList().get(columnId).getKind()) {
-        case DIRECT:
-        case DIRECT_V2:
-          reader = new StringDirectTreeReader(columnId);
-          break;
-        case DICTIONARY:
-        case DICTIONARY_V2:
-          reader = new StringDictionaryTreeReader(columnId);
-          break;
-        default:
-          throw new IllegalArgumentException("Unsupported encoding " +
-              stripeFooter.getColumnsList().get(columnId).getKind());
-      }
-      reader.startStripe(streams, stripeFooter);
-    }
-
-    @Override
-    public void seek(PositionProvider[] index) throws IOException {
-      reader.seek(index);
-    }
-
-    @Override
-    public void seek(PositionProvider index) throws IOException {
-      reader.seek(index);
-    }
-
-    @Override
-    public void nextVector(ColumnVector previousVector,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      reader.nextVector(previousVector, isNull, batchSize);
-    }
-
-    @Override
-    void skipRows(long items) throws IOException {
-      reader.skipRows(items);
-    }
-  }
-
-  private static org.slf4j.Logger LOG = org.slf4j.LoggerFactory.getLogger(TreeReaderFactory.class);
-  // Collects the closely related methods for reading an ORC vector of byte arrays
-  // into a BytesColumnVector.
-  public static class BytesColumnVectorUtil {
-
-    private static byte[] commonReadByteArrays(InStream stream, IntegerReader lengths,
-        LongColumnVector scratchlcv,
-        BytesColumnVector result, final int batchSize) throws IOException {
-      // Read lengths
-      scratchlcv.isNull = result.isNull;  // Notice we are replacing the isNull vector here...
-      scratchlcv.ensureSize(batchSize, false);
-      lengths.nextVector(scratchlcv, scratchlcv.vector, batchSize);
-      int totalLength = 0;
-      if (!scratchlcv.isRepeating) {
-        for (int i = 0; i < batchSize; i++) {
-          if (!scratchlcv.isNull[i]) {
-            totalLength += (int) scratchlcv.vector[i];
-          }
-        }
-      } else {
-        if (!scratchlcv.isNull[0]) {
-          totalLength = (int) (batchSize * scratchlcv.vector[0]);
-        }
-      }
-
-      // Read all the strings for this batch
-      byte[] allBytes = new byte[totalLength];
-      int offset = 0;
-      int len = totalLength;
-      while (len > 0) {
-        int bytesRead = stream.read(allBytes, offset, len);
-        if (bytesRead < 0) {
-          throw new EOFException("Can't finish byte read from " + stream);
-        }
-        len -= bytesRead;
-        offset += bytesRead;
-      }
-
-      return allBytes;
-    }
-
-    // Common code for reading byte-array values into a BytesColumnVector.
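-    // All of the batch's values are read into a single byte[] by
-    // commonReadByteArrays, and each row is then attached to a slice of that
-    // buffer via setRef(), avoiding a separate allocation per row.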
-    public static void readOrcByteArrays(InStream stream,
-                                         IntegerReader lengths,
-                                         LongColumnVector scratchlcv,
-                                         BytesColumnVector result,
-                                         final int batchSize) throws IOException {
-      if (result.noNulls || !(result.isRepeating && result.isNull[0])) {
-        byte[] allBytes = commonReadByteArrays(stream, lengths, scratchlcv,
-            result, (int) batchSize);
-
-        // Too expensive to figure out 'repeating' by comparisons.
-        result.isRepeating = false;
-        int offset = 0;
-        if (!scratchlcv.isRepeating) {
-          for (int i = 0; i < batchSize; i++) {
-            if (!scratchlcv.isNull[i]) {
-              result.setRef(i, allBytes, offset, (int) scratchlcv.vector[i]);
-              offset += scratchlcv.vector[i];
-            } else {
-              result.setRef(i, allBytes, 0, 0);
-            }
-          }
-        } else {
-          for (int i = 0; i < batchSize; i++) {
-            if (!scratchlcv.isNull[i]) {
-              result.setRef(i, allBytes, offset, (int) scratchlcv.vector[0]);
-              offset += scratchlcv.vector[0];
-            } else {
-              result.setRef(i, allBytes, 0, 0);
-            }
-          }
-        }
-      }
-    }
-  }
-
-  /**
-   * A reader for string columns that are direct encoded in the current
-   * stripe.
-   */
-  public static class StringDirectTreeReader extends TreeReader {
-    private static final HadoopShims SHIMS = HadoopShims.Factory.get();
-    protected InStream stream;
-    protected HadoopShims.TextReaderShim data;
-    protected IntegerReader lengths;
-    private final LongColumnVector scratchlcv;
-
-    StringDirectTreeReader(int columnId) throws IOException {
-      this(columnId, null, null, null, null);
-    }
-
-    protected StringDirectTreeReader(int columnId, InStream present, InStream data,
-        InStream length, OrcProto.ColumnEncoding.Kind encoding) throws IOException {
-      super(columnId, present);
-      this.scratchlcv = new LongColumnVector();
-      this.stream = data;
-      if (length != null && encoding != null) {
-        this.lengths = createIntegerReader(encoding, length, false, false);
-        this.data = SHIMS.getTextReaderShim(this.stream);
-      }
-    }
-
-    @Override
-    void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
-      if (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT &&
-          encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2) {
-        throw new IOException("Unknown encoding " + encoding + " in column " +
-            columnId);
-      }
-    }
-
-    @Override
-    void startStripe(Map<StreamName, InStream> streams,
-        OrcProto.StripeFooter stripeFooter
-    ) throws IOException {
-      super.startStripe(streams, stripeFooter);
-      StreamName name = new StreamName(columnId,
-          OrcProto.Stream.Kind.DATA);
-      stream = streams.get(name);
-      data = SHIMS.getTextReaderShim(this.stream);
-      lengths = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(),
-          streams.get(new StreamName(columnId, OrcProto.Stream.Kind.LENGTH)),
-          false, false);
-    }
-
-    @Override
-    public void seek(PositionProvider[] index) throws IOException {
-      seek(index[columnId]);
-    }
-
-    @Override
-    public void seek(PositionProvider index) throws IOException {
-      super.seek(index);
-      stream.seek(index);
-      // don't seek data stream
-      lengths.seek(index);
-    }
-
-    @Override
-    public void nextVector(ColumnVector previousVector,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      final BytesColumnVector result = (BytesColumnVector) previousVector;
-
-      // Read present/isNull stream
-      super.nextVector(result, isNull, batchSize);
-
-      BytesColumnVectorUtil.readOrcByteArrays(stream, lengths, scratchlcv,
-          result, batchSize);
-    }
-
-    @Override
-    void skipRows(long items) throws IOException {
-      items = countNonNulls(items);
-      long lengthToSkip = 0;
-      for (int i = 0; i < items; ++i) {
-        lengthToSkip += lengths.next();
-      }
-
-      while (lengthToSkip > 0) {
-        lengthToSkip -= stream.skip(lengthToSkip);
-      }
-    }
-
-    public IntegerReader getLengths() {
-      return lengths;
-    }
-
-    public InStream getStream() {
-      return stream;
-    }
-  }
-
-  /**
-   * A reader for string columns that are dictionary encoded in the current
-   * stripe.
-   */
-  public static class StringDictionaryTreeReader extends TreeReader {
-    private static final byte[] EMPTY_BYTE_ARRAY = new byte[0];
-    private DynamicByteArray dictionaryBuffer;
-    private int[] dictionaryOffsets;
-    protected IntegerReader reader;
-
-    private byte[] dictionaryBufferInBytesCache = null;
-    private final LongColumnVector scratchlcv;
-
-    StringDictionaryTreeReader(int columnId) throws IOException {
-      this(columnId, null, null, null, null, null);
-    }
-
-    protected StringDictionaryTreeReader(int columnId, InStream present, InStream data,
-        InStream length, InStream dictionary, OrcProto.ColumnEncoding encoding)
-        throws IOException {
-      super(columnId, present);
-      scratchlcv = new LongColumnVector();
-      if (data != null && encoding != null) {
-        this.reader = createIntegerReader(encoding.getKind(), data, false, false);
-      }
-
-      if (dictionary != null && encoding != null) {
-        readDictionaryStream(dictionary);
-      }
-
-      if (length != null && encoding != null) {
-        readDictionaryLengthStream(length, encoding);
-      }
-    }
-
-    @Override
-    void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
-      if (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DICTIONARY &&
-          encoding.getKind() != OrcProto.ColumnEncoding.Kind.DICTIONARY_V2) {
-        throw new IOException("Unknown encoding " + encoding + " in column " +
-            columnId);
-      }
-    }
-
-    @Override
-    void startStripe(Map<StreamName, InStream> streams,
-        OrcProto.StripeFooter stripeFooter
-    ) throws IOException {
-      super.startStripe(streams, stripeFooter);
-
-      // read the dictionary blob
-      StreamName name = new StreamName(columnId,
-          OrcProto.Stream.Kind.DICTIONARY_DATA);
-      InStream in = streams.get(name);
-      readDictionaryStream(in);
-
-      // read the lengths
-      name = new StreamName(columnId, OrcProto.Stream.Kind.LENGTH);
-      in = streams.get(name);
-      readDictionaryLengthStream(in, stripeFooter.getColumnsList().get(columnId));
-
-      // set up the row reader
-      name = new StreamName(columnId, OrcProto.Stream.Kind.DATA);
-      reader = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(),
-          streams.get(name), false, false);
-    }
-
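-    /**
-     * Converts the LENGTH stream into cumulative offsets into the dictionary
-     * blob: entry lengths [3, 1, 4], for example, produce dictionaryOffsets
-     * [0, 3, 4, 8], so entry i spans [dictionaryOffsets[i], dictionaryOffsets[i + 1]).
-     */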
-    private void readDictionaryLengthStream(InStream in, OrcProto.ColumnEncoding encoding)
-        throws IOException {
-      int dictionarySize = encoding.getDictionarySize();
-      if (in != null) { // Guard against empty LENGTH stream.
-        IntegerReader lenReader = createIntegerReader(encoding.getKind(), in, false, false);
-        int offset = 0;
-        if (dictionaryOffsets == null ||
-            dictionaryOffsets.length < dictionarySize + 1) {
-          dictionaryOffsets = new int[dictionarySize + 1];
-        }
-        for (int i = 0; i < dictionarySize; ++i) {
-          dictionaryOffsets[i] = offset;
-          offset += (int) lenReader.next();
-        }
-        dictionaryOffsets[dictionarySize] = offset;
-        in.close();
-      }
-
-    }
-
-    private void readDictionaryStream(InStream in) throws IOException {
-      if (in != null) { // Guard against empty dictionary stream.
-        if (in.available() > 0) {
-          dictionaryBuffer = new DynamicByteArray(64, in.available());
-          dictionaryBuffer.readAll(in);
-          // Since this is the start of a stripe, invalidate the cache.
-          dictionaryBufferInBytesCache = null;
-        }
-        in.close();
-      } else {
-        dictionaryBuffer = null;
-      }
-    }
-
-    @Override
-    public void seek(PositionProvider[] index) throws IOException {
-      seek(index[columnId]);
-    }
-
-    @Override
-    public void seek(PositionProvider index) throws IOException {
-      super.seek(index);
-      reader.seek(index);
-    }
-
-    @Override
-    public void nextVector(ColumnVector previousVector,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      final BytesColumnVector result = (BytesColumnVector) previousVector;
-      int offset;
-      int length;
-
-      // Read present/isNull stream
-      super.nextVector(result, isNull, batchSize);
-
-      if (dictionaryBuffer != null) {
-
-        // Load dictionaryBuffer into cache.
-        if (dictionaryBufferInBytesCache == null) {
-          dictionaryBufferInBytesCache = dictionaryBuffer.get();
-        }
-
-        // Read string offsets
-        scratchlcv.isNull = result.isNull;
-        scratchlcv.ensureSize((int) batchSize, false);
-        reader.nextVector(scratchlcv, scratchlcv.vector, batchSize);
-        if (!scratchlcv.isRepeating) {
-
-          // The vector has non-repeating strings. Iterate through the batch
-          // and set the strings one by one.
-          for (int i = 0; i < batchSize; i++) {
-            if (!scratchlcv.isNull[i]) {
-              offset = dictionaryOffsets[(int) scratchlcv.vector[i]];
-              length = getDictionaryEntryLength((int) scratchlcv.vector[i], offset);
-              result.setRef(i, dictionaryBufferInBytesCache, offset, length);
-            } else {
-              // If the value is null then set offset and length to zero (null string)
-              result.setRef(i, dictionaryBufferInBytesCache, 0, 0);
-            }
-          }
-        } else {
-          // If the value is repeating, just set the first value in the vector and
-          // the isRepeating flag to true; there is no need to iterate through and
-          // set all the elements to the same value.
-          offset = dictionaryOffsets[(int) scratchlcv.vector[0]];
-          length = getDictionaryEntryLength((int) scratchlcv.vector[0], offset);
-          result.setRef(0, dictionaryBufferInBytesCache, offset, length);
-        }
-        result.isRepeating = scratchlcv.isRepeating;
-      } else {
-        if (dictionaryOffsets == null) {
-          // Entire stripe contains null strings.
-          result.isRepeating = true;
-          result.noNulls = false;
-          result.isNull[0] = true;
-          result.setRef(0, EMPTY_BYTE_ARRAY, 0, 0);
-        } else {
-          // stripe contains nulls and empty strings
-          for (int i = 0; i < batchSize; i++) {
-            if (!result.isNull[i]) {
-              result.setRef(i, EMPTY_BYTE_ARRAY, 0, 0);
-            }
-          }
-        }
-      }
-    }
-
-    int getDictionaryEntryLength(int entry, int offset) {
-      final int length;
-      // If it isn't the last entry, subtract the adjacent offsets; otherwise
-      // measure to the end of the dictionary buffer.
-      if (entry < dictionaryOffsets.length - 1) {
-        length = dictionaryOffsets[entry + 1] - offset;
-      } else {
-        length = dictionaryBuffer.size() - offset;
-      }
-      return length;
-    }
-
-    @Override
-    void skipRows(long items) throws IOException {
-      reader.skip(countNonNulls(items));
-    }
-
-    public IntegerReader getReader() {
-      return reader;
-    }
-  }
-
-  public static class CharTreeReader extends StringTreeReader {
-    int maxLength;
-
-    CharTreeReader(int columnId, int maxLength) throws IOException {
-      this(columnId, maxLength, null, null, null, null, null);
-    }
-
-    protected CharTreeReader(int columnId, int maxLength, InStream present, InStream data,
-        InStream length, InStream dictionary, OrcProto.ColumnEncoding encoding) throws IOException {
-      super(columnId, present, data, length, dictionary, encoding);
-      this.maxLength = maxLength;
-    }
-
-    @Override
-    public void nextVector(ColumnVector previousVector,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      // Get the vector of strings from StringTreeReader, then make a 2nd pass to
-      // adjust down the length (right trim and truncate) if necessary.
-      super.nextVector(previousVector, isNull, batchSize);
-      BytesColumnVector result = (BytesColumnVector) previousVector;
-      int adjustedDownLen;
-      if (result.isRepeating) {
-        if (result.noNulls || !result.isNull[0]) {
-          adjustedDownLen = StringExpr
-              .rightTrimAndTruncate(result.vector[0], result.start[0], result.length[0], maxLength);
-          if (adjustedDownLen < result.length[0]) {
-            result.setRef(0, result.vector[0], result.start[0], adjustedDownLen);
-          }
-        }
-      } else {
-        if (result.noNulls) {
-          for (int i = 0; i < batchSize; i++) {
-            adjustedDownLen = StringExpr
-                .rightTrimAndTruncate(result.vector[i], result.start[i], result.length[i],
-                    maxLength);
-            if (adjustedDownLen < result.length[i]) {
-              result.setRef(i, result.vector[i], result.start[i], adjustedDownLen);
-            }
-          }
-        } else {
-          for (int i = 0; i < batchSize; i++) {
-            if (!result.isNull[i]) {
-              adjustedDownLen = StringExpr
-                  .rightTrimAndTruncate(result.vector[i], result.start[i], result.length[i],
-                      maxLength);
-              if (adjustedDownLen < result.length[i]) {
-                result.setRef(i, result.vector[i], result.start[i], adjustedDownLen);
-              }
-            }
-          }
-        }
-      }
-    }
-  }
-
-  public static class VarcharTreeReader extends StringTreeReader {
-    int maxLength;
-
-    VarcharTreeReader(int columnId, int maxLength) throws IOException {
-      this(columnId, maxLength, null, null, null, null, null);
-    }
-
-    protected VarcharTreeReader(int columnId, int maxLength, InStream present, InStream data,
-        InStream length, InStream dictionary, OrcProto.ColumnEncoding encoding) throws IOException {
-      super(columnId, present, data, length, dictionary, encoding);
-      this.maxLength = maxLength;
-    }
-
-    @Override
-    public void nextVector(ColumnVector previousVector,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      // Get the vector of strings from StringTreeReader, then make a 2nd pass to
-      // adjust down the length (truncate) if necessary.
-      super.nextVector(previousVector, isNull, batchSize);
-      BytesColumnVector result = (BytesColumnVector) previousVector;
-
-      int adjustedDownLen;
-      if (result.isRepeating) {
-        if (result.noNulls || !result.isNull[0]) {
-          adjustedDownLen = StringExpr
-              .truncate(result.vector[0], result.start[0], result.length[0], maxLength);
-          if (adjustedDownLen < result.length[0]) {
-            result.setRef(0, result.vector[0], result.start[0], adjustedDownLen);
-          }
-        }
-      } else {
-        if (result.noNulls) {
-          for (int i = 0; i < batchSize; i++) {
-            adjustedDownLen = StringExpr
-                .truncate(result.vector[i], result.start[i], result.length[i], maxLength);
-            if (adjustedDownLen < result.length[i]) {
-              result.setRef(i, result.vector[i], result.start[i], adjustedDownLen);
-            }
-          }
-        } else {
-          for (int i = 0; i < batchSize; i++) {
-            if (!result.isNull[i]) {
-              adjustedDownLen = StringExpr
-                  .truncate(result.vector[i], result.start[i], result.length[i], maxLength);
-              if (adjustedDownLen < result.length[i]) {
-                result.setRef(i, result.vector[i], result.start[i], adjustedDownLen);
-              }
-            }
-          }
-        }
-      }
-    }
-  }
-
-  public static class StructTreeReader extends TreeReader {
-    protected final TreeReader[] fields;
-
-    protected StructTreeReader(int columnId,
-                               TypeDescription readerSchema,
-                               SchemaEvolution evolution,
-                               boolean[] included,
-                               boolean skipCorrupt) throws IOException {
-      super(columnId);
-
-      List<TypeDescription> childrenTypes = readerSchema.getChildren();
-      this.fields = new TreeReader[childrenTypes.size()];
-      for (int i = 0; i < fields.length; ++i) {
-        TypeDescription subtype = childrenTypes.get(i);
-        this.fields[i] = createTreeReader(subtype, evolution, included, skipCorrupt);
-      }
-    }
-
-    public TreeReader[] getChildReaders() {
-      return fields;
-    }
-
-    protected StructTreeReader(int columnId, InStream present,
-        OrcProto.ColumnEncoding encoding, TreeReader[] childReaders) throws IOException {
-      super(columnId, present);
-      if (encoding != null) {
-        checkEncoding(encoding);
-      }
-      this.fields = childReaders;
-    }
-
-    @Override
-    public void seek(PositionProvider[] index) throws IOException {
-      super.seek(index);
-      for (TreeReader kid : fields) {
-        if (kid != null) {
-          kid.seek(index);
-        }
-      }
-    }
-
-    @Override
-    public void nextBatch(VectorizedRowBatch batch,
-                          int batchSize) throws IOException {
-      for(int i=0; i < fields.length &&
-          (vectorColumnCount == -1 || i < vectorColumnCount); ++i) {
-        ColumnVector colVector = batch.cols[i];
-        if (colVector != null) {
-          colVector.reset();
-          colVector.ensureSize((int) batchSize, false);
-          fields[i].nextVector(colVector, null, batchSize);
-        }
-      }
-    }
-
-    @Override
-    public void nextVector(ColumnVector previousVector,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      super.nextVector(previousVector, isNull, batchSize);
-      StructColumnVector result = (StructColumnVector) previousVector;
-      if (result.noNulls || !(result.isRepeating && result.isNull[0])) {
-        result.isRepeating = false;
-
-        // Read all the members of struct as column vectors
-        boolean[] mask = result.noNulls ? null : result.isNull;
-        for (int f = 0; f < fields.length; f++) {
-          if (fields[f] != null) {
-            fields[f].nextVector(result.fields[f], mask, batchSize);
-          }
-        }
-      }
-    }
-
-    @Override
-    void startStripe(Map<StreamName, InStream> streams,
-        OrcProto.StripeFooter stripeFooter
-    ) throws IOException {
-      super.startStripe(streams, stripeFooter);
-      for (TreeReader field : fields) {
-        if (field != null) {
-          field.startStripe(streams, stripeFooter);
-        }
-      }
-    }
-
-    @Override
-    void skipRows(long items) throws IOException {
-      items = countNonNulls(items);
-      for (TreeReader field : fields) {
-        if (field != null) {
-          field.skipRows(items);
-        }
-      }
-    }
-  }
-
-  public static class UnionTreeReader extends TreeReader {
-    protected final TreeReader[] fields;
-    protected RunLengthByteReader tags;
-
-    protected UnionTreeReader(int fileColumn,
-                              TypeDescription readerSchema,
-                              SchemaEvolution evolution,
-                              boolean[] included,
-                              boolean skipCorrupt) throws IOException {
-      super(fileColumn);
-      List<TypeDescription> childrenTypes = readerSchema.getChildren();
-      int fieldCount = childrenTypes.size();
-      this.fields = new TreeReader[fieldCount];
-      for (int i = 0; i < fieldCount; ++i) {
-        TypeDescription subtype = childrenTypes.get(i);
-        this.fields[i] = createTreeReader(subtype, evolution, included, skipCorrupt);
-      }
-    }
-
-    protected UnionTreeReader(int columnId, InStream present,
-        OrcProto.ColumnEncoding encoding, TreeReader[] childReaders) throws IOException {
-      super(columnId, present);
-      if (encoding != null) {
-        checkEncoding(encoding);
-      }
-      this.fields = childReaders;
-    }
-
-    @Override
-    public void seek(PositionProvider[] index) throws IOException {
-      super.seek(index);
-      tags.seek(index[columnId]);
-      for (TreeReader kid : fields) {
-        kid.seek(index);
-      }
-    }
-
-    @Override
-    public void nextVector(ColumnVector previousVector,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      UnionColumnVector result = (UnionColumnVector) previousVector;
-      super.nextVector(result, isNull, batchSize);
-      if (result.noNulls || !(result.isRepeating && result.isNull[0])) {
-        result.isRepeating = false;
-        tags.nextVector(result.noNulls ? null : result.isNull, result.tags,
-            batchSize);
-        boolean[] ignore = new boolean[(int) batchSize];
-        for (int f = 0; f < result.fields.length; ++f) {
-          // build the ignore list for this tag
-          for (int r = 0; r < batchSize; ++r) {
-            ignore[r] = (!result.noNulls && result.isNull[r]) ||
-                result.tags[r] != f;
-          }
-          fields[f].nextVector(result.fields[f], ignore, batchSize);
-        }
-      }
-    }
-
-    @Override
-    void startStripe(Map<StreamName, InStream> streams,
-        OrcProto.StripeFooter stripeFooter
-    ) throws IOException {
-      super.startStripe(streams, stripeFooter);
-      tags = new RunLengthByteReader(streams.get(new StreamName(columnId,
-          OrcProto.Stream.Kind.DATA)));
-      for (TreeReader field : fields) {
-        if (field != null) {
-          field.startStripe(streams, stripeFooter);
-        }
-      }
-    }
-
-    @Override
-    void skipRows(long items) throws IOException {
-      items = countNonNulls(items);
-      long[] counts = new long[fields.length];
-      for (int i = 0; i < items; ++i) {
-        counts[tags.next()] += 1;
-      }
-      for (int i = 0; i < counts.length; ++i) {
-        fields[i].skipRows(counts[i]);
-      }
-    }
-  }
-
-  public static class ListTreeReader extends TreeReader {
-    protected final TreeReader elementReader;
-    protected IntegerReader lengths = null;
-
-    protected ListTreeReader(int fileColumn,
-                             TypeDescription readerSchema,
-                             SchemaEvolution evolution,
-                             boolean[] included,
-                             boolean skipCorrupt) throws IOException {
-      super(fileColumn);
-      TypeDescription elementType = readerSchema.getChildren().get(0);
-      elementReader = createTreeReader(elementType, evolution, included,
-          skipCorrupt);
-    }
-
-    protected ListTreeReader(int columnId, InStream present, InStream data,
-        OrcProto.ColumnEncoding encoding, TreeReader elementReader) throws IOException {
-      super(columnId, present);
-      if (data != null && encoding != null) {
-        checkEncoding(encoding);
-        this.lengths = createIntegerReader(encoding.getKind(), data, false, false);
-      }
-      this.elementReader = elementReader;
-    }
-
-    @Override
-    public void seek(PositionProvider[] index) throws IOException {
-      super.seek(index);
-      lengths.seek(index[columnId]);
-      elementReader.seek(index);
-    }
-
-    @Override
-    public void nextVector(ColumnVector previous,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      ListColumnVector result = (ListColumnVector) previous;
-      super.nextVector(result, isNull, batchSize);
-      // if we have some non-null values, then read them
-      if (result.noNulls || !(result.isRepeating && result.isNull[0])) {
-        lengths.nextVector(result, result.lengths, batchSize);
-        // even with repeating lengths, the list doesn't repeat
-        result.isRepeating = false;
-        // build the offsets vector and figure out how many children to read
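-        // e.g. lengths of [2, 0, 3] with no nulls give offsets [0, 2, 2] and a
-        // childCount of 5, so five child values are read for the whole batch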
-        result.childCount = 0;
-        for (int r = 0; r < batchSize; ++r) {
-          if (result.noNulls || !result.isNull[r]) {
-            result.offsets[r] = result.childCount;
-            result.childCount += result.lengths[r];
-          }
-        }
-        result.child.ensureSize(result.childCount, false);
-        elementReader.nextVector(result.child, null, result.childCount);
-      }
-    }
-
-    @Override
-    void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
-      if ((encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) &&
-          (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2)) {
-        throw new IOException("Unknown encoding " + encoding + " in column " +
-            columnId);
-      }
-    }
-
-    @Override
-    void startStripe(Map<StreamName, InStream> streams,
-        OrcProto.StripeFooter stripeFooter
-    ) throws IOException {
-      super.startStripe(streams, stripeFooter);
-      lengths = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(),
-          streams.get(new StreamName(columnId,
-              OrcProto.Stream.Kind.LENGTH)), false, false);
-      if (elementReader != null) {
-        elementReader.startStripe(streams, stripeFooter);
-      }
-    }
-
-    @Override
-    void skipRows(long items) throws IOException {
-      items = countNonNulls(items);
-      long childSkip = 0;
-      for (long i = 0; i < items; ++i) {
-        childSkip += lengths.next();
-      }
-      elementReader.skipRows(childSkip);
-    }
-  }
-
-  public static class MapTreeReader extends TreeReader {
-    protected final TreeReader keyReader;
-    protected final TreeReader valueReader;
-    protected IntegerReader lengths = null;
-
-    protected MapTreeReader(int fileColumn,
-                            TypeDescription readerSchema,
-                            SchemaEvolution evolution,
-                            boolean[] included,
-                            boolean skipCorrupt) throws IOException {
-      super(fileColumn);
-      TypeDescription keyType = readerSchema.getChildren().get(0);
-      TypeDescription valueType = readerSchema.getChildren().get(1);
-      keyReader = createTreeReader(keyType, evolution, included, skipCorrupt);
-      valueReader = createTreeReader(valueType, evolution, included, skipCorrupt);
-    }
-
-    protected MapTreeReader(int columnId, InStream present, InStream data,
-        OrcProto.ColumnEncoding encoding, TreeReader keyReader, TreeReader valueReader)
-        throws IOException {
-      super(columnId, present);
-      if (data != null && encoding != null) {
-        checkEncoding(encoding);
-        this.lengths = createIntegerReader(encoding.getKind(), data, false, false);
-      }
-      this.keyReader = keyReader;
-      this.valueReader = valueReader;
-    }
-
-    @Override
-    public void seek(PositionProvider[] index) throws IOException {
-      super.seek(index);
-      lengths.seek(index[columnId]);
-      keyReader.seek(index);
-      valueReader.seek(index);
-    }
-
-    @Override
-    public void nextVector(ColumnVector previous,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      MapColumnVector result = (MapColumnVector) previous;
-      super.nextVector(result, isNull, batchSize);
-      if (result.noNulls || !(result.isRepeating && result.isNull[0])) {
-        lengths.nextVector(result, result.lengths, batchSize);
-        // even with repeating lengths, the map doesn't repeat
-        result.isRepeating = false;
-        // build the offsets vector and figure out how many children to read
-        result.childCount = 0;
-        for (int r = 0; r < batchSize; ++r) {
-          if (result.noNulls || !result.isNull[r]) {
-            result.offsets[r] = result.childCount;
-            result.childCount += result.lengths[r];
-          }
-        }
-        result.keys.ensureSize(result.childCount, false);
-        result.values.ensureSize(result.childCount, false);
-        keyReader.nextVector(result.keys, null, result.childCount);
-        valueReader.nextVector(result.values, null, result.childCount);
-      }
-    }
-
-    @Override
-    void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
-      if ((encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) &&
-          (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2)) {
-        throw new IOException("Unknown encoding " + encoding + " in column " +
-            columnId);
-      }
-    }
-
-    @Override
-    void startStripe(Map<StreamName, InStream> streams,
-        OrcProto.StripeFooter stripeFooter
-    ) throws IOException {
-      super.startStripe(streams, stripeFooter);
-      lengths = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(),
-          streams.get(new StreamName(columnId,
-              OrcProto.Stream.Kind.LENGTH)), false, false);
-      if (keyReader != null) {
-        keyReader.startStripe(streams, stripeFooter);
-      }
-      if (valueReader != null) {
-        valueReader.startStripe(streams, stripeFooter);
-      }
-    }
-
-    @Override
-    void skipRows(long items) throws IOException {
-      items = countNonNulls(items);
-      long childSkip = 0;
-      for (long i = 0; i < items; ++i) {
-        childSkip += lengths.next();
-      }
-      keyReader.skipRows(childSkip);
-      valueReader.skipRows(childSkip);
-    }
-  }
-
-  public static TreeReader createTreeReader(TypeDescription readerType,
-                                            SchemaEvolution evolution,
-                                            boolean[] included,
-                                            boolean skipCorrupt
-                                            ) throws IOException {
-    TypeDescription fileType = evolution.getFileType(readerType);
-    if (fileType == null || !evolution.includeReaderColumn(readerType.getId())){
-      return new NullTreeReader(0);
-    }
-    TypeDescription.Category readerTypeCategory = readerType.getCategory();
-    if (!fileType.equals(readerType) &&
-        (readerTypeCategory != TypeDescription.Category.STRUCT &&
-         readerTypeCategory != TypeDescription.Category.MAP &&
-         readerTypeCategory != TypeDescription.Category.LIST &&
-         readerTypeCategory != TypeDescription.Category.UNION)) {
-      // We only convert complex children.
-      return ConvertTreeReaderFactory.createConvertTreeReader(readerType, evolution,
-          included, skipCorrupt);
-    }
-    switch (readerTypeCategory) {
-      case BOOLEAN:
-        return new BooleanTreeReader(fileType.getId());
-      case BYTE:
-        return new ByteTreeReader(fileType.getId());
-      case DOUBLE:
-        return new DoubleTreeReader(fileType.getId());
-      case FLOAT:
-        return new FloatTreeReader(fileType.getId());
-      case SHORT:
-        return new ShortTreeReader(fileType.getId());
-      case INT:
-        return new IntTreeReader(fileType.getId());
-      case LONG:
-        return new LongTreeReader(fileType.getId(), skipCorrupt);
-      case STRING:
-        return new StringTreeReader(fileType.getId());
-      case CHAR:
-        return new CharTreeReader(fileType.getId(), readerType.getMaxLength());
-      case VARCHAR:
-        return new VarcharTreeReader(fileType.getId(), readerType.getMaxLength());
-      case BINARY:
-        return new BinaryTreeReader(fileType.getId());
-      case TIMESTAMP:
-        return new TimestampTreeReader(fileType.getId(), skipCorrupt);
-      case DATE:
-        return new DateTreeReader(fileType.getId());
-      case DECIMAL:
-        return new DecimalTreeReader(fileType.getId(), readerType.getPrecision(),
-            readerType.getScale());
-      case STRUCT:
-        return new StructTreeReader(fileType.getId(), readerType,
-            evolution, included, skipCorrupt);
-      case LIST:
-        return new ListTreeReader(fileType.getId(), readerType,
-            evolution, included, skipCorrupt);
-      case MAP:
-        return new MapTreeReader(fileType.getId(), readerType, evolution,
-            included, skipCorrupt);
-      case UNION:
-        return new UnionTreeReader(fileType.getId(), readerType,
-            evolution, included, skipCorrupt);
-      default:
-        throw new IllegalArgumentException("Unsupported type " +
-            readerTypeCategory);
-    }
-  }
-}
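
The offset bookkeeping in ListTreeReader.nextVector and MapTreeReader.nextVector above is a small prefix sum over the per-row lengths. A minimal standalone sketch of that bookkeeping, using hypothetical lengths and nulls and no ORC dependencies:

// Standalone sketch: turning per-row lengths into child offsets and a total
// child count, mirroring the loop in ListTreeReader.nextVector above.
public class ListOffsetsSketch {
  public static void main(String[] args) {
    long[] lengths = {2, 0, 3};              // hypothetical list lengths per row
    boolean[] isNull = {false, true, false}; // row 1 is null; its length is ignored
    long[] offsets = new long[lengths.length];
    long childCount = 0;
    for (int r = 0; r < lengths.length; ++r) {
      if (!isNull[r]) {
        offsets[r] = childCount;   // children of row r start at this index
        childCount += lengths[r];  // rows 0 and 2 contribute 2 + 3 = 5 children
      }
    }
    // The element reader is then asked for exactly childCount values.
    System.out.println("offsets = " + java.util.Arrays.toString(offsets)
        + ", childCount = " + childCount);   // offsets = [0, 0, 2], childCount = 5
  }
}

The real readers also clear isRepeating unconditionally here: even if every row has the same length, the child values generally differ, so the list or map column cannot be treated as repeating.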


[04/37] hive git commit: HIVE-17118. Move the hive-orc source files to make the package names unique.

Posted by om...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/orc/TestVectorOrcFile.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/orc/TestVectorOrcFile.java b/orc/src/test/org/apache/orc/TestVectorOrcFile.java
deleted file mode 100644
index 73abf9e..0000000
--- a/orc/src/test/org/apache/orc/TestVectorOrcFile.java
+++ /dev/null
@@ -1,2789 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc;
-
-import com.google.common.collect.Lists;
-
-import junit.framework.Assert;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.UnionColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
-import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
-import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
-import org.apache.hadoop.hive.serde2.io.DateWritable;
-import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
-import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.orc.impl.DataReaderProperties;
-import org.apache.orc.impl.MemoryManager;
-import org.apache.orc.impl.OrcIndex;
-import org.apache.orc.impl.RecordReaderImpl;
-import org.apache.orc.impl.RecordReaderUtils;
-import org.apache.orc.tools.TestJsonFileDump;
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TestName;
-
-import java.io.File;
-import java.io.IOException;
-import java.math.BigInteger;
-import java.nio.ByteBuffer;
-import java.sql.Date;
-import java.sql.Timestamp;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Random;
-
-import static junit.framework.TestCase.assertNotNull;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertTrue;
-
-/**
- * Tests for the vectorized reader and writer for ORC files.
- */
-public class TestVectorOrcFile {
-
-  public static class InnerStruct {
-    int int1;
-    Text string1 = new Text();
-    InnerStruct(int int1, Text string1) {
-      this.int1 = int1;
-      this.string1.set(string1);
-    }
-    InnerStruct(int int1, String string1) {
-      this.int1 = int1;
-      this.string1.set(string1);
-    }
-
-    public String toString() {
-      return "{" + int1 + ", " + string1 + "}";
-    }
-  }
-
-  public static class MiddleStruct {
-    List<InnerStruct> list = new ArrayList<InnerStruct>();
-
-    MiddleStruct(InnerStruct... items) {
-      list.clear();
-      list.addAll(Arrays.asList(items));
-    }
-  }
-
-  private static InnerStruct inner(int i, String s) {
-    return new InnerStruct(i, s);
-  }
-
-  private static Map<String, InnerStruct> map(InnerStruct... items)  {
-    Map<String, InnerStruct> result = new HashMap<String, InnerStruct>();
-    for(InnerStruct i: items) {
-      result.put(i.string1.toString(), i);
-    }
-    return result;
-  }
-
-  private static List<InnerStruct> list(InnerStruct... items) {
-    List<InnerStruct> result = new ArrayList<InnerStruct>();
-    result.addAll(Arrays.asList(items));
-    return result;
-  }
-
-  private static BytesWritable bytes(int... items) {
-    BytesWritable result = new BytesWritable();
-    result.setSize(items.length);
-    for(int i=0; i < items.length; ++i) {
-      result.getBytes()[i] = (byte) items[i];
-    }
-    return result;
-  }
-
-  private static byte[] bytesArray(int... items) {
-    byte[] result = new byte[items.length];
-    for(int i=0; i < items.length; ++i) {
-      result[i] = (byte) items[i];
-    }
-    return result;
-  }
-
-  private static ByteBuffer byteBuf(int... items) {
-    ByteBuffer result = ByteBuffer.allocate(items.length);
-    for(int item: items) {
-      result.put((byte) item);
-    }
-    result.flip();
-    return result;
-  }
-
-  Path workDir = new Path(System.getProperty("test.tmp.dir",
-      "target" + File.separator + "test" + File.separator + "tmp"));
-
-  Configuration conf;
-  FileSystem fs;
-  Path testFilePath;
-
-  @Rule
-  public TestName testCaseName = new TestName();
-
-  @Before
-  public void openFileSystem () throws Exception {
-    conf = new Configuration();
-    fs = FileSystem.getLocal(conf);
-    testFilePath = new Path(workDir, "TestVectorOrcFile." +
-        testCaseName.getMethodName() + ".orc");
-    fs.delete(testFilePath, false);
-  }
-
-  @Test
-  public void testReadFormat_0_11() throws Exception {
-    Path oldFilePath =
-        new Path(TestJsonFileDump.getFileFromClasspath("orc-file-11-format.orc"));
-    Reader reader = OrcFile.createReader(oldFilePath,
-        OrcFile.readerOptions(conf).filesystem(fs));
-
-    int stripeCount = 0;
-    int rowCount = 0;
-    long currentOffset = -1;
-    for(StripeInformation stripe : reader.getStripes()) {
-      stripeCount += 1;
-      rowCount += stripe.getNumberOfRows();
-      if (currentOffset < 0) {
-        currentOffset = stripe.getOffset() + stripe.getIndexLength()
-            + stripe.getDataLength() + stripe.getFooterLength();
-      } else {
-        assertEquals(currentOffset, stripe.getOffset());
-        currentOffset += stripe.getIndexLength() + stripe.getDataLength()
-            + stripe.getFooterLength();
-      }
-    }
-    Assert.assertEquals(reader.getNumberOfRows(), rowCount);
-    assertEquals(2, stripeCount);
-
-    // check the stats
-    ColumnStatistics[] stats = reader.getStatistics();
-    assertEquals(7500, stats[1].getNumberOfValues());
-    assertEquals(3750, ((BooleanColumnStatistics) stats[1]).getFalseCount());
-    assertEquals(3750, ((BooleanColumnStatistics) stats[1]).getTrueCount());
-    assertEquals("count: 7500 hasNull: true true: 3750", stats[1].toString());
-
-    assertEquals(2048, ((IntegerColumnStatistics) stats[3]).getMaximum());
-    assertEquals(1024, ((IntegerColumnStatistics) stats[3]).getMinimum());
-    assertEquals(true, ((IntegerColumnStatistics) stats[3]).isSumDefined());
-    assertEquals(11520000, ((IntegerColumnStatistics) stats[3]).getSum());
-    assertEquals("count: 7500 hasNull: true min: 1024 max: 2048 sum: 11520000",
-        stats[3].toString());
-
-    assertEquals(Long.MAX_VALUE,
-        ((IntegerColumnStatistics) stats[5]).getMaximum());
-    assertEquals(Long.MAX_VALUE,
-        ((IntegerColumnStatistics) stats[5]).getMinimum());
-    assertEquals(false, ((IntegerColumnStatistics) stats[5]).isSumDefined());
-    assertEquals(
-        "count: 7500 hasNull: true min: 9223372036854775807 max: 9223372036854775807",
-        stats[5].toString());
-
-    assertEquals(-15.0, ((DoubleColumnStatistics) stats[7]).getMinimum(), 0.0001);
-    assertEquals(-5.0, ((DoubleColumnStatistics) stats[7]).getMaximum(), 0.0001);
-    assertEquals(-75000.0, ((DoubleColumnStatistics) stats[7]).getSum(),
-        0.00001);
-    assertEquals("count: 7500 hasNull: true min: -15.0 max: -5.0 sum: -75000.0",
-        stats[7].toString());
-
-    assertEquals("count: 7500 hasNull: true min: bye max: hi sum: 0", stats[9].toString());
-
-    // check the schema
-    TypeDescription schema = reader.getSchema();
-    assertEquals(TypeDescription.Category.STRUCT, schema.getCategory());
-    assertEquals("struct<boolean1:boolean,byte1:tinyint,short1:smallint,"
-        + "int1:int,long1:bigint,float1:float,double1:double,bytes1:"
-        + "binary,string1:string,middle:struct<list:array<struct<int1:int,"
-        + "string1:string>>>,list:array<struct<int1:int,string1:string>>,"
-        + "map:map<string,struct<int1:int,string1:string>>,ts:timestamp,"
-        + "decimal1:decimal(38,10)>", schema.toString());
-    VectorizedRowBatch batch = schema.createRowBatch();
-
-    RecordReader rows = reader.rows();
-    Assert.assertEquals(true, rows.nextBatch(batch));
-    assertEquals(1024, batch.size);
-
-    // check the contents of the first row
-    assertEquals(false, getBoolean(batch, 0));
-    assertEquals(1, getByte(batch, 0));
-    assertEquals(1024, getShort(batch, 0));
-    assertEquals(65536, getInt(batch, 0));
-    assertEquals(Long.MAX_VALUE, getLong(batch, 0));
-    assertEquals(1.0, getFloat(batch, 0), 0.00001);
-    assertEquals(-15.0, getDouble(batch, 0), 0.00001);
-    assertEquals(bytes(0, 1, 2, 3, 4), getBinary(batch, 0));
-    assertEquals("hi", getText(batch, 0).toString());
-
-    StructColumnVector middle = (StructColumnVector) batch.cols[9];
-    ListColumnVector midList = (ListColumnVector) middle.fields[0];
-    StructColumnVector midListStruct = (StructColumnVector) midList.child;
-    LongColumnVector midListInt = (LongColumnVector) midListStruct.fields[0];
-    BytesColumnVector midListStr = (BytesColumnVector) midListStruct.fields[1];
-    ListColumnVector list = (ListColumnVector) batch.cols[10];
-    StructColumnVector listStruct = (StructColumnVector) list.child;
-    LongColumnVector listInts = (LongColumnVector) listStruct.fields[0];
-    BytesColumnVector listStrs = (BytesColumnVector) listStruct.fields[1];
-    MapColumnVector map = (MapColumnVector) batch.cols[11];
-    BytesColumnVector mapKey = (BytesColumnVector) map.keys;
-    StructColumnVector mapValue = (StructColumnVector) map.values;
-    LongColumnVector mapValueInts = (LongColumnVector) mapValue.fields[0];
-    BytesColumnVector mapValueStrs = (BytesColumnVector) mapValue.fields[1];
-    TimestampColumnVector timestamp = (TimestampColumnVector) batch.cols[12];
-    DecimalColumnVector decs = (DecimalColumnVector) batch.cols[13];
-
-    assertEquals(false, middle.isNull[0]);
-    assertEquals(2, midList.lengths[0]);
-    int start = (int) midList.offsets[0];
-    assertEquals(1, midListInt.vector[start]);
-    assertEquals("bye", midListStr.toString(start));
-    assertEquals(2, midListInt.vector[start + 1]);
-    assertEquals("sigh", midListStr.toString(start + 1));
-
-    assertEquals(2, list.lengths[0]);
-    start = (int) list.offsets[0];
-    assertEquals(3, listInts.vector[start]);
-    assertEquals("good", listStrs.toString(start));
-    assertEquals(4, listInts.vector[start + 1]);
-    assertEquals("bad", listStrs.toString(start + 1));
-    assertEquals(0, map.lengths[0]);
-    assertEquals(Timestamp.valueOf("2000-03-12 15:00:00"),
-        timestamp.asScratchTimestamp(0));
-    assertEquals(new HiveDecimalWritable(HiveDecimal.create("12345678.6547456")),
-        decs.vector[0]);
-
-    // check the contents of row 7499
-    rows.seekToRow(7499);
-    Assert.assertEquals(true, rows.nextBatch(batch));
-    assertEquals(true, getBoolean(batch, 0));
-    assertEquals(100, getByte(batch, 0));
-    assertEquals(2048, getShort(batch, 0));
-    assertEquals(65536, getInt(batch, 0));
-    assertEquals(Long.MAX_VALUE, getLong(batch, 0));
-    assertEquals(2.0, getFloat(batch, 0), 0.00001);
-    assertEquals(-5.0, getDouble(batch, 0), 0.00001);
-    assertEquals(bytes(), getBinary(batch, 0));
-    assertEquals("bye", getText(batch, 0).toString());
-    assertEquals(false, middle.isNull[0]);
-    assertEquals(2, midList.lengths[0]);
-    start = (int) midList.offsets[0];
-    assertEquals(1, midListInt.vector[start]);
-    assertEquals("bye", midListStr.toString(start));
-    assertEquals(2, midListInt.vector[start + 1]);
-    assertEquals("sigh", midListStr.toString(start + 1));
-    assertEquals(3, list.lengths[0]);
-    start = (int) list.offsets[0];
-    assertEquals(100000000, listInts.vector[start]);
-    assertEquals("cat", listStrs.toString(start));
-    assertEquals(-100000, listInts.vector[start + 1]);
-    assertEquals("in", listStrs.toString(start + 1));
-    assertEquals(1234, listInts.vector[start + 2]);
-    assertEquals("hat", listStrs.toString(start + 2));
-    assertEquals(2, map.lengths[0]);
-    start = (int) map.offsets[0];
-    assertEquals("chani", mapKey.toString(start));
-    assertEquals(5, mapValueInts.vector[start]);
-    assertEquals("chani", mapValueStrs.toString(start));
-    assertEquals("mauddib", mapKey.toString(start + 1));
-    assertEquals(1, mapValueInts.vector[start + 1]);
-    assertEquals("mauddib", mapValueStrs.toString(start + 1));
-    assertEquals(Timestamp.valueOf("2000-03-12 15:00:01"),
-        timestamp.asScratchTimestamp(0));
-    assertEquals(new HiveDecimalWritable(HiveDecimal.create("12345678.6547457")),
-        decs.vector[0]);
-
-    // handle the close up
-    Assert.assertEquals(false, rows.nextBatch(batch));
-    rows.close();
-  }
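
/*
 * Sketch of the contiguity invariant checked in the stripe loop above: each
 * stripe should begin where the previous stripe's index, data, and footer end.
 * The stripe sizes below are hypothetical; no ORC classes are involved.
 */
public class StripeOffsetSketch {
  public static void main(String[] args) {
    long[][] stripes = {                // {offset, indexLength, dataLength, footerLength}
        {3, 10, 100, 5},
        {118, 12, 90, 6},
        {226, 11, 95, 4},
    };
    long expectedOffset = -1;
    for (long[] s : stripes) {
      if (expectedOffset >= 0 && s[0] != expectedOffset) {
        throw new AssertionError("gap or overlap before stripe at offset " + s[0]);
      }
      expectedOffset = s[0] + s[1] + s[2] + s[3];  // the next stripe should start here
    }
    System.out.println("stripes are contiguous");
  }
}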
-
-  @Test
-  public void testTimestamp() throws Exception {
-    TypeDescription schema = TypeDescription.createTimestamp();
-    Writer writer = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf).setSchema(schema).stripeSize(100000)
-            .bufferSize(10000).version(org.apache.orc.OrcFile.Version.V_0_11));
-    List<Timestamp> tslist = Lists.newArrayList();
-    tslist.add(Timestamp.valueOf("2037-01-01 00:00:00.000999"));
-    tslist.add(Timestamp.valueOf("2003-01-01 00:00:00.000000222"));
-    tslist.add(Timestamp.valueOf("1999-01-01 00:00:00.999999999"));
-    tslist.add(Timestamp.valueOf("1995-01-01 00:00:00.688888888"));
-    tslist.add(Timestamp.valueOf("2002-01-01 00:00:00.1"));
-    tslist.add(Timestamp.valueOf("2010-03-02 00:00:00.000009001"));
-    tslist.add(Timestamp.valueOf("2005-01-01 00:00:00.000002229"));
-    tslist.add(Timestamp.valueOf("2006-01-01 00:00:00.900203003"));
-    tslist.add(Timestamp.valueOf("2003-01-01 00:00:00.800000007"));
-    tslist.add(Timestamp.valueOf("1996-08-02 00:00:00.723100809"));
-    tslist.add(Timestamp.valueOf("1998-11-02 00:00:00.857340643"));
-    tslist.add(Timestamp.valueOf("2008-10-02 00:00:00"));
-
-    VectorizedRowBatch batch = new VectorizedRowBatch(1, 1024);
-    TimestampColumnVector vec = new TimestampColumnVector(1024);
-    batch.cols[0] = vec;
-    batch.reset();
-    batch.size = tslist.size();
-    for (int i=0; i < tslist.size(); ++i) {
-      Timestamp ts = tslist.get(i);
-      vec.set(i, ts);
-    }
-    writer.addRowBatch(batch);
-    writer.close();
-
-    Reader reader = OrcFile.createReader(testFilePath,
-        OrcFile.readerOptions(conf).filesystem(fs));
-    RecordReader rows = reader.rows();
-    batch = reader.getSchema().createRowBatch();
-    TimestampColumnVector timestamps = (TimestampColumnVector) batch.cols[0];
-    int idx = 0;
-    while (rows.nextBatch(batch)) {
-      for(int r=0; r < batch.size; ++r) {
-        assertEquals(tslist.get(idx++).getNanos(),
-            timestamps.asScratchTimestamp(r).getNanos());
-      }
-    }
-    Assert.assertEquals(tslist.size(), rows.getRowNumber());
-    assertEquals(0, writer.getSchema().getMaximumId());
-    boolean[] expected = new boolean[] {false};
-    boolean[] included = OrcUtils.includeColumns("", writer.getSchema());
-    assertEquals(true, Arrays.equals(expected, included));
-  }
-
-  @Test
-  public void testStringAndBinaryStatistics() throws Exception {
-
-    TypeDescription schema = TypeDescription.createStruct()
-        .addField("bytes1", TypeDescription.createBinary())
-        .addField("string1", TypeDescription.createString());
-    Writer writer = OrcFile.createWriter(testFilePath,
-                                         OrcFile.writerOptions(conf)
-                                         .setSchema(schema)
-                                         .stripeSize(100000)
-                                         .bufferSize(10000));
-    VectorizedRowBatch batch = schema.createRowBatch();
-    batch.size = 4;
-    BytesColumnVector field1 = (BytesColumnVector) batch.cols[0];
-    BytesColumnVector field2 = (BytesColumnVector) batch.cols[1];
-    field1.setVal(0, bytesArray(0, 1, 2, 3, 4));
-    field1.setVal(1, bytesArray(0, 1, 2, 3));
-    field1.setVal(2, bytesArray(0, 1, 2, 3, 4, 5));
-    field1.noNulls = false;
-    field1.isNull[3] = true;
-    field2.setVal(0, "foo".getBytes());
-    field2.setVal(1, "bar".getBytes());
-    field2.noNulls = false;
-    field2.isNull[2] = true;
-    field2.setVal(3, "hi".getBytes());
-    writer.addRowBatch(batch);
-    writer.close();
-    schema = writer.getSchema();
-    assertEquals(2, schema.getMaximumId());
-
-    Reader reader = OrcFile.createReader(testFilePath,
-        OrcFile.readerOptions(conf).filesystem(fs));
-
-    boolean[] expected = new boolean[] {false, false, true};
-    boolean[] included = OrcUtils.includeColumns("string1", schema);
-    assertEquals(true, Arrays.equals(expected, included));
-
-    expected = new boolean[] {false, false, false};
-    included = OrcUtils.includeColumns("", schema);
-    assertEquals(true, Arrays.equals(expected, included));
-
-    expected = new boolean[] {false, false, false};
-    included = OrcUtils.includeColumns(null, schema);
-    assertEquals(true, Arrays.equals(expected, included));
-
-    // check the stats
-    ColumnStatistics[] stats = reader.getStatistics();
-    assertEquals(4, stats[0].getNumberOfValues());
-    assertEquals("count: 4 hasNull: false", stats[0].toString());
-
-    assertEquals(3, stats[1].getNumberOfValues());
-    assertEquals(15, ((BinaryColumnStatistics) stats[1]).getSum());
-    assertEquals("count: 3 hasNull: true sum: 15", stats[1].toString());
-
-    assertEquals(3, stats[2].getNumberOfValues());
-    assertEquals("bar", ((StringColumnStatistics) stats[2]).getMinimum());
-    assertEquals("hi", ((StringColumnStatistics) stats[2]).getMaximum());
-    assertEquals(8, ((StringColumnStatistics) stats[2]).getSum());
-    assertEquals("count: 3 hasNull: true min: bar max: hi sum: 8",
-        stats[2].toString());
-
-    // read the rows back and check the contents
-    batch = reader.getSchema().createRowBatch();
-    BytesColumnVector bytes = (BytesColumnVector) batch.cols[0];
-    BytesColumnVector strs = (BytesColumnVector) batch.cols[1];
-    RecordReader rows = reader.rows();
-    Assert.assertEquals(true, rows.nextBatch(batch));
-    assertEquals(4, batch.size);
-
-    // check the contents of the first row
-    assertEquals(bytes(0,1,2,3,4), getBinary(bytes, 0));
-    assertEquals("foo", strs.toString(0));
-
-    // check the contents of second row
-    assertEquals(bytes(0,1,2,3), getBinary(bytes, 1));
-    assertEquals("bar", strs.toString(1));
-
-    // check the contents of third row
-    assertEquals(bytes(0,1,2,3,4,5), getBinary(bytes, 2));
-    assertNull(strs.toString(2));
-
-    // check the contents of fourth row
-    assertNull(getBinary(bytes, 3));
-    assertEquals("hi", strs.toString(3));
-
-    // handle the close up
-    Assert.assertEquals(false, rows.nextBatch(batch));
-    rows.close();
-  }
-
-
-  @Test
-  public void testStripeLevelStats() throws Exception {
-    TypeDescription schema = TypeDescription.createStruct()
-        .addField("int1", TypeDescription.createInt())
-        .addField("string1", TypeDescription.createString());
-    Writer writer = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf)
-            .setSchema(schema)
-            .stripeSize(100000)
-            .bufferSize(10000));
-    VectorizedRowBatch batch = schema.createRowBatch();
-    batch.size = 1000;
-    LongColumnVector field1 = (LongColumnVector) batch.cols[0];
-    BytesColumnVector field2 = (BytesColumnVector) batch.cols[1];
-    field1.isRepeating = true;
-    field2.isRepeating = true;
-    for (int b = 0; b < 11; b++) {
-      if (b >= 5) {
-        if (b >= 10) {
-          field1.vector[0] = 3;
-          field2.setVal(0, "three".getBytes());
-        } else {
-          field1.vector[0] = 2;
-          field2.setVal(0, "two".getBytes());
-        }
-      } else {
-        field1.vector[0] = 1;
-        field2.setVal(0, "one".getBytes());
-      }
-      writer.addRowBatch(batch);
-    }
-
-    writer.close();
-    Reader reader = OrcFile.createReader(testFilePath,
-        OrcFile.readerOptions(conf).filesystem(fs));
-
-    schema = writer.getSchema();
-    assertEquals(2, schema.getMaximumId());
-    boolean[] expected = new boolean[] {false, true, false};
-    boolean[] included = OrcUtils.includeColumns("int1", schema);
-    assertEquals(true, Arrays.equals(expected, included));
-
-    List<StripeStatistics> stats = reader.getStripeStatistics();
-    int numStripes = stats.size();
-    assertEquals(3, numStripes);
-    StripeStatistics ss1 = stats.get(0);
-    StripeStatistics ss2 = stats.get(1);
-    StripeStatistics ss3 = stats.get(2);
-
-    assertEquals(5000, ss1.getColumnStatistics()[0].getNumberOfValues());
-    assertEquals(5000, ss2.getColumnStatistics()[0].getNumberOfValues());
-    assertEquals(1000, ss3.getColumnStatistics()[0].getNumberOfValues());
-
-    assertEquals(5000, (ss1.getColumnStatistics()[1]).getNumberOfValues());
-    assertEquals(5000, (ss2.getColumnStatistics()[1]).getNumberOfValues());
-    assertEquals(1000, (ss3.getColumnStatistics()[1]).getNumberOfValues());
-    assertEquals(1, ((IntegerColumnStatistics)ss1.getColumnStatistics()[1]).getMinimum());
-    assertEquals(2, ((IntegerColumnStatistics)ss2.getColumnStatistics()[1]).getMinimum());
-    assertEquals(3, ((IntegerColumnStatistics)ss3.getColumnStatistics()[1]).getMinimum());
-    assertEquals(1, ((IntegerColumnStatistics)ss1.getColumnStatistics()[1]).getMaximum());
-    assertEquals(2, ((IntegerColumnStatistics)ss2.getColumnStatistics()[1]).getMaximum());
-    assertEquals(3, ((IntegerColumnStatistics)ss3.getColumnStatistics()[1]).getMaximum());
-    assertEquals(5000, ((IntegerColumnStatistics)ss1.getColumnStatistics()[1]).getSum());
-    assertEquals(10000, ((IntegerColumnStatistics)ss2.getColumnStatistics()[1]).getSum());
-    assertEquals(3000, ((IntegerColumnStatistics)ss3.getColumnStatistics()[1]).getSum());
-
-    assertEquals(5000, (ss1.getColumnStatistics()[2]).getNumberOfValues());
-    assertEquals(5000, (ss2.getColumnStatistics()[2]).getNumberOfValues());
-    assertEquals(1000, (ss3.getColumnStatistics()[2]).getNumberOfValues());
-    assertEquals("one", ((StringColumnStatistics)ss1.getColumnStatistics()[2]).getMinimum());
-    assertEquals("two", ((StringColumnStatistics)ss2.getColumnStatistics()[2]).getMinimum());
-    assertEquals("three", ((StringColumnStatistics)ss3.getColumnStatistics()[2]).getMinimum());
-    assertEquals("one", ((StringColumnStatistics)ss1.getColumnStatistics()[2]).getMaximum());
-    assertEquals("two", ((StringColumnStatistics) ss2.getColumnStatistics()[2]).getMaximum());
-    assertEquals("three", ((StringColumnStatistics)ss3.getColumnStatistics()[2]).getMaximum());
-    assertEquals(15000, ((StringColumnStatistics)ss1.getColumnStatistics()[2]).getSum());
-    assertEquals(15000, ((StringColumnStatistics)ss2.getColumnStatistics()[2]).getSum());
-    assertEquals(5000, ((StringColumnStatistics)ss3.getColumnStatistics()[2]).getSum());
-
-    RecordReaderImpl recordReader = (RecordReaderImpl) reader.rows();
-    OrcProto.RowIndex[] index = recordReader.readRowIndex(0, null, null).getRowGroupIndex();
-    assertEquals(3, index.length);
-    List<OrcProto.RowIndexEntry> items = index[1].getEntryList();
-    assertEquals(1, items.size());
-    assertEquals(3, items.get(0).getPositionsCount());
-    assertEquals(0, items.get(0).getPositions(0));
-    assertEquals(0, items.get(0).getPositions(1));
-    assertEquals(0, items.get(0).getPositions(2));
-    assertEquals(1,
-                 items.get(0).getStatistics().getIntStatistics().getMinimum());
-    index = recordReader.readRowIndex(1, null, null).getRowGroupIndex();
-    assertEquals(3, index.length);
-    items = index[1].getEntryList();
-    assertEquals(2,
-        items.get(0).getStatistics().getIntStatistics().getMaximum());
-  }
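
/*
 * Sketch of the arithmetic behind the sums asserted above: eleven 1,000-row
 * batches land in three stripes of 5,000 / 5,000 / 1,000 rows, and the string
 * column's "sum" statistic counts characters, not values. The values below
 * simply restate the test's inputs; nothing here touches ORC.
 */
public class StripeStatsArithmetic {
  public static void main(String[] args) {
    long[] rowsPerStripe = {5000, 5000, 1000};
    long[] intValue = {1, 2, 3};
    String[] strValue = {"one", "two", "three"};
    for (int s = 0; s < rowsPerStripe.length; ++s) {
      long intSum = rowsPerStripe[s] * intValue[s];           // 5000, 10000, 3000
      long strSum = rowsPerStripe[s] * strValue[s].length();  // 15000, 15000, 5000
      System.out.println("stripe " + s + ": intSum=" + intSum + ", strSum=" + strSum);
    }
  }
}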
-
-  private static void setInner(StructColumnVector inner, int rowId,
-                               int i, String value) {
-    ((LongColumnVector) inner.fields[0]).vector[rowId] = i;
-    if (value != null) {
-      ((BytesColumnVector) inner.fields[1]).setVal(rowId, value.getBytes());
-    } else {
-      inner.fields[1].isNull[rowId] = true;
-      inner.fields[1].noNulls = false;
-    }
-  }
-
-  private static void checkInner(StructColumnVector inner, int rowId,
-                                 int rowInBatch, int i, String value) {
-    assertEquals("row " + rowId, i,
-        ((LongColumnVector) inner.fields[0]).vector[rowInBatch]);
-    if (value != null) {
-      assertEquals("row " + rowId, value,
-          ((BytesColumnVector) inner.fields[1]).toString(rowInBatch));
-    } else {
-      assertEquals("row " + rowId, true, inner.fields[1].isNull[rowInBatch]);
-      assertEquals("row " + rowId, false, inner.fields[1].noNulls);
-    }
-  }
-
-  private static void setInnerList(ListColumnVector list, int rowId,
-                                   List<InnerStruct> value) {
-    if (value != null) {
-      if (list.childCount + value.size() > list.child.isNull.length) {
-        list.child.ensureSize(list.childCount * 2, true);
-      }
-      list.lengths[rowId] = value.size();
-      list.offsets[rowId] = list.childCount;
-      for (int i = 0; i < list.lengths[rowId]; ++i) {
-        InnerStruct inner = value.get(i);
-        setInner((StructColumnVector) list.child, i + list.childCount,
-            inner.int1, inner.string1.toString());
-      }
-      list.childCount += value.size();
-    } else {
-      list.isNull[rowId] = true;
-      list.noNulls = false;
-    }
-  }
-
-  private static void checkInnerList(ListColumnVector list, int rowId,
-                                     int rowInBatch, List<InnerStruct> value) {
-    if (value != null) {
-      assertEquals("row " + rowId, value.size(), list.lengths[rowInBatch]);
-      int start = (int) list.offsets[rowInBatch];
-      for (int i = 0; i < list.lengths[rowInBatch]; ++i) {
-        InnerStruct inner = value.get(i);
-        checkInner((StructColumnVector) list.child, rowId, i + start,
-            inner.int1, inner.string1.toString());
-      }
-      list.childCount += value.size();
-    } else {
-      assertEquals("row " + rowId, true, list.isNull[rowInBatch]);
-      assertEquals("row " + rowId, false, list.noNulls);
-    }
-  }
-
-  private static void setInnerMap(MapColumnVector map, int rowId,
-                                  Map<String, InnerStruct> value) {
-    if (value != null) {
-      if (map.childCount >= map.keys.isNull.length) {
-        map.keys.ensureSize(map.childCount * 2, true);
-        map.values.ensureSize(map.childCount * 2, true);
-      }
-      map.lengths[rowId] = value.size();
-      int offset = map.childCount;
-      map.offsets[rowId] = offset;
-
-      for (Map.Entry<String, InnerStruct> entry : value.entrySet()) {
-        ((BytesColumnVector) map.keys).setVal(offset, entry.getKey().getBytes());
-        InnerStruct inner = entry.getValue();
-        setInner((StructColumnVector) map.values, offset, inner.int1,
-            inner.string1.toString());
-        offset += 1;
-      }
-      map.childCount = offset;
-    } else {
-      map.isNull[rowId] = true;
-      map.noNulls = false;
-    }
-  }
-
-  private static void checkInnerMap(MapColumnVector map, int rowId,
-                                    int rowInBatch,
-                                    Map<String, InnerStruct> value) {
-    if (value != null) {
-      assertEquals("row " + rowId, value.size(), map.lengths[rowInBatch]);
-      int offset = (int) map.offsets[rowInBatch];
-      for(int i=0; i < value.size(); ++i) {
-        String key = ((BytesColumnVector) map.keys).toString(offset + i);
-        InnerStruct expected = value.get(key);
-        checkInner((StructColumnVector) map.values, rowId, offset + i,
-            expected.int1, expected.string1.toString());
-      }
-    } else {
-      assertEquals("row " + rowId, true, map.isNull[rowId]);
-      assertEquals("row " + rowId, false, map.noNulls);
-    }
-  }
-
-  private static void setMiddleStruct(StructColumnVector middle, int rowId,
-                                      MiddleStruct value) {
-    if (value != null) {
-      setInnerList((ListColumnVector) middle.fields[0], rowId, value.list);
-    } else {
-      middle.isNull[rowId] = true;
-      middle.noNulls = false;
-    }
-  }
-
-  private static void checkMiddleStruct(StructColumnVector middle, int rowId,
-                                        int rowInBatch, MiddleStruct value) {
-    if (value != null) {
-      checkInnerList((ListColumnVector) middle.fields[0], rowId, rowInBatch,
-          value.list);
-    } else {
-      assertEquals("row " + rowId, true, middle.isNull[rowInBatch]);
-      assertEquals("row " + rowId, false, middle.noNulls);
-    }
-  }
-
-  private static void setBigRow(VectorizedRowBatch batch, int rowId,
-                                Boolean b1, Byte b2, Short s1,
-                                Integer i1, Long l1, Float f1,
-                                Double d1, BytesWritable b3, String s2,
-                                MiddleStruct m1, List<InnerStruct> l2,
-                                Map<String, InnerStruct> m2) {
-    ((LongColumnVector) batch.cols[0]).vector[rowId] = b1 ? 1 : 0;
-    ((LongColumnVector) batch.cols[1]).vector[rowId] = b2;
-    ((LongColumnVector) batch.cols[2]).vector[rowId] = s1;
-    ((LongColumnVector) batch.cols[3]).vector[rowId] = i1;
-    ((LongColumnVector) batch.cols[4]).vector[rowId] = l1;
-    ((DoubleColumnVector) batch.cols[5]).vector[rowId] = f1;
-    ((DoubleColumnVector) batch.cols[6]).vector[rowId] = d1;
-    if (b3 != null) {
-      ((BytesColumnVector) batch.cols[7]).setVal(rowId, b3.getBytes(), 0,
-          b3.getLength());
-    } else {
-      batch.cols[7].isNull[rowId] = true;
-      batch.cols[7].noNulls = false;
-    }
-    if (s2 != null) {
-      ((BytesColumnVector) batch.cols[8]).setVal(rowId, s2.getBytes());
-    } else {
-      batch.cols[8].isNull[rowId] = true;
-      batch.cols[8].noNulls = false;
-    }
-    setMiddleStruct((StructColumnVector) batch.cols[9], rowId, m1);
-    setInnerList((ListColumnVector) batch.cols[10], rowId, l2);
-    setInnerMap((MapColumnVector) batch.cols[11], rowId, m2);
-  }
-
-  private static void checkBigRow(VectorizedRowBatch batch,
-                                  int rowInBatch,
-                                  int rowId,
-                                  boolean b1, byte b2, short s1,
-                                  int i1, long l1, float f1,
-                                  double d1, BytesWritable b3, String s2,
-                                  MiddleStruct m1, List<InnerStruct> l2,
-                                  Map<String, InnerStruct> m2) {
-    assertEquals("row " + rowId, b1, getBoolean(batch, rowInBatch));
-    assertEquals("row " + rowId, b2, getByte(batch, rowInBatch));
-    assertEquals("row " + rowId, s1, getShort(batch, rowInBatch));
-    assertEquals("row " + rowId, i1, getInt(batch, rowInBatch));
-    assertEquals("row " + rowId, l1, getLong(batch, rowInBatch));
-    assertEquals("row " + rowId, f1, getFloat(batch, rowInBatch), 0.0001);
-    assertEquals("row " + rowId, d1, getDouble(batch, rowInBatch), 0.0001);
-    if (b3 != null) {
-      BytesColumnVector bytes = (BytesColumnVector) batch.cols[7];
-      assertEquals("row " + rowId, b3.getLength(), bytes.length[rowInBatch]);
-      for(int i=0; i < b3.getLength(); ++i) {
-        assertEquals("row " + rowId + " byte " + i, b3.getBytes()[i],
-            bytes.vector[rowInBatch][bytes.start[rowInBatch] + i]);
-      }
-    } else {
-      assertEquals("row " + rowId, true, batch.cols[7].isNull[rowInBatch]);
-      assertEquals("row " + rowId, false, batch.cols[7].noNulls);
-    }
-    if (s2 != null) {
-      assertEquals("row " + rowId, s2, getText(batch, rowInBatch).toString());
-    } else {
-      assertEquals("row " + rowId, true, batch.cols[8].isNull[rowInBatch]);
-      assertEquals("row " + rowId, false, batch.cols[8].noNulls);
-    }
-    checkMiddleStruct((StructColumnVector) batch.cols[9], rowId, rowInBatch,
-        m1);
-    checkInnerList((ListColumnVector) batch.cols[10], rowId, rowInBatch, l2);
-    checkInnerMap((MapColumnVector) batch.cols[11], rowId, rowInBatch, m2);
-  }
-
-  private static boolean getBoolean(VectorizedRowBatch batch, int rowId) {
-    return ((LongColumnVector) batch.cols[0]).vector[rowId] != 0;
-  }
-
-  private static byte getByte(VectorizedRowBatch batch, int rowId) {
-    return (byte) ((LongColumnVector) batch.cols[1]).vector[rowId];
-  }
-
-  private static short getShort(VectorizedRowBatch batch, int rowId) {
-    return (short) ((LongColumnVector) batch.cols[2]).vector[rowId];
-  }
-
-  private static int getInt(VectorizedRowBatch batch, int rowId) {
-    return (int) ((LongColumnVector) batch.cols[3]).vector[rowId];
-  }
-
-  private static long getLong(VectorizedRowBatch batch, int rowId) {
-    return ((LongColumnVector) batch.cols[4]).vector[rowId];
-  }
-
-  private static float getFloat(VectorizedRowBatch batch, int rowId) {
-    return (float) ((DoubleColumnVector) batch.cols[5]).vector[rowId];
-  }
-
-  private static double getDouble(VectorizedRowBatch batch, int rowId) {
-    return ((DoubleColumnVector) batch.cols[6]).vector[rowId];
-  }
-
-  private static BytesWritable getBinary(BytesColumnVector column, int rowId) {
-    if (column.isRepeating) {
-      rowId = 0;
-    }
-    if (column.noNulls || !column.isNull[rowId]) {
-      return new BytesWritable(Arrays.copyOfRange(column.vector[rowId],
-          column.start[rowId], column.start[rowId] + column.length[rowId]));
-    } else {
-      return null;
-    }
-  }
-
-  private static BytesWritable getBinary(VectorizedRowBatch batch, int rowId) {
-    return getBinary((BytesColumnVector) batch.cols[7], rowId);
-  }
-
-  private static Text getText(BytesColumnVector vector, int rowId) {
-    if (vector.isRepeating) {
-      rowId = 0;
-    }
-    if (vector.noNulls || !vector.isNull[rowId]) {
-      return new Text(Arrays.copyOfRange(vector.vector[rowId],
-          vector.start[rowId], vector.start[rowId] + vector.length[rowId]));
-    } else {
-      return null;
-    }
-  }
-
-  private static Text getText(VectorizedRowBatch batch, int rowId) {
-    return getText((BytesColumnVector) batch.cols[8], rowId);
-  }
-
-  private static InnerStruct getInner(StructColumnVector vector,
-                                      int rowId) {
-    return new InnerStruct(
-        (int) ((LongColumnVector) vector.fields[0]).vector[rowId],
-        getText((BytesColumnVector) vector.fields[1], rowId));
-  }
-
-  private static List<InnerStruct> getList(ListColumnVector cv,
-                                           int rowId) {
-    if (cv.isRepeating) {
-      rowId = 0;
-    }
-    if (cv.noNulls || !cv.isNull[rowId]) {
-      List<InnerStruct> result =
-          new ArrayList<InnerStruct>((int) cv.lengths[rowId]);
-      for(long i=cv.offsets[rowId];
-          i < cv.offsets[rowId] + cv.lengths[rowId]; ++i) {
-        result.add(getInner((StructColumnVector) cv.child, (int) i));
-      }
-      return result;
-    } else {
-      return null;
-    }
-  }
-
-  private static List<InnerStruct> getMidList(VectorizedRowBatch batch,
-                                              int rowId) {
-    return getList((ListColumnVector) ((StructColumnVector) batch.cols[9])
-        .fields[0], rowId);
-  }
-
-  private static List<InnerStruct> getList(VectorizedRowBatch batch,
-                                           int rowId) {
-    return getList((ListColumnVector) batch.cols[10], rowId);
-  }
-
-  private static Map<Text, InnerStruct> getMap(VectorizedRowBatch batch,
-                                               int rowId) {
-    MapColumnVector cv = (MapColumnVector) batch.cols[11];
-    if (cv.isRepeating) {
-      rowId = 0;
-    }
-    if (cv.noNulls || !cv.isNull[rowId]) {
-      Map<Text, InnerStruct> result =
-          new HashMap<Text, InnerStruct>((int) cv.lengths[rowId]);
-      for(long i=cv.offsets[rowId];
-          i < cv.offsets[rowId] + cv.lengths[rowId]; ++i) {
-        result.put(getText((BytesColumnVector) cv.keys, (int) i),
-            getInner((StructColumnVector) cv.values, (int) i));
-      }
-      return result;
-    } else {
-      return null;
-    }
-  }
-
-  private static TypeDescription createInnerSchema() {
-    return TypeDescription.createStruct()
-        .addField("int1", TypeDescription.createInt())
-        .addField("string1", TypeDescription.createString());
-  }
-
-  private static TypeDescription createBigRowSchema() {
-    return TypeDescription.createStruct()
-        .addField("boolean1", TypeDescription.createBoolean())
-        .addField("byte1", TypeDescription.createByte())
-        .addField("short1", TypeDescription.createShort())
-        .addField("int1", TypeDescription.createInt())
-        .addField("long1", TypeDescription.createLong())
-        .addField("float1", TypeDescription.createFloat())
-        .addField("double1", TypeDescription.createDouble())
-        .addField("bytes1", TypeDescription.createBinary())
-        .addField("string1", TypeDescription.createString())
-        .addField("middle", TypeDescription.createStruct()
-            .addField("list", TypeDescription.createList(createInnerSchema())))
-        .addField("list", TypeDescription.createList(createInnerSchema()))
-        .addField("map", TypeDescription.createMap(
-            TypeDescription.createString(),
-            createInnerSchema()));
-  }
-
-  static void assertArrayEquals(boolean[] expected, boolean[] actual) {
-    assertEquals(expected.length, actual.length);
-    boolean diff = false;
-    for(int i=0; i < expected.length; ++i) {
-      if (expected[i] != actual[i]) {
-        System.out.println("Difference at " + i + " expected: " + expected[i] +
-          " actual: " + actual[i]);
-        diff = true;
-      }
-    }
-    assertEquals(false, diff);
-  }
-
-  @Test
-  public void test1() throws Exception {
-    TypeDescription schema = createBigRowSchema();
-    Writer writer = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf)
-            .setSchema(schema)
-            .stripeSize(100000)
-            .bufferSize(10000));
-    VectorizedRowBatch batch = schema.createRowBatch();
-    batch.size = 2;
-    setBigRow(batch, 0, false, (byte) 1, (short) 1024, 65536,
-        Long.MAX_VALUE, (float) 1.0, -15.0, bytes(0, 1, 2, 3, 4), "hi",
-        new MiddleStruct(inner(1, "bye"), inner(2, "sigh")),
-        list(inner(3, "good"), inner(4, "bad")),
-        map());
-    setBigRow(batch, 1, true, (byte) 100, (short) 2048, 65536,
-        Long.MAX_VALUE, (float) 2.0, -5.0, bytes(), "bye",
-        new MiddleStruct(inner(1, "bye"), inner(2, "sigh")),
-        list(inner(100000000, "cat"), inner(-100000, "in"), inner(1234, "hat")),
-        map(inner(5, "chani"), inner(1, "mauddib")));
-    writer.addRowBatch(batch);
-    writer.close();
-    Reader reader = OrcFile.createReader(testFilePath,
-        OrcFile.readerOptions(conf).filesystem(fs));
-
-    schema = writer.getSchema();
-    assertEquals(23, schema.getMaximumId());
-    boolean[] expected = new boolean[] {false, false, false, false, false,
-        false, false, false, false, false,
-        false, false, false, false, false,
-        false, false, false, false, false,
-        false, false, false, false};
-    boolean[] included = OrcUtils.includeColumns("", schema);
-    assertEquals(true, Arrays.equals(expected, included));
-
-    expected = new boolean[] {false, true, false, false, false,
-        false, false, false, false, true,
-        true, true, true, true, true,
-        false, false, false, false, true,
-        true, true, true, true};
-    included = OrcUtils.includeColumns("boolean1,string1,middle,map", schema);
-
-    assertArrayEquals(expected, included);
-
-    expected = new boolean[] {false, true, false, false, false,
-        false, false, false, false, true,
-        true, true, true, true, true,
-        false, false, false, false, true,
-        true, true, true, true};
-    included = OrcUtils.includeColumns("boolean1,string1,middle,map", schema);
-    assertArrayEquals(expected, included);
-
-    expected = new boolean[] {false, true, true, true, true,
-        true, true, true, true, true,
-        true, true, true, true, true,
-        true, true, true, true, true,
-        true, true, true, true};
-    included = OrcUtils.includeColumns(
-        "boolean1,byte1,short1,int1,long1,float1,double1,bytes1,string1,middle,list,map",
-        schema);
-    assertEquals(true, Arrays.equals(expected, included));
-
-    // check the stats
-    ColumnStatistics[] stats = reader.getStatistics();
-    assertEquals(2, stats[1].getNumberOfValues());
-    assertEquals(1, ((BooleanColumnStatistics) stats[1]).getFalseCount());
-    assertEquals(1, ((BooleanColumnStatistics) stats[1]).getTrueCount());
-    assertEquals("count: 2 hasNull: false true: 1", stats[1].toString());
-
-    assertEquals(2048, ((IntegerColumnStatistics) stats[3]).getMaximum());
-    assertEquals(1024, ((IntegerColumnStatistics) stats[3]).getMinimum());
-    assertEquals(true, ((IntegerColumnStatistics) stats[3]).isSumDefined());
-    assertEquals(3072, ((IntegerColumnStatistics) stats[3]).getSum());
-    assertEquals("count: 2 hasNull: false min: 1024 max: 2048 sum: 3072",
-        stats[3].toString());
-
-    StripeStatistics ss = reader.getStripeStatistics().get(0);
-    assertEquals(2, ss.getColumnStatistics()[0].getNumberOfValues());
-    assertEquals(1, ((BooleanColumnStatistics) ss.getColumnStatistics()[1]).getTrueCount());
-    assertEquals(1024, ((IntegerColumnStatistics) ss.getColumnStatistics()[3]).getMinimum());
-    assertEquals(2048, ((IntegerColumnStatistics) ss.getColumnStatistics()[3]).getMaximum());
-    assertEquals(3072, ((IntegerColumnStatistics) ss.getColumnStatistics()[3]).getSum());
-    assertEquals(-15.0, ((DoubleColumnStatistics) stats[7]).getMinimum(), 0.0001);
-    assertEquals(-5.0, ((DoubleColumnStatistics) stats[7]).getMaximum(), 0.0001);
-    assertEquals(-20.0, ((DoubleColumnStatistics) stats[7]).getSum(), 0.00001);
-    assertEquals("count: 2 hasNull: false min: -15.0 max: -5.0 sum: -20.0",
-        stats[7].toString());
-
-    assertEquals("count: 2 hasNull: false min: bye max: hi sum: 5", stats[9].toString());
-
-    // check the schema
-    TypeDescription readerSchema = reader.getSchema();
-    assertEquals(TypeDescription.Category.STRUCT, readerSchema.getCategory());
-    assertEquals("struct<boolean1:boolean,byte1:tinyint,short1:smallint,"
-        + "int1:int,long1:bigint,float1:float,double1:double,bytes1:"
-        + "binary,string1:string,middle:struct<list:array<struct<int1:int,"
-        + "string1:string>>>,list:array<struct<int1:int,string1:string>>,"
-        + "map:map<string,struct<int1:int,string1:string>>>",
-        readerSchema.toString());
-    List<String> fieldNames = readerSchema.getFieldNames();
-    List<TypeDescription> fieldTypes = readerSchema.getChildren();
-    assertEquals("boolean1", fieldNames.get(0));
-    assertEquals(TypeDescription.Category.BOOLEAN, fieldTypes.get(0).getCategory());
-    assertEquals("byte1", fieldNames.get(1));
-    assertEquals(TypeDescription.Category.BYTE, fieldTypes.get(1).getCategory());
-    assertEquals("short1", fieldNames.get(2));
-    assertEquals(TypeDescription.Category.SHORT, fieldTypes.get(2).getCategory());
-    assertEquals("int1", fieldNames.get(3));
-    assertEquals(TypeDescription.Category.INT, fieldTypes.get(3).getCategory());
-    assertEquals("long1", fieldNames.get(4));
-    assertEquals(TypeDescription.Category.LONG, fieldTypes.get(4).getCategory());
-    assertEquals("float1", fieldNames.get(5));
-    assertEquals(TypeDescription.Category.FLOAT, fieldTypes.get(5).getCategory());
-    assertEquals("double1", fieldNames.get(6));
-    assertEquals(TypeDescription.Category.DOUBLE, fieldTypes.get(6).getCategory());
-    assertEquals("bytes1", fieldNames.get(7));
-    assertEquals(TypeDescription.Category.BINARY, fieldTypes.get(7).getCategory());
-    assertEquals("string1", fieldNames.get(8));
-    assertEquals(TypeDescription.Category.STRING, fieldTypes.get(8).getCategory());
-    assertEquals("middle", fieldNames.get(9));
-    TypeDescription middle = fieldTypes.get(9);
-    assertEquals(TypeDescription.Category.STRUCT, middle.getCategory());
-    TypeDescription midList = middle.getChildren().get(0);
-    assertEquals(TypeDescription.Category.LIST, midList.getCategory());
-    TypeDescription inner = midList.getChildren().get(0);
-    assertEquals(TypeDescription.Category.STRUCT, inner.getCategory());
-    assertEquals("int1", inner.getFieldNames().get(0));
-    assertEquals("string1", inner.getFieldNames().get(1));
-
-    RecordReader rows = reader.rows();
-    // create a new batch
-    batch = readerSchema.createRowBatch();
-    Assert.assertEquals(true, rows.nextBatch(batch));
-    assertEquals(2, batch.size);
-    Assert.assertEquals(false, rows.nextBatch(batch));
-
-    // check the contents of the first row
-    assertEquals(false, getBoolean(batch, 0));
-    assertEquals(1, getByte(batch, 0));
-    assertEquals(1024, getShort(batch, 0));
-    assertEquals(65536, getInt(batch, 0));
-    assertEquals(Long.MAX_VALUE, getLong(batch, 0));
-    assertEquals(1.0, getFloat(batch, 0), 0.00001);
-    assertEquals(-15.0, getDouble(batch, 0), 0.00001);
-    assertEquals(bytes(0,1,2,3,4), getBinary(batch, 0));
-    assertEquals("hi", getText(batch, 0).toString());
-    List<InnerStruct> midRow = getMidList(batch, 0);
-    assertNotNull(midRow);
-    assertEquals(2, midRow.size());
-    assertEquals(1, midRow.get(0).int1);
-    assertEquals("bye", midRow.get(0).string1.toString());
-    assertEquals(2, midRow.get(1).int1);
-    assertEquals("sigh", midRow.get(1).string1.toString());
-    List<InnerStruct> list = getList(batch, 0);
-    assertEquals(2, list.size());
-    assertEquals(3, list.get(0).int1);
-    assertEquals("good", list.get(0).string1.toString());
-    assertEquals(4, list.get(1).int1);
-    assertEquals("bad", list.get(1).string1.toString());
-    Map<Text, InnerStruct> map = getMap(batch, 0);
-    assertEquals(0, map.size());
-
-    // check the contents of second row
-    assertEquals(true, getBoolean(batch, 1));
-    assertEquals(100, getByte(batch, 1));
-    assertEquals(2048, getShort(batch, 1));
-    assertEquals(65536, getInt(batch, 1));
-    assertEquals(Long.MAX_VALUE, getLong(batch, 1));
-    assertEquals(2.0, getFloat(batch, 1), 0.00001);
-    assertEquals(-5.0, getDouble(batch, 1), 0.00001);
-    assertEquals(bytes(), getBinary(batch, 1));
-    assertEquals("bye", getText(batch, 1).toString());
-    midRow = getMidList(batch, 1);
-    assertNotNull(midRow);
-    assertEquals(2, midRow.size());
-    assertEquals(1, midRow.get(0).int1);
-    assertEquals("bye", midRow.get(0).string1.toString());
-    assertEquals(2, midRow.get(1).int1);
-    assertEquals("sigh", midRow.get(1).string1.toString());
-    list = getList(batch, 1);
-    assertEquals(3, list.size());
-    assertEquals(100000000, list.get(0).int1);
-    assertEquals("cat", list.get(0).string1.toString());
-    assertEquals(-100000, list.get(1).int1);
-    assertEquals("in", list.get(1).string1.toString());
-    assertEquals(1234, list.get(2).int1);
-    assertEquals("hat", list.get(2).string1.toString());
-    map = getMap(batch, 1);
-    assertEquals(2, map.size());
-    InnerStruct value = map.get(new Text("chani"));
-    assertEquals(5, value.int1);
-    assertEquals("chani", value.string1.toString());
-    value = map.get(new Text("mauddib"));
-    assertEquals(1, value.int1);
-    assertEquals("mauddib", value.string1.toString());
-
-    // handle the close up
-    Assert.assertEquals(false, rows.nextBatch(batch));
-    rows.close();
-  }
-
-  @Test
-  public void testColumnProjection() throws Exception {
-    TypeDescription schema = createInnerSchema();
-    Writer writer = OrcFile.createWriter(testFilePath,
-                                         OrcFile.writerOptions(conf)
-                                         .setSchema(schema)
-                                         .stripeSize(1000)
-                                         .compress(CompressionKind.NONE)
-                                         .bufferSize(100)
-                                         .rowIndexStride(1000));
-    VectorizedRowBatch batch = schema.createRowBatch();
-    Random r1 = new Random(1);
-    Random r2 = new Random(2);
-    int x;
-    int minInt=0, maxInt=0;
-    String y;
-    String minStr = null, maxStr = null;
-    batch.size = 1000;
-    boolean first = true;
-    for(int b=0; b < 21; ++b) {
-      for(int r=0; r < 1000; ++r) {
-        x = r1.nextInt();
-        y = Long.toHexString(r2.nextLong());
-        if (first || x < minInt) {
-          minInt = x;
-        }
-        if (first || x > maxInt) {
-          maxInt = x;
-        }
-        if (first || y.compareTo(minStr) < 0) {
-          minStr = y;
-        }
-        if (first || y.compareTo(maxStr) > 0) {
-          maxStr = y;
-        }
-        first = false;
-        ((LongColumnVector) batch.cols[0]).vector[r] = x;
-        ((BytesColumnVector) batch.cols[1]).setVal(r, y.getBytes());
-      }
-      writer.addRowBatch(batch);
-    }
-    writer.close();
-    Reader reader = OrcFile.createReader(testFilePath,
-        OrcFile.readerOptions(conf).filesystem(fs));
-
-    // check out the statistics
-    ColumnStatistics[] stats = reader.getStatistics();
-    assertEquals(3, stats.length);
-    for(ColumnStatistics s: stats) {
-      assertEquals(21000, s.getNumberOfValues());
-      if (s instanceof IntegerColumnStatistics) {
-        assertEquals(minInt, ((IntegerColumnStatistics) s).getMinimum());
-        assertEquals(maxInt, ((IntegerColumnStatistics) s).getMaximum());
-      } else if (s instanceof  StringColumnStatistics) {
-        assertEquals(maxStr, ((StringColumnStatistics) s).getMaximum());
-        assertEquals(minStr, ((StringColumnStatistics) s).getMinimum());
-      }
-    }
-
-    // check out the types
-    TypeDescription type = reader.getSchema();
-    assertEquals(TypeDescription.Category.STRUCT, type.getCategory());
-    assertEquals(2, type.getChildren().size());
-    TypeDescription type1 = type.getChildren().get(0);
-    TypeDescription type2 = type.getChildren().get(1);
-    assertEquals(TypeDescription.Category.INT, type1.getCategory());
-    assertEquals(TypeDescription.Category.STRING, type2.getCategory());
-    assertEquals("struct<int1:int,string1:string>", type.toString());
-
-    // read the contents and make sure they match
-    RecordReader rows1 = reader.rows(
-        new Reader.Options().include(new boolean[]{true, true, false}));
-    RecordReader rows2 = reader.rows(
-        new Reader.Options().include(new boolean[]{true, false, true}));
-    r1 = new Random(1);
-    r2 = new Random(2);
-    VectorizedRowBatch batch1 = reader.getSchema().createRowBatch(1000);
-    VectorizedRowBatch batch2 = reader.getSchema().createRowBatch(1000);
-    for(int i = 0; i < 21000; i += 1000) {
-      Assert.assertEquals(true, rows1.nextBatch(batch1));
-      Assert.assertEquals(true, rows2.nextBatch(batch2));
-      assertEquals(1000, batch1.size);
-      assertEquals(1000, batch2.size);
-      for(int j=0; j < 1000; ++j) {
-        assertEquals(r1.nextInt(),
-            ((LongColumnVector) batch1.cols[0]).vector[j]);
-        assertEquals(Long.toHexString(r2.nextLong()),
-            ((BytesColumnVector) batch2.cols[1]).toString(j));
-      }
-    }
-    Assert.assertEquals(false, rows1.nextBatch(batch1));
-    Assert.assertEquals(false, rows2.nextBatch(batch2));
-    rows1.close();
-    rows2.close();
-  }
-
-  @Test
-  public void testEmptyFile() throws Exception {
-    TypeDescription schema = createBigRowSchema();
-    Writer writer = OrcFile.createWriter(testFilePath,
-                                         OrcFile.writerOptions(conf)
-                                         .setSchema(schema)
-                                         .stripeSize(1000)
-                                         .compress(CompressionKind.NONE)
-                                         .bufferSize(100));
-    writer.close();
-    Reader reader = OrcFile.createReader(testFilePath,
-        OrcFile.readerOptions(conf).filesystem(fs));
-    VectorizedRowBatch batch = reader.getSchema().createRowBatch();
-    Assert.assertEquals(false, reader.rows().nextBatch(batch));
-    Assert.assertEquals(CompressionKind.NONE, reader.getCompressionKind());
-    Assert.assertEquals(0, reader.getNumberOfRows());
-    Assert.assertEquals(0, reader.getCompressionSize());
-    Assert.assertEquals(false, reader.getMetadataKeys().iterator().hasNext());
-    Assert.assertEquals(3, reader.getContentLength());
-    Assert.assertEquals(false, reader.getStripes().iterator().hasNext());
-  }
-
-  @Test
-  public void metaData() throws Exception {
-    TypeDescription schema = createBigRowSchema();
-    Writer writer = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf)
-            .setSchema(schema)
-            .stripeSize(1000)
-            .compress(CompressionKind.NONE)
-            .bufferSize(100));
-    writer.addUserMetadata("my.meta", byteBuf(1, 2, 3, 4, 5, 6, 7, -1, -2, 127,
-                                              -128));
-    writer.addUserMetadata("clobber", byteBuf(1, 2, 3));
-    writer.addUserMetadata("clobber", byteBuf(4, 3, 2, 1));
-    ByteBuffer bigBuf = ByteBuffer.allocate(40000);
-    Random random = new Random(0);
-    random.nextBytes(bigBuf.array());
-    writer.addUserMetadata("big", bigBuf);
-    bigBuf.position(0);
-    VectorizedRowBatch batch = schema.createRowBatch();
-    batch.size = 1;
-    setBigRow(batch, 0, true, (byte) 127, (short) 1024, 42,
-        42L * 1024 * 1024 * 1024, (float) 3.1415, -2.713, null,
-        null, null, null, null);
-    writer.addRowBatch(batch);
-    writer.addUserMetadata("clobber", byteBuf(5,7,11,13,17,19));
-    writer.close();
-
-    Reader reader = OrcFile.createReader(testFilePath,
-        OrcFile.readerOptions(conf).filesystem(fs));
-    Assert.assertEquals(byteBuf(5, 7, 11, 13, 17, 19), reader.getMetadataValue("clobber"));
-    Assert.assertEquals(byteBuf(1, 2, 3, 4, 5, 6, 7, -1, -2, 127, -128),
-        reader.getMetadataValue("my.meta"));
-    Assert.assertEquals(bigBuf, reader.getMetadataValue("big"));
-    try {
-      reader.getMetadataValue("unknown");
-      assertTrue(false);
-    } catch (IllegalArgumentException iae) {
-      // PASS
-    }
-    int i = 0;
-    for(String key: reader.getMetadataKeys()) {
-      if ("my.meta".equals(key) ||
-          "clobber".equals(key) ||
-          "big".equals(key)) {
-        i += 1;
-      } else {
-        throw new IllegalArgumentException("unknown key " + key);
-      }
-    }
-    assertEquals(3, i);
-    int numStripes = reader.getStripeStatistics().size();
-    assertEquals(1, numStripes);
-  }
-
-  /**
-   * Generate an ORC file with a range of dates and times and verify that it
-   * reads back correctly.
-   */
-  public void createOrcDateFile(Path file, int minYear, int maxYear
-                                ) throws IOException {
-    TypeDescription schema = TypeDescription.createStruct()
-        .addField("time", TypeDescription.createTimestamp())
-        .addField("date", TypeDescription.createDate());
-    Writer writer = OrcFile.createWriter(file,
-        OrcFile.writerOptions(conf)
-            .setSchema(schema)
-            .stripeSize(100000)
-            .bufferSize(10000)
-            .blockPadding(false));
-    VectorizedRowBatch batch = schema.createRowBatch();
-    batch.size = 1000;
-    for (int year = minYear; year < maxYear; ++year) {
-      for (int ms = 1000; ms < 2000; ++ms) {
-        TimestampColumnVector timestampColVector = (TimestampColumnVector) batch.cols[0];
-        timestampColVector.set(ms - 1000,
-            Timestamp.valueOf(year +
-                "-05-05 12:34:56." + ms));
-        ((LongColumnVector) batch.cols[1]).vector[ms - 1000] =
-            new DateWritable(new Date(year - 1900, 11, 25)).getDays();
-      }
-      writer.addRowBatch(batch);
-    }
-    writer.close();
-    Reader reader = OrcFile.createReader(file,
-        OrcFile.readerOptions(conf));
-    RecordReader rows = reader.rows();
-    batch = reader.getSchema().createRowBatch(1000);
-    TimestampColumnVector times = (TimestampColumnVector) batch.cols[0];
-    LongColumnVector dates = (LongColumnVector) batch.cols[1];
-    for (int year = minYear; year < maxYear; ++year) {
-      rows.nextBatch(batch);
-      assertEquals(1000, batch.size);
-      for(int ms = 1000; ms < 2000; ++ms) {
-        StringBuilder buffer = new StringBuilder();
-        times.stringifyValue(buffer, ms - 1000);
-        String expected = Integer.toString(year) + "-05-05 12:34:56.";
-        // suppress the final zeros on the string by dividing by the largest
-        // power of 10 that divides evenly.
-        int roundedMs = ms;
-        for(int round = 1000; round > 0; round /= 10) {
-          if (ms % round == 0) {
-            roundedMs = ms / round;
-            break;
-          }
-        }
-        expected += roundedMs;
-        assertEquals(expected, buffer.toString());
-        assertEquals(Integer.toString(year) + "-12-25",
-            new DateWritable((int) dates.vector[ms - 1000]).toString());
-      }
-    }
-    rows.nextBatch(batch);
-    assertEquals(0, batch.size);
-  }
-
-  @Test
-  public void testDate1900() throws Exception {
-    createOrcDateFile(testFilePath, 1900, 1970);
-  }
-
-  @Test
-  public void testDate2038() throws Exception {
-    createOrcDateFile(testFilePath, 2038, 2250);
-  }
-
-  private static void setUnion(VectorizedRowBatch batch, int rowId,
-                               Timestamp ts, Integer tag, Integer i, String s,
-                               HiveDecimalWritable dec) {
-    UnionColumnVector union = (UnionColumnVector) batch.cols[1];
-    if (ts != null) {
-      TimestampColumnVector timestampColVector = (TimestampColumnVector) batch.cols[0];
-      timestampColVector.set(rowId, ts);
-    } else {
-      batch.cols[0].isNull[rowId] = true;
-      batch.cols[0].noNulls = false;
-    }
-    if (tag != null) {
-      union.tags[rowId] = tag;
-      if (tag == 0) {
-        if (i != null) {
-          ((LongColumnVector) union.fields[tag]).vector[rowId] = i;
-        } else {
-          union.fields[tag].isNull[rowId] = true;
-          union.fields[tag].noNulls = false;
-        }
-      } else if (tag == 1) {
-        if (s != null) {
-          ((BytesColumnVector) union.fields[tag]).setVal(rowId, s.getBytes());
-        } else {
-          union.fields[tag].isNull[rowId] = true;
-          union.fields[tag].noNulls = false;
-        }
-      } else {
-        throw new IllegalArgumentException("Bad tag " + tag);
-      }
-    } else {
-      batch.cols[1].isNull[rowId] = true;
-      batch.cols[1].noNulls = false;
-    }
-    if (dec != null) {
-      ((DecimalColumnVector) batch.cols[2]).vector[rowId] = dec;
-    } else {
-      batch.cols[2].isNull[rowId] = true;
-      batch.cols[2].noNulls = false;
-    }
-  }
-
-  /**
-   * We test union, timestamp, and decimal separately since we need to make the
-   * object inspector manually. (The Hive reflection-based object inspector
-   * doesn't handle them properly.)
-   */
-  @Test
-  public void testUnionAndTimestamp() throws Exception {
-    TypeDescription schema = TypeDescription.createStruct()
-        .addField("time", TypeDescription.createTimestamp())
-        .addField("union", TypeDescription.createUnion()
-            .addUnionChild(TypeDescription.createInt())
-            .addUnionChild(TypeDescription.createString()))
-        .addField("decimal", TypeDescription.createDecimal()
-            .withPrecision(38)
-            .withScale(18));
-    HiveDecimal maxValue = HiveDecimal.create("10000000000000000000");
-    Writer writer = OrcFile.createWriter(testFilePath,
-                                         OrcFile.writerOptions(conf)
-                                         .setSchema(schema)
-                                         .stripeSize(1000)
-                                         .compress(CompressionKind.NONE)
-                                         .bufferSize(100)
-                                         .blockPadding(false));
-    VectorizedRowBatch batch = schema.createRowBatch();
-    batch.size = 6;
-    setUnion(batch, 0, Timestamp.valueOf("2000-03-12 15:00:00"), 0, 42, null,
-             new HiveDecimalWritable("12345678.6547456"));
-    setUnion(batch, 1, Timestamp.valueOf("2000-03-20 12:00:00.123456789"),
-        1, null, "hello", new HiveDecimalWritable("-5643.234"));
-
-    setUnion(batch, 2, null, null, null, null, null);
-    setUnion(batch, 3, null, 0, null, null, null);
-    setUnion(batch, 4, null, 1, null, null, null);
-
-    setUnion(batch, 5, Timestamp.valueOf("1970-01-01 00:00:00"), 0, 200000,
-        null, new HiveDecimalWritable("10000000000000000000"));
-    writer.addRowBatch(batch);
-
-    batch.reset();
-    Random rand = new Random(42);
-    for(int i=1970; i < 2038; ++i) {
-      Timestamp ts = Timestamp.valueOf(i + "-05-05 12:34:56." + i);
-      HiveDecimal dec =
-          HiveDecimal.create(new BigInteger(64, rand), rand.nextInt(18));
-      if ((i & 1) == 0) {
-        setUnion(batch, batch.size++, ts, 0, i*i, null,
-            new HiveDecimalWritable(dec));
-      } else {
-        setUnion(batch, batch.size++, ts, 1, null, Integer.toString(i*i),
-            new HiveDecimalWritable(dec));
-      }
-      if (maxValue.compareTo(dec) < 0) {
-        maxValue = dec;
-      }
-    }
-    writer.addRowBatch(batch);
-    batch.reset();
-
-    // let's add a lot of constant rows to test the rle
-    batch.size = 1000;
-    for(int c=0; c < batch.cols.length; ++c) {
-      batch.cols[c].setRepeating(true);
-    }
-    ((UnionColumnVector) batch.cols[1]).fields[0].isRepeating = true;
-    setUnion(batch, 0, null, 0, 1732050807, null, null);
-    for(int i=0; i < 5; ++i) {
-      writer.addRowBatch(batch);
-    }
-
-    batch.reset();
-    batch.size = 3;
-    setUnion(batch, 0, null, 0, 0, null, null);
-    setUnion(batch, 1, null, 0, 10, null, null);
-    setUnion(batch, 2, null, 0, 138, null, null);
-    writer.addRowBatch(batch);
-    writer.close();
-    Reader reader = OrcFile.createReader(testFilePath,
-        OrcFile.readerOptions(conf).filesystem(fs));
-
-    schema = writer.getSchema();
-    assertEquals(5, schema.getMaximumId());
-    boolean[] expected = new boolean[] {false, false, false, false, false, false};
-    boolean[] included = OrcUtils.includeColumns("", schema);
-    assertEquals(true, Arrays.equals(expected, included));
-
-    expected = new boolean[] {false, true, false, false, false, true};
-    included = OrcUtils.includeColumns("time,decimal", schema);
-    assertEquals(true, Arrays.equals(expected, included));
-
-    expected = new boolean[] {false, false, true, true, true, false};
-    included = OrcUtils.includeColumns("union", schema);
-    assertEquals(true, Arrays.equals(expected, included));
-
-    Assert.assertEquals(false, reader.getMetadataKeys().iterator().hasNext());
-    Assert.assertEquals(5077, reader.getNumberOfRows());
-    DecimalColumnStatistics stats =
-        (DecimalColumnStatistics) reader.getStatistics()[5];
-    assertEquals(71, stats.getNumberOfValues());
-    assertEquals(HiveDecimal.create("-5643.234"), stats.getMinimum());
-    assertEquals(maxValue, stats.getMaximum());
-    // TODO: fix this
-//    assertEquals(null,stats.getSum());
-    int stripeCount = 0;
-    int rowCount = 0;
-    long currentOffset = -1;
-    for(StripeInformation stripe: reader.getStripes()) {
-      stripeCount += 1;
-      rowCount += stripe.getNumberOfRows();
-      if (currentOffset < 0) {
-        currentOffset = stripe.getOffset() + stripe.getLength();
-      } else {
-        assertEquals(currentOffset, stripe.getOffset());
-        currentOffset += stripe.getLength();
-      }
-    }
-    Assert.assertEquals(reader.getNumberOfRows(), rowCount);
-    assertEquals(2, stripeCount);
-    Assert.assertEquals(reader.getContentLength(), currentOffset);
-    RecordReader rows = reader.rows();
-    Assert.assertEquals(0, rows.getRowNumber());
-    Assert.assertEquals(0.0, rows.getProgress(), 0.000001);
-
-    schema = reader.getSchema();
-    batch = schema.createRowBatch(74);
-    Assert.assertEquals(0, rows.getRowNumber());
-    rows.nextBatch(batch);
-    assertEquals(74, batch.size);
-    Assert.assertEquals(74, rows.getRowNumber());
-    TimestampColumnVector ts = (TimestampColumnVector) batch.cols[0];
-    UnionColumnVector union = (UnionColumnVector) batch.cols[1];
-    LongColumnVector longs = (LongColumnVector) union.fields[0];
-    BytesColumnVector strs = (BytesColumnVector) union.fields[1];
-    DecimalColumnVector decs = (DecimalColumnVector) batch.cols[2];
-
-    assertEquals("struct<time:timestamp,union:uniontype<int,string>,decimal:decimal(38,18)>",
-        schema.toString());
-    assertEquals("2000-03-12 15:00:00.0", ts.asScratchTimestamp(0).toString());
-    assertEquals(0, union.tags[0]);
-    assertEquals(42, longs.vector[0]);
-    assertEquals("12345678.6547456", decs.vector[0].toString());
-
-    assertEquals("2000-03-20 12:00:00.123456789", ts.asScratchTimestamp(1).toString());
-    assertEquals(1, union.tags[1]);
-    assertEquals("hello", strs.toString(1));
-    assertEquals("-5643.234", decs.vector[1].toString());
-
-    assertEquals(false, ts.noNulls);
-    assertEquals(false, union.noNulls);
-    assertEquals(false, decs.noNulls);
-    assertEquals(true, ts.isNull[2]);
-    assertEquals(true, union.isNull[2]);
-    assertEquals(true, decs.isNull[2]);
-
-    assertEquals(true, ts.isNull[3]);
-    assertEquals(false, union.isNull[3]);
-    assertEquals(0, union.tags[3]);
-    assertEquals(true, longs.isNull[3]);
-    assertEquals(true, decs.isNull[3]);
-
-    assertEquals(true, ts.isNull[4]);
-    assertEquals(false, union.isNull[4]);
-    assertEquals(1, union.tags[4]);
-    assertEquals(true, strs.isNull[4]);
-    assertEquals(true, decs.isNull[4]);
-
-    assertEquals(false, ts.isNull[5]);
-    assertEquals("1970-01-01 00:00:00.0", ts.asScratchTimestamp(5).toString());
-    assertEquals(false, union.isNull[5]);
-    assertEquals(0, union.tags[5]);
-    assertEquals(false, longs.isNull[5]);
-    assertEquals(200000, longs.vector[5]);
-    assertEquals(false, decs.isNull[5]);
-    assertEquals("10000000000000000000", decs.vector[5].toString());
-
-    rand = new Random(42);
-    for(int i=1970; i < 2038; ++i) {
-      int row = 6 + i - 1970;
-      assertEquals(Timestamp.valueOf(i + "-05-05 12:34:56." + i),
-          ts.asScratchTimestamp(row));
-      if ((i & 1) == 0) {
-        assertEquals(0, union.tags[row]);
-        assertEquals(i*i, longs.vector[row]);
-      } else {
-        assertEquals(1, union.tags[row]);
-        assertEquals(Integer.toString(i * i), strs.toString(row));
-      }
-      assertEquals(new HiveDecimalWritable(HiveDecimal.create(new BigInteger(64, rand),
-                                   rand.nextInt(18))), decs.vector[row]);
-    }
-
-    // rebuild the row batch, so that we can read by 1000 rows
-    batch = schema.createRowBatch(1000);
-    ts = (TimestampColumnVector) batch.cols[0];
-    union = (UnionColumnVector) batch.cols[1];
-    longs = (LongColumnVector) union.fields[0];
-    strs = (BytesColumnVector) union.fields[1];
-    decs = (DecimalColumnVector) batch.cols[2];
-
-    for(int i=0; i < 5; ++i) {
-      rows.nextBatch(batch);
-      assertEquals("batch " + i, 1000, batch.size);
-      assertEquals("batch " + i, false, union.isRepeating);
-      assertEquals("batch " + i, true, union.noNulls);
-      for(int r=0; r < batch.size; ++r) {
-        assertEquals("bad tag at " + i + "." +r, 0, union.tags[r]);
-      }
-      assertEquals("batch " + i, true, longs.isRepeating);
-      assertEquals("batch " + i, 1732050807, longs.vector[0]);
-    }
-
-    rows.nextBatch(batch);
-    assertEquals(3, batch.size);
-    assertEquals(0, union.tags[0]);
-    assertEquals(0, longs.vector[0]);
-    assertEquals(0, union.tags[1]);
-    assertEquals(10, longs.vector[1]);
-    assertEquals(0, union.tags[2]);
-    assertEquals(138, longs.vector[2]);
-
-    rows.nextBatch(batch);
-    assertEquals(0, batch.size);
-    Assert.assertEquals(1.0, rows.getProgress(), 0.00001);
-    Assert.assertEquals(reader.getNumberOfRows(), rows.getRowNumber());
-    rows.seekToRow(1);
-    rows.nextBatch(batch);
-    assertEquals(1000, batch.size);
-    assertEquals(Timestamp.valueOf("2000-03-20 12:00:00.123456789"), ts.asScratchTimestamp(0));
-    assertEquals(1, union.tags[0]);
-    assertEquals("hello", strs.toString(0));
-    assertEquals(new HiveDecimalWritable(HiveDecimal.create("-5643.234")), decs.vector[0]);
-    rows.close();
-  }
-
-  /**
-   * Read and write a randomly generated snappy file.
-   * @throws Exception
-   */
-  @Test
-  public void testSnappy() throws Exception {
-    TypeDescription schema = createInnerSchema();
-    Writer writer = OrcFile.createWriter(testFilePath,
-                                         OrcFile.writerOptions(conf)
-                                         .setSchema(schema)
-                                         .stripeSize(1000)
-                                         .compress(CompressionKind.SNAPPY)
-                                         .bufferSize(100));
-    VectorizedRowBatch batch = schema.createRowBatch();
-    Random rand = new Random(12);
-    batch.size = 1000;
-    for(int b=0; b < 10; ++b) {
-      for (int r=0; r < 1000; ++r) {
-        ((LongColumnVector) batch.cols[0]).vector[r] = rand.nextInt();
-        ((BytesColumnVector) batch.cols[1]).setVal(r,
-            Integer.toHexString(rand.nextInt()).getBytes());
-      }
-      writer.addRowBatch(batch);
-    }
-    writer.close();
-    Reader reader = OrcFile.createReader(testFilePath,
-        OrcFile.readerOptions(conf).filesystem(fs));
-    Assert.assertEquals(CompressionKind.SNAPPY, reader.getCompressionKind());
-    RecordReader rows = reader.rows();
-    batch = reader.getSchema().createRowBatch(1000);
-    rand = new Random(12);
-    LongColumnVector longs = (LongColumnVector) batch.cols[0];
-    BytesColumnVector strs = (BytesColumnVector) batch.cols[1];
-    for(int b=0; b < 10; ++b) {
-      rows.nextBatch(batch);
-      assertEquals(1000, batch.size);
-      for(int r=0; r < batch.size; ++r) {
-        assertEquals(rand.nextInt(), longs.vector[r]);
-        assertEquals(Integer.toHexString(rand.nextInt()), strs.toString(r));
-      }
-    }
-    rows.nextBatch(batch);
-    assertEquals(0, batch.size);
-    rows.close();
-  }
-
-  /**
-   * Read and write a randomly generated snappy file with the row index disabled.
-   * @throws Exception
-   */
-  @Test
-  public void testWithoutIndex() throws Exception {
-    TypeDescription schema = createInnerSchema();
-    Writer writer = OrcFile.createWriter(testFilePath,
-                                         OrcFile.writerOptions(conf)
-                                         .setSchema(schema)
-                                         .stripeSize(5000)
-                                         .compress(CompressionKind.SNAPPY)
-                                         .bufferSize(1000)
-                                         .rowIndexStride(0));
-    VectorizedRowBatch batch = schema.createRowBatch();
-    Random rand = new Random(24);
-    batch.size = 5;
-    for(int c=0; c < batch.cols.length; ++c) {
-      batch.cols[c].setRepeating(true);
-    }
-    for(int i=0; i < 10000; ++i) {
-      ((LongColumnVector) batch.cols[0]).vector[0] = rand.nextInt();
-      ((BytesColumnVector) batch.cols[1])
-          .setVal(0, Integer.toBinaryString(rand.nextInt()).getBytes());
-      writer.addRowBatch(batch);
-    }
-    writer.close();
-    Reader reader = OrcFile.createReader(testFilePath,
-        OrcFile.readerOptions(conf).filesystem(fs));
-    Assert.assertEquals(50000, reader.getNumberOfRows());
-    Assert.assertEquals(0, reader.getRowIndexStride());
-    StripeInformation stripe = reader.getStripes().iterator().next();
-    assertEquals(true, stripe.getDataLength() != 0);
-    assertEquals(0, stripe.getIndexLength());
-    RecordReader rows = reader.rows();
-    rand = new Random(24);
-    batch = reader.getSchema().createRowBatch(1000);
-    LongColumnVector longs = (LongColumnVector) batch.cols[0];
-    BytesColumnVector strs = (BytesColumnVector) batch.cols[1];
-    for(int i=0; i < 50; ++i) {
-      rows.nextBatch(batch);
-      assertEquals("batch " + i, 1000, batch.size);
-      for(int j=0; j < 200; ++j) {
-        int intVal = rand.nextInt();
-        String strVal = Integer.toBinaryString(rand.nextInt());
-        for (int k = 0; k < 5; ++k) {
-          assertEquals(intVal, longs.vector[j * 5 + k]);
-          assertEquals(strVal, strs.toString(j * 5 + k));
-        }
-      }
-    }
-    rows.nextBatch(batch);
-    assertEquals(0, batch.size);
-    rows.close();
-  }
-
-  @Test
-  public void testSeek() throws Exception {
-    TypeDescription schema = createBigRowSchema();
-    Writer writer = OrcFile.createWriter(testFilePath,
-                                         OrcFile.writerOptions(conf)
-                                         .setSchema(schema)
-                                         .stripeSize(200000)
-                                         .bufferSize(65536)
-                                         .rowIndexStride(1000));
-    VectorizedRowBatch batch = schema.createRowBatch();
-    Random rand = new Random(42);
-    final int COUNT=32768;
-    long[] intValues= new long[COUNT];
-    double[] doubleValues = new double[COUNT];
-    String[] stringValues = new String[COUNT];
-    BytesWritable[] byteValues = new BytesWritable[COUNT];
-    String[] words = new String[128];
-    for(int i=0; i < words.length; ++i) {
-      words[i] = Integer.toHexString(rand.nextInt());
-    }
-    for(int i=0; i < COUNT/2; ++i) {
-      intValues[2*i] = rand.nextLong();
-      intValues[2*i+1] = intValues[2*i];
-      stringValues[2*i] = words[rand.nextInt(words.length)];
-      stringValues[2*i+1] = stringValues[2*i];
-    }
-    for(int i=0; i < COUNT; ++i) {
-      doubleValues[i] = rand.nextDouble();
-      byte[] buf = new byte[20];
-      rand.nextBytes(buf);
-      byteValues[i] = new BytesWritable(buf);
-    }
-    for(int i=0; i < COUNT; ++i) {
-      appendRandomRow(batch, intValues, doubleValues, stringValues,
-          byteValues, words, i);
-      if (batch.size == 1024) {
-        writer.addRowBatch(batch);
-        batch.reset();
-      }
-    }
-    if (batch.size != 0) {
-      writer.addRowBatch(batch);
-    }
-    writer.close();
-    Reader reader = OrcFile.createReader(testFilePath,
-        OrcFile.readerOptions(conf).filesystem(fs));
-    Assert.assertEquals(COUNT, reader.getNumberOfRows());
-    RecordReader rows = reader.rows();
-    // get the row index
-    DataReader meta = RecordReaderUtils.createDefaultDataReader(
-        DataReaderProperties.builder()
-            .withBufferSize(reader.getCompressionSize())
-            .withFileSystem(fs)
-            .withPath(testFilePath)
-            .withCompression(reader.getCompressionKind())
-            .withTypeCount(reader.getSchema().getMaximumId() + 1)
-            .withZeroCopy(false)
-            .build());
-    OrcIndex index =
-        meta.readRowIndex(reader.getStripes().get(0), null, null, null, null,
-            null);
-    // check the primitive columns to make sure they have the right number of
-    // items in the first row group
-    for(int c=1; c < 9; ++c) {
-      OrcProto.RowIndex colIndex = index.getRowGroupIndex()[c];
-      assertEquals(1000,
-          colIndex.getEntry(0).getStatistics().getNumberOfValues());
-    }
-    batch = reader.getSchema().createRowBatch();
-    int nextRowInBatch = -1;
-    for(int i=COUNT-1; i >= 0; --i, --nextRowInBatch) {
-      // if we have consumed the previous batch, read a new one
-      if (nextRowInBatch < 0) {
-        long base = Math.max(i - 1023, 0);
-        rows.seekToRow(base);
-        Assert.assertEquals("row " + i, true, rows.nextBatch(batch));
-        nextRowInBatch = batch.size - 1;
-      }
-      checkRandomRow(batch, intValues, doubleValues,
-          stringValues, byteValues, words, i, nextRowInBatch);
-    }
-    rows.close();
-    Iterator<StripeInformation> stripeIterator =
-      reader.getStripes().iterator();
-    long offsetOfStripe2 = 0;
-    long offsetOfStripe4 = 0;
-    long lastRowOfStripe2 = 0;
-    for(int i = 0; i < 5; ++i) {
-      StripeInformation stripe = stripeIterator.next();
-      if (i < 2) {
-        lastRowOfStripe2 += stripe.getNumberOfRows();
-      } else if (i == 2) {
-        offsetOfStripe2 = stripe.getOffset();
-        lastRowOfStripe2 += stripe.getNumberOfRows() - 1;
-      } else if (i == 4) {
-        offsetOfStripe4 = stripe.getOffset();
-      }
-    }
-    boolean[] columns = new boolean[reader.getStatistics().length];
-    columns[5] = true; // long column
-    columns[9] = true; // text column
-    rows = reader.rows(new Reader.Options()
-        .range(offsetOfStripe2, offsetOfStripe4 - offsetOfStripe2)
-        .include(columns));
-    rows.seekToRow(lastRowOfStripe2);
-    // we only want two rows
-    batch = reader.getSchema().createRowBatch(2);
-    Assert.assertEquals(true, rows.nextBatch(batch));
-    assertEquals(1, batch.size);
-    assertEquals(intValues[(int) lastRowOfStripe2], getLong(batch, 0));
-    assertEquals(stringValues[(int) lastRowOfStripe2],
-        getText(batch, 0).toString());
-    Assert.assertEquals(true, rows.nextBatch(batch));
-    assertEquals(intValues[(int) lastRowOfStripe2 + 1], getLong(batch, 0));
-    assertEquals(stringValues[(int) lastRowOfStripe2 + 1],
-        getText(batch, 0).toString());
-    rows.close();
-  }
-
-  private void appendRandomRow(VectorizedRowBatch batch,
-                               long[] intValues, double[] doubleValues,
-                               String[] stringValues,
-                               BytesWritable[] byteValues,
-                               String[] words, int i) {
-    InnerStruct inner = new InnerStruct((int) intValues[i], stringValues[i]);
-    InnerStruct inner2 = new InnerStruct((int) (intValues[i] >> 32),
-        words[i % words.length] + "-x");
-    setBigRow(batch, batch.size++, (intValues[i] & 1) == 0, (byte) intValues[i],
-        (short) intValues[i], (int) intValues[i], intValues[i],
-        (float) doubleValues[i], doubleValues[i], byteValues[i], stringValues[i],
-        new MiddleStruct(inner, inner2), list(), map(inner, inner2));
-  }
-
-  private void checkRandomRow(VectorizedRowBatch batch,
-                              long[] intValues, double[] doubleValues,
-                              String[] stringValues,
-                              BytesWritable[] byteValues,
-                              String[] words, int i, int rowInBatch) {
-    InnerStruct inner = new InnerStruct((int) intValues[i], stringValues[i]);
-    InnerStruct inner2 = new InnerStruct((int) (intValues[i] >> 32),
-        words[i % words.length] + "-x");
-    checkBigRow(batch, rowInBatch, i, (intValues[i] & 1) == 0, (byte) intValues[i],
-        (short) intValues[i], (int) intValues[i], intValues[i],
-        (float) doubleValues[i], doubleValues[i], byteValues[i], stringValues[i],
-        new MiddleStruct(inner, inner2), list(), map(inner, inner2));
-  }
-
-  private static class MyMemoryManager extends MemoryManager {
-    final long totalSpace;
-    double rate;
-    Path path = null;
-    long lastAllocation = 0;
-    int rows = 0;
-    Callback callback;
-
-    MyMemoryManager(Configuration conf, long totalSpace, double rate) {
-      super(conf);
-      this.totalSpace = totalSpace;
-      this.rate = rate;
-    }
-
-    @Override
-    public void addWriter(Path path, long requestedAllocation,
-                   Callback callback) {
-      this.path = path;
-      this.lastAllocation = requestedAllocation;
-      this.callback = callback;
-    }
-
-    @Override
-    public synchronized void removeWriter(Path path) {
-      this.path = null;
-      this.lastAllocation = 0;
-    }
-
-    @Override
-    public long getTotalMemoryPool() {
-      return totalSpace;
-    }
-
-    @Override
-    public double getAllocationScale() {
-      return rate;
-    }
-
-    @Override
-    public void addedRow(int count) throws IOException {
-      rows += count;
-      if (rows % 100 == 0) {
-        callback.checkMemory(rate);
-      }
-    }
-  }
-
-  @Test
-  public void testMemoryManagementV11() throws Exception {
-    TypeDescription schema = createInnerSchema();
-    MyMemoryManager memory = new MyMemoryManager(conf, 10000, 0.1);
-    Writer writer = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf)
-            .setSchema(schema)
-            .compress(CompressionKind.NONE)
-            .stripeSize(50000)
-            .bufferSize(100)
-            .rowIndexStride(0)
-            .memory(memory)
-            .version(OrcFile.Version.V_0_11));
-    assertEquals(testFilePath, memory.path);
-    VectorizedRowBatch batch = schema.createRowBatch();
-    batch.size = 1;
-    for(int i=0; i < 2500; ++i) {
-      ((LongColumnVector) batch.cols[0]).vector[0] = i * 300;
-      ((BytesColumnVector) batch.cols[1]).setVal(0,
-          Integer.toHexString(10*i).getBytes());
-      writer.addRowBatch(batch);
-    }
-    writer.close();
-    assertEquals(null, memory.path);
-    Reader reader = OrcFile.createReader(testFilePath,
-        OrcFile.readerOptions(conf).filesystem(fs));
-    int i = 0;
-    for(StripeInformation stripe: reader.getStripes()) {
-      i += 1;
-      assertTrue("stripe " + i + " is too long at " + stripe.getDataLength(),
-          stripe.getDataLength() < 5000);
-    }
-    assertEquals(25, i);
-    assertEquals(2500, reader.getNumberOfRows());
-  }
-
-  @Test
-  public void testMemoryManagementV12() throws Exception {
-    TypeDescription schema = createInnerSchema();
-    MyMemoryManager memory = new MyMemoryManager(conf, 10000, 0.1);
-    Writer writer = OrcFile.createWriter(testFilePath,
-                                         OrcFile.writerOptions(conf)
-                                         .setSchema(schema)
-                                         .compress(CompressionKind.NONE)
-                                         .stripeSize(50000)
-                                         .bufferSize(100)
-                                         .rowIndexStride(0)
-                                         .memory(memory)
-                                         .version(OrcFile.Version.V_0_12));
-    VectorizedRowBatch batch = schema.createRowBatch();
-    assertEquals(testFilePath, memory.path);
-    batch.size = 1;
-    for(int i=0; i < 2500; ++i) {
-      ((LongColumnVector) batch.cols[0]).vector[0] = i * 300;
-      ((BytesColumnVector) batch.cols[1]).setVal(0,
-          Integer.toHexString(10*i).getBytes());
-      writer.addRowBatch(batch);
-    }
-    writer.close();
-    assertEquals(null, memory.path);
-    Reader reader = OrcFile.createReader(testFilePath,
-        OrcFile.readerOptions(conf).filesystem(fs));
-    int i = 0;
-    for(StripeInformation stripe: reader.getStripes()) {
-      i += 1;
-      assertTrue("stripe " + i + " is too long at " + stripe.getDataLength(),
-          stripe.getDataLength() < 5000);
-    }
-    // with HIVE-7832, dictionary encoding is disabled after writing the first
-    // stripe because there are too many distinct values, so there are only 3
-    // stripes compared to 25 stripes with version 0.11 (the test case above)
-    assertEquals(3, i);
-    assertEquals(2500, reader.getNumberOfRows());
-  }
-
-  @Test
-  public void testPredicatePushdown() throws Exception {
-    TypeDescription schema = createInnerSchema();
-    Writer writer = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf)
-            .setSchema(schema)
-            .stripeSize(400000L)
-            .compress(CompressionKind.NONE)
-            .bufferSize(500)
-            .rowIndexStride(1000));
-    VectorizedRowBatch batch = schema.createRowBatch();
-    batch.ensureSize(3500);
-    batch.size = 3500;
-    for(int i=0; i < 3500; ++i) {
-      ((LongColumnVector) batch.cols[0]).vector[i] = i * 300;
-      ((BytesColumnVector) batch.cols[1]).setVal(i,
-          Integer.toHexString(10*i).getBytes());
-    }
-    writer.addRowBatch(batch);
-    writer.close();
-    Reader reader = OrcFile.createReader(testFilePath,
-        OrcFile.readerOptions(conf).filesystem(fs));
-    assertEquals(3500, reader.getNumberOfRows());
-
-    SearchArgument sarg = SearchArgumentFactory.newBuilder()
-        .startAnd()
-          .startNot()
-             .lessThan("int1", PredicateLeaf.Type.LONG, 300000L)
-          .end()
-          .lessThan("int1", PredicateLeaf.Type.LONG, 600000L)
-        .end()
-        .build();
-    RecordReader rows = reader.rows(new Reader.Options()
-        .range(0L, Long.MAX_VALUE)
-        .include(new boolean[]{true, true, true})
-        .searchArgument(sarg, new String[]{null, "int1", "string1"}));
-    batch = reader.getSchema().createRowBatch(2000);
-    LongColumnVector ints = (LongColumnVector) batch.cols[0];
-    BytesColumnVector strs = (BytesColumnVector) batch.cols[1];
-
-    Assert.assertEquals(1000L, rows.getRowNumber());
-    Assert.assertEquals(true, rows.nextBatch(batch));
-    assertEquals(1000, batch.size);
-
-    for(int i=1000; i < 2000; ++i) {
-      assertEquals(300 * i, ints.vector[i - 1000]);
-      assertEquals(Integer.toHexString(10*i), strs.toString(i - 1000));
-    }
-    Assert.assertEquals(false, rows.nextBatch(batch));
-    Assert.assertEquals(3500, rows.getRowNumber());
-
-    // look through the file with no rows selected
-    sarg = SearchArgumentFactory.newBuilder()
-        .startAnd()
-          .lessThan("int1", PredicateLeaf.Type.LONG, 0L)
-        .end()
-        .build();
-    rows = reader.rows(new Reader.Options()
-        .range(0L, Long.MAX_VALUE)
-        .include(new boolean[]{true, true, true})
-        .searchArgument(sarg, new String[]{null, "int1", "string1"}));
-    Assert.assertEquals(3500L, rows.getRowNumber());
-    assertTrue(!rows.nextBatch(batch));
-
-    // select first 100 and last 100 rows
-    sarg = SearchArgumentFactory.newBuilder()
-        .startOr()
-          .lessThan("int1", PredicateLeaf.Type.LONG, 300L * 100)
-          .startNot()
-            .lessThan("int1", PredicateLeaf.Type.LONG, 300L * 3400)
-          .end()
-        .end()
-        .build();
-    rows = reader.rows(new Reader.Options()
-        .range(0L, Long.MAX_VALUE)
-        .include(new boolean[]{true, true, true})
-        .searchArgument(sarg, new String[]{null, "int1", "string1"}));
-    Assert.assertEquals(0, rows.getRowNumber());
-    Assert.assertEquals(true, rows.nextBatch(batch));
-    assertEquals(1000, batch.size);
-    Assert.assertEquals(3000, rows.getRowNumber());
-    for(int i=0; i < 1000; ++i) {
-      assertEquals(300 * i, ints.vector[i]);
-      assertEquals(Integer.toHexString(10*i), strs.toString(i));
-    }
-
-    Assert.assertEquals(true, rows.nextBatch(batch));
-    assertEquals(500, batch.size);
-    Assert.assertEquals(3500, rows.getRowNumber());
-    for(int i=3000; i < 3500; ++i) {
-      assertEquals(300 * i, ints.vector[i - 3000]);
-      assertEquals(Integer.toHexString(10*i), strs.toString(i - 3000));
-    }
-    Assert.assertEquals(false, rows.nextBatch(batch));
-    Assert.assertEquals(3500, rows.getRowNumber());
-  }
-
-  /**
-   * Test all of the types that have distinct ORC writers using the vectorized
-   * writer with different combinations of repeating and null values.
-   * @throws Exception
-   */
-  @T

<TRUNCATED>
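
The truncated hunk above includes testPredicatePushdown; its predicate-pushdown setup boils down to the sketch below. This is a condensed illustration only: it assumes reader is the ORC Reader already opened on the 3,500-row test file written by that test, and it uses only the builder and reader calls that appear in the hunk itself.

    // Sketch: "reader" is assumed to be the Reader opened on the file written by
    // testPredicatePushdown (3,500 rows, int1 = 300 * i, row index stride 1000).
    SearchArgument sarg = SearchArgumentFactory.newBuilder()
        .startAnd()
          .startNot()
            .lessThan("int1", PredicateLeaf.Type.LONG, 300000L)  // NOT (int1 < 300000)
          .end()
          .lessThan("int1", PredicateLeaf.Type.LONG, 600000L)    // AND  int1 < 600000
        .end()
        .build();                                   // keeps 300000 <= int1 < 600000

    // The name array is indexed by column id; entry 0 is the root struct, so it is null.
    RecordReader rows = reader.rows(new Reader.Options()
        .range(0L, Long.MAX_VALUE)
        .include(new boolean[]{true, true, true})
        .searchArgument(sarg, new String[]{null, "int1", "string1"}));

    // With a 1000-row index stride, whole row groups whose statistics cannot satisfy
    // the predicate are skipped, which is why the test sees getRowNumber() == 1000
    // before the first batch and exactly 1000 matching rows.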

[05/37] hive git commit: HIVE-17118. Move the hive-orc source files to make the package names unique.

Posted by om...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/orc/TestOrcTimezone2.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/orc/TestOrcTimezone2.java b/orc/src/test/org/apache/orc/TestOrcTimezone2.java
deleted file mode 100644
index 4a02855..0000000
--- a/orc/src/test/org/apache/orc/TestOrcTimezone2.java
+++ /dev/null
@@ -1,143 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc;
-
-import static junit.framework.Assert.assertEquals;
-
-import java.io.File;
-import java.sql.Timestamp;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.List;
-import java.util.Random;
-import java.util.TimeZone;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TestName;
-import org.junit.runner.RunWith;
-import org.junit.runners.Parameterized;
-
-import com.google.common.collect.Lists;
-
-/**
- * Tests that ORC timestamps round-trip correctly when the writer and reader
- * run with different default time zones (zone pairs are chosen at random).
- */
-@RunWith(Parameterized.class)
-public class TestOrcTimezone2 {
-  Path workDir = new Path(System.getProperty("test.tmp.dir",
-      "target" + File.separator + "test" + File.separator + "tmp"));
-  Configuration conf;
-  FileSystem fs;
-  Path testFilePath;
-  String writerTimeZone;
-  String readerTimeZone;
-  static TimeZone defaultTimeZone = TimeZone.getDefault();
-
-  public TestOrcTimezone2(String writerTZ, String readerTZ) {
-    this.writerTimeZone = writerTZ;
-    this.readerTimeZone = readerTZ;
-  }
-
-  @Parameterized.Parameters
-  public static Collection<Object[]> data() {
-    String[] allTimeZones = TimeZone.getAvailableIDs();
-    Random rand = new Random(123);
-    int len = allTimeZones.length;
-    int n = 500;
-    Object[][] data = new Object[n][];
-    for (int i = 0; i < n; i++) {
-      int wIdx = rand.nextInt(len);
-      int rIdx = rand.nextInt(len);
-      data[i] = new Object[2];
-      data[i][0] = allTimeZones[wIdx];
-      data[i][1] = allTimeZones[rIdx];
-    }
-    return Arrays.asList(data);
-  }
-
-  @Rule
-  public TestName testCaseName = new TestName();
-
-  @Before
-  public void openFileSystem() throws Exception {
-    conf = new Configuration();
-    fs = FileSystem.getLocal(conf);
-    testFilePath = new Path(workDir, "TestOrcFile." +
-        testCaseName.getMethodName() + ".orc");
-    fs.delete(testFilePath, false);
-  }
-
-  @After
-  public void restoreTimeZone() {
-    TimeZone.setDefault(defaultTimeZone);
-  }
-
-  @Test
-  public void testTimestampWriter() throws Exception {
-    TypeDescription schema = TypeDescription.createTimestamp();
-
-    TimeZone.setDefault(TimeZone.getTimeZone(writerTimeZone));
-    Writer writer = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf).setSchema(schema)
-            .stripeSize(100000).bufferSize(10000));
-    assertEquals(writerTimeZone, TimeZone.getDefault().getID());
-    List<String> ts = Lists.newArrayList();
-    ts.add("2003-01-01 01:00:00.000000222");
-    ts.add("1999-01-01 02:00:00.999999999");
-    ts.add("1995-01-02 03:00:00.688888888");
-    ts.add("2002-01-01 04:00:00.1");
-    ts.add("2010-03-02 05:00:00.000009001");
-    ts.add("2005-01-01 06:00:00.000002229");
-    ts.add("2006-01-01 07:00:00.900203003");
-    ts.add("2003-01-01 08:00:00.800000007");
-    ts.add("1996-08-02 09:00:00.723100809");
-    ts.add("1998-11-02 10:00:00.857340643");
-    ts.add("2008-10-02 11:00:00.0");
-    ts.add("2037-01-01 00:00:00.000999");
-    VectorizedRowBatch batch = schema.createRowBatch();
-    TimestampColumnVector tsc = (TimestampColumnVector) batch.cols[0];
-    for (String t : ts) {
-      tsc.set(batch.size++, Timestamp.valueOf(t));
-    }
-    writer.addRowBatch(batch);
-    writer.close();
-
-    TimeZone.setDefault(TimeZone.getTimeZone(readerTimeZone));
-    Reader reader = OrcFile.createReader(testFilePath,
-        OrcFile.readerOptions(conf).filesystem(fs));
-    assertEquals(readerTimeZone, TimeZone.getDefault().getID());
-    RecordReader rows = reader.rows();
-    int idx = 0;
-    batch = reader.getSchema().createRowBatch();
-    tsc = (TimestampColumnVector) batch.cols[0];
-    while (rows.nextBatch(batch)) {
-      for (int r=0; r < batch.size; ++r) {
-        assertEquals(ts.get(idx++), tsc.asScratchTimestamp(r).toString());
-      }
-    }
-    rows.close();
-  }
-}
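
The parameterized test above reduces to a single write-then-read round trip. A condensed sketch, assuming conf, fs, testFilePath, writerTimeZone and readerTimeZone are initialized exactly as in the test, looks like this:

    TypeDescription schema = TypeDescription.createTimestamp();

    // write one timestamp while the writer's zone is the JVM default
    TimeZone.setDefault(TimeZone.getTimeZone(writerTimeZone));
    Writer writer = OrcFile.createWriter(testFilePath,
        OrcFile.writerOptions(conf).setSchema(schema)
            .stripeSize(100000).bufferSize(10000));
    VectorizedRowBatch batch = schema.createRowBatch();
    TimestampColumnVector col = (TimestampColumnVector) batch.cols[0];
    col.set(batch.size++, Timestamp.valueOf("2003-01-01 01:00:00.000000222"));
    writer.addRowBatch(batch);
    writer.close();

    // read it back under a different default zone; the wall-clock value must be unchanged
    TimeZone.setDefault(TimeZone.getTimeZone(readerTimeZone));
    Reader reader = OrcFile.createReader(testFilePath,
        OrcFile.readerOptions(conf).filesystem(fs));
    RecordReader rows = reader.rows();
    batch = reader.getSchema().createRowBatch();
    col = (TimestampColumnVector) batch.cols[0];
    while (rows.nextBatch(batch)) {
      for (int r = 0; r < batch.size; ++r) {
        assertEquals("2003-01-01 01:00:00.000000222",
            col.asScratchTimestamp(r).toString());
      }
    }
    rows.close();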

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/orc/TestOrcTimezone3.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/orc/TestOrcTimezone3.java b/orc/src/test/org/apache/orc/TestOrcTimezone3.java
deleted file mode 100644
index 40ab0c9..0000000
--- a/orc/src/test/org/apache/orc/TestOrcTimezone3.java
+++ /dev/null
@@ -1,126 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc;
-
-import static junit.framework.Assert.assertEquals;
-
-import java.io.File;
-import java.sql.Timestamp;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.List;
-import java.util.TimeZone;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TestName;
-import org.junit.runner.RunWith;
-import org.junit.runners.Parameterized;
-
-import com.google.common.collect.Lists;
-
-import junit.framework.Assert;
-
-/**
- * Tests ORC timestamp round-tripping with a writer in America/Chicago and a
- * reader in America/Los_Angeles.
- */
-@RunWith(Parameterized.class)
-public class TestOrcTimezone3 {
-  Path workDir = new Path(System.getProperty("test.tmp.dir",
-      "target" + File.separator + "test" + File.separator + "tmp"));
-  Configuration conf;
-  FileSystem fs;
-  Path testFilePath;
-  String writerTimeZone;
-  String readerTimeZone;
-  static TimeZone defaultTimeZone = TimeZone.getDefault();
-
-  public TestOrcTimezone3(String writerTZ, String readerTZ) {
-    this.writerTimeZone = writerTZ;
-    this.readerTimeZone = readerTZ;
-  }
-
-  @Parameterized.Parameters
-  public static Collection<Object[]> data() {
-    List<Object[]> result = Arrays.asList(new Object[][]{
-        {"America/Chicago", "America/Los_Angeles"},
-    });
-    return result;
-  }
-
-  @Rule
-  public TestName testCaseName = new TestName();
-
-  @Before
-  public void openFileSystem() throws Exception {
-    conf = new Configuration();
-    fs = FileSystem.getLocal(conf);
-    testFilePath = new Path(workDir, "TestOrcTimezone3." +
-        testCaseName.getMethodName() + ".orc");
-    fs.delete(testFilePath, false);
-  }
-
-  @After
-  public void restoreTimeZone() {
-    TimeZone.setDefault(defaultTimeZone);
-  }
-
-  @Test
-  public void testTimestampWriter() throws Exception {
-    TypeDescription schema = TypeDescription.createTimestamp();
-
-    TimeZone.setDefault(TimeZone.getTimeZone(writerTimeZone));
-    Writer writer = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf).setSchema(schema).stripeSize(100000)
-            .bufferSize(10000));
-    assertEquals(writerTimeZone, TimeZone.getDefault().getID());
-    List<String> ts = Lists.newArrayList();
-    ts.add("1969-12-31 16:00:14.007");
-    ts.add("1969-12-31 16:00:06.021");
-    ts.add("1969-12-31 16:00:03.963");
-    VectorizedRowBatch batch = schema.createRowBatch();
-    TimestampColumnVector times = (TimestampColumnVector) batch.cols[0];
-    for (String t : ts) {
-      times.set(batch.size++, Timestamp.valueOf(t));
-    }
-    writer.addRowBatch(batch);
-    writer.close();
-
-    TimeZone.setDefault(TimeZone.getTimeZone(readerTimeZone));
-    Reader reader = OrcFile.createReader(testFilePath,
-        OrcFile.readerOptions(conf).filesystem(fs));
-    assertEquals(readerTimeZone, TimeZone.getDefault().getID());
-    RecordReader rows = reader.rows();
-    batch = reader.getSchema().createRowBatch();
-    times = (TimestampColumnVector) batch.cols[0];
-    int idx = 0;
-    while (rows.nextBatch(batch)) {
-      for(int r=0; r < batch.size; ++r) {
-        assertEquals(ts.get(idx++), times.asScratchTimestamp(r).toString());
-      }
-    }
-    rows.close();
-  }
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/orc/TestStringDictionary.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/orc/TestStringDictionary.java b/orc/src/test/org/apache/orc/TestStringDictionary.java
deleted file mode 100644
index 46209bb..0000000
--- a/orc/src/test/org/apache/orc/TestStringDictionary.java
+++ /dev/null
@@ -1,290 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc;
-
-import static org.junit.Assert.assertEquals;
-
-import java.io.File;
-import java.util.Random;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-
-import org.apache.orc.impl.RecordReaderImpl;
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TestName;
-
-public class TestStringDictionary {
-
-  Path workDir = new Path(System.getProperty("test.tmp.dir", "target" + File.separator + "test"
-      + File.separator + "tmp"));
-
-  Configuration conf;
-  FileSystem fs;
-  Path testFilePath;
-
-  @Rule
-  public TestName testCaseName = new TestName();
-
-  @Before
-  public void openFileSystem() throws Exception {
-    conf = new Configuration();
-    fs = FileSystem.getLocal(conf);
-    testFilePath = new Path(workDir, "TestOrcFile." + testCaseName.getMethodName() + ".orc");
-    fs.delete(testFilePath, false);
-  }
-
-  @Test
-  public void testTooManyDistinct() throws Exception {
-    TypeDescription schema = TypeDescription.createString();
-
-    Writer writer = OrcFile.createWriter(
-        testFilePath,
-        OrcFile.writerOptions(conf).setSchema(schema)
-                                   .compress(CompressionKind.NONE)
-                                   .bufferSize(10000));
-    VectorizedRowBatch batch = schema.createRowBatch();
-    BytesColumnVector col = (BytesColumnVector) batch.cols[0];
-    for (int i = 0; i < 20000; i++) {
-      if (batch.size == batch.getMaxSize()) {
-        writer.addRowBatch(batch);
-        batch.reset();
-      }
-      col.setVal(batch.size++, String.valueOf(i).getBytes());
-    }
-    writer.addRowBatch(batch);
-    writer.close();
-
-    Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs));
-    RecordReader rows = reader.rows();
-    batch = reader.getSchema().createRowBatch();
-    col = (BytesColumnVector) batch.cols[0];
-    int idx = 0;
-    while (rows.nextBatch(batch)) {
-      for(int r=0; r < batch.size; ++r) {
-        assertEquals(String.valueOf(idx++), col.toString(r));
-      }
-    }
-
-    // make sure the encoding type is correct
-    for (StripeInformation stripe : reader.getStripes()) {
-      // hacky, but it does the job: this cast will work as long as this test
-      // resides in the same package as the ORC reader
-      OrcProto.StripeFooter footer = ((RecordReaderImpl) rows).readStripeFooter(stripe);
-      for (int i = 0; i < footer.getColumnsCount(); ++i) {
-        OrcProto.ColumnEncoding encoding = footer.getColumns(i);
-        assertEquals(OrcProto.ColumnEncoding.Kind.DIRECT_V2, encoding.getKind());
-      }
-    }
-  }
-
-  @Test
-  public void testHalfDistinct() throws Exception {
-    TypeDescription schema = TypeDescription.createString();
-
-    Writer writer = OrcFile.createWriter(
-        testFilePath,
-        OrcFile.writerOptions(conf).setSchema(schema).compress(CompressionKind.NONE)
-            .bufferSize(10000));
-    Random rand = new Random(123);
-    int[] input = new int[20000];
-    for (int i = 0; i < 20000; i++) {
-      input[i] = rand.nextInt(10000);
-    }
-
-    VectorizedRowBatch batch = schema.createRowBatch();
-    BytesColumnVector col = (BytesColumnVector) batch.cols[0];
-    for (int i = 0; i < 20000; i++) {
-      if (batch.size == batch.getMaxSize()) {
-        writer.addRowBatch(batch);
-        batch.reset();
-      }
-      col.setVal(batch.size++, String.valueOf(input[i]).getBytes());
-    }
-    writer.addRowBatch(batch);
-    writer.close();
-
-    Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs));
-    RecordReader rows = reader.rows();
-    batch = reader.getSchema().createRowBatch();
-    col = (BytesColumnVector) batch.cols[0];
-    int idx = 0;
-    while (rows.nextBatch(batch)) {
-      for(int r=0; r < batch.size; ++r) {
-        assertEquals(String.valueOf(input[idx++]), col.toString(r));
-      }
-    }
-
-    // make sure the encoding type is correct
-    for (StripeInformation stripe : reader.getStripes()) {
-      // hacky, but it does the job: this cast will work as long as this test
-      // resides in the same package as the ORC reader
-      OrcProto.StripeFooter footer = ((RecordReaderImpl) rows).readStripeFooter(stripe);
-      for (int i = 0; i < footer.getColumnsCount(); ++i) {
-        OrcProto.ColumnEncoding encoding = footer.getColumns(i);
-        assertEquals(OrcProto.ColumnEncoding.Kind.DICTIONARY_V2, encoding.getKind());
-      }
-    }
-  }
-
-  @Test
-  public void testTooManyDistinctCheckDisabled() throws Exception {
-    TypeDescription schema = TypeDescription.createString();
-
-    conf.setBoolean(OrcConf.ROW_INDEX_STRIDE_DICTIONARY_CHECK.getAttribute(), false);
-    Writer writer = OrcFile.createWriter(
-        testFilePath,
-        OrcFile.writerOptions(conf).setSchema(schema).compress(CompressionKind.NONE)
-            .bufferSize(10000));
-    VectorizedRowBatch batch = schema.createRowBatch();
-    BytesColumnVector string = (BytesColumnVector) batch.cols[0];
-    for (int i = 0; i < 20000; i++) {
-      if (batch.size == batch.getMaxSize()) {
-        writer.addRowBatch(batch);
-        batch.reset();
-      }
-      string.setVal(batch.size++, String.valueOf(i).getBytes());
-    }
-    writer.addRowBatch(batch);
-    writer.close();
-
-    Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs));
-    RecordReader rows = reader.rows();
-    batch = reader.getSchema().createRowBatch();
-    string = (BytesColumnVector) batch.cols[0];
-    int idx = 0;
-    while (rows.nextBatch(batch)) {
-      for(int r=0; r < batch.size; ++r) {
-        assertEquals(String.valueOf(idx++), string.toString(r));
-      }
-    }
-
-    // make sure the encoding type is correct
-    for (StripeInformation stripe : reader.getStripes()) {
-      // hacky, but it does the job: this cast will work as long as this test
-      // resides in the same package as the ORC reader
-      OrcProto.StripeFooter footer = ((RecordReaderImpl) rows).readStripeFooter(stripe);
-      for (int i = 0; i < footer.getColumnsCount(); ++i) {
-        OrcProto.ColumnEncoding encoding = footer.getColumns(i);
-        assertEquals(OrcProto.ColumnEncoding.Kind.DIRECT_V2, encoding.getKind());
-      }
-    }
-  }
-
-  @Test
-  public void testHalfDistinctCheckDisabled() throws Exception {
-    TypeDescription schema = TypeDescription.createString();
-
-    conf.setBoolean(OrcConf.ROW_INDEX_STRIDE_DICTIONARY_CHECK.getAttribute(),
-        false);
-    Writer writer = OrcFile.createWriter(
-        testFilePath,
-        OrcFile.writerOptions(conf).setSchema(schema)
-            .compress(CompressionKind.NONE)
-            .bufferSize(10000));
-    Random rand = new Random(123);
-    int[] input = new int[20000];
-    for (int i = 0; i < 20000; i++) {
-      input[i] = rand.nextInt(10000);
-    }
-    VectorizedRowBatch batch = schema.createRowBatch();
-    BytesColumnVector string = (BytesColumnVector) batch.cols[0];
-    for (int i = 0; i < 20000; i++) {
-      if (batch.size == batch.getMaxSize()) {
-        writer.addRowBatch(batch);
-        batch.reset();
-      }
-      string.setVal(batch.size++, String.valueOf(input[i]).getBytes());
-    }
-    writer.addRowBatch(batch);
-    writer.close();
-
-    Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs));
-    RecordReader rows = reader.rows();
-    batch = reader.getSchema().createRowBatch();
-    string = (BytesColumnVector) batch.cols[0];
-    int idx = 0;
-    while (rows.nextBatch(batch)) {
-      for(int r=0; r < batch.size; ++r) {
-        assertEquals(String.valueOf(input[idx++]), string.toString(r));
-      }
-    }
-
-    // make sure the encoding type is correct
-    for (StripeInformation stripe : reader.getStripes()) {
-      // hacky but does the job; this cast will work as long as this test
-      // resides in the same package as the ORC reader
-      OrcProto.StripeFooter footer = ((RecordReaderImpl) rows).readStripeFooter(stripe);
-      for (int i = 0; i < footer.getColumnsCount(); ++i) {
-        OrcProto.ColumnEncoding encoding = footer.getColumns(i);
-        assertEquals(OrcProto.ColumnEncoding.Kind.DICTIONARY_V2, encoding.getKind());
-      }
-    }
-  }
-
-  @Test
-  public void testTooManyDistinctV11AlwaysDictionary() throws Exception {
-    TypeDescription schema = TypeDescription.createString();
-
-    Writer writer = OrcFile.createWriter(
-        testFilePath,
-        OrcFile.writerOptions(conf).setSchema(schema)
-            .compress(CompressionKind.NONE)
-            .version(OrcFile.Version.V_0_11).bufferSize(10000));
-    VectorizedRowBatch batch = schema.createRowBatch();
-    BytesColumnVector string = (BytesColumnVector) batch.cols[0];
-    for (int i = 0; i < 20000; i++) {
-      if (batch.size == batch.getMaxSize()) {
-        writer.addRowBatch(batch);
-        batch.reset();
-      }
-      string.setVal(batch.size++, String.valueOf(i).getBytes());
-    }
-    writer.addRowBatch(batch);
-    writer.close();
-
-    Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs));
-    batch = reader.getSchema().createRowBatch();
-    string = (BytesColumnVector) batch.cols[0];
-    RecordReader rows = reader.rows();
-    int idx = 0;
-    while (rows.nextBatch(batch)) {
-      for(int r=0; r < batch.size; ++r) {
-        assertEquals(String.valueOf(idx++), string.toString(r));
-      }
-    }
-
-    // make sure the encoding type is correct
-    for (StripeInformation stripe : reader.getStripes()) {
-      // hacky but does the job; this cast will work as long as this test
-      // resides in the same package as the ORC reader
-      OrcProto.StripeFooter footer = ((RecordReaderImpl) rows).readStripeFooter(stripe);
-      for (int i = 0; i < footer.getColumnsCount(); ++i) {
-        OrcProto.ColumnEncoding encoding = footer.getColumns(i);
-        assertEquals(OrcProto.ColumnEncoding.Kind.DICTIONARY, encoding.getKind());
-      }
-    }
-
-  }
-
-}
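
For reference, the dictionary tests deleted above all follow the same shape: write roughly 20,000 string rows with compression disabled, read them back, and then peek at each stripe footer to see which column encoding the writer chose. Below is a minimal stand-alone sketch of that pattern, not part of the patch; the file path and the i % 100 data are invented for illustration, and the readStripeFooter call is assumed to be package-visible, so the class is placed in the reader's package, as the test comments suggest.

// A sketch only: assumes the APIs exactly as used in the deleted tests above.
package org.apache.orc.impl;   // needed so the package-visible readStripeFooter compiles (assumption)

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.orc.*;

public class EncodingRoundTripSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);
    Path testFilePath = new Path("target/tmp/encoding-sketch.orc");  // illustrative path
    fs.delete(testFilePath, false);

    TypeDescription schema = TypeDescription.createString();
    Writer writer = OrcFile.createWriter(testFilePath,
        OrcFile.writerOptions(conf).setSchema(schema)
            .compress(CompressionKind.NONE).bufferSize(10000));
    VectorizedRowBatch batch = schema.createRowBatch();
    BytesColumnVector col = (BytesColumnVector) batch.cols[0];
    for (int i = 0; i < 20000; i++) {
      if (batch.size == batch.getMaxSize()) {   // flush a full batch
        writer.addRowBatch(batch);
        batch.reset();
      }
      col.setVal(batch.size++, String.valueOf(i % 100).getBytes());  // few distinct values
    }
    writer.addRowBatch(batch);
    writer.close();

    Reader reader = OrcFile.createReader(testFilePath,
        OrcFile.readerOptions(conf).filesystem(fs));
    RecordReader rows = reader.rows();
    // report which encoding the writer chose for each column of each stripe
    for (StripeInformation stripe : reader.getStripes()) {
      OrcProto.StripeFooter footer = ((RecordReaderImpl) rows).readStripeFooter(stripe);
      for (int i = 0; i < footer.getColumnsCount(); ++i) {
        System.out.println("column " + i + ": " + footer.getColumns(i).getKind());
      }
    }
  }
}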

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/orc/TestTypeDescription.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/orc/TestTypeDescription.java b/orc/src/test/org/apache/orc/TestTypeDescription.java
deleted file mode 100644
index 27516be..0000000
--- a/orc/src/test/org/apache/orc/TestTypeDescription.java
+++ /dev/null
@@ -1,91 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc;
-
-import static org.junit.Assert.assertEquals;
-
-import org.apache.orc.TypeDescription;
-import org.junit.Test;
-
-public class TestTypeDescription {
-
-  @Test
-  public void testJson() {
-    TypeDescription bin = TypeDescription.createBinary();
-    assertEquals("{\"category\": \"binary\", \"id\": 0, \"max\": 0}",
-        bin.toJson());
-    assertEquals("binary", bin.toString());
-    TypeDescription struct = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createInt())
-        .addField("f2", TypeDescription.createString())
-        .addField("f3", TypeDescription.createDecimal());
-    assertEquals("struct<f1:int,f2:string,f3:decimal(38,10)>",
-        struct.toString());
-    assertEquals("{\"category\": \"struct\", \"id\": 0, \"max\": 3, \"fields\": [\n"
-            + "  \"f1\": {\"category\": \"int\", \"id\": 1, \"max\": 1},\n"
-            + "  \"f2\": {\"category\": \"string\", \"id\": 2, \"max\": 2},\n"
-            + "  \"f3\": {\"category\": \"decimal\", \"id\": 3, \"max\": 3, \"precision\": 38, \"scale\": 10}]}",
-        struct.toJson());
-    struct = TypeDescription.createStruct()
-        .addField("f1", TypeDescription.createUnion()
-            .addUnionChild(TypeDescription.createByte())
-            .addUnionChild(TypeDescription.createDecimal()
-                .withPrecision(20).withScale(10)))
-        .addField("f2", TypeDescription.createStruct()
-            .addField("f3", TypeDescription.createDate())
-            .addField("f4", TypeDescription.createDouble())
-            .addField("f5", TypeDescription.createBoolean()))
-        .addField("f6", TypeDescription.createChar().withMaxLength(100));
-    assertEquals("struct<f1:uniontype<tinyint,decimal(20,10)>,f2:struct<f3:date,f4:double,f5:boolean>,f6:char(100)>",
-        struct.toString());
-    assertEquals(
-        "{\"category\": \"struct\", \"id\": 0, \"max\": 8, \"fields\": [\n" +
-            "  \"f1\": {\"category\": \"uniontype\", \"id\": 1, \"max\": 3, \"children\": [\n" +
-            "    {\"category\": \"tinyint\", \"id\": 2, \"max\": 2},\n" +
-            "    {\"category\": \"decimal\", \"id\": 3, \"max\": 3, \"precision\": 20, \"scale\": 10}]},\n" +
-            "  \"f2\": {\"category\": \"struct\", \"id\": 4, \"max\": 7, \"fields\": [\n" +
-            "    \"f3\": {\"category\": \"date\", \"id\": 5, \"max\": 5},\n" +
-            "    \"f4\": {\"category\": \"double\", \"id\": 6, \"max\": 6},\n" +
-            "    \"f5\": {\"category\": \"boolean\", \"id\": 7, \"max\": 7}]},\n" +
-            "  \"f6\": {\"category\": \"char\", \"id\": 8, \"max\": 8, \"length\": 100}]}",
-        struct.toJson());
-  }
-
-  @Test
-  public void testEquals() {
-    TypeDescription type1 =
-        TypeDescription.createStruct()
-        .addField("a", TypeDescription.createInt())
-        .addField("b", TypeDescription.createStruct()
-                         .addField("x", TypeDescription.createString())
-                         .addField("y", TypeDescription.createBinary())
-                         .addField("z", TypeDescription.createDouble()))
-        .addField("c", TypeDescription.createString());
-    assertEquals(0, type1.getId());
-    assertEquals(6, type1.getMaximumId());
-    TypeDescription type2 =
-        TypeDescription.createStruct()
-        .addField("x", TypeDescription.createString())
-        .addField("y", TypeDescription.createBinary())
-        .addField("z", TypeDescription.createDouble());
-    assertEquals(0, type2.getId());
-    assertEquals(3, type2.getMaximumId());
-    assertEquals(type2, type1.getChildren().get(1));
-    assertEquals(type2.hashCode(), type1.getChildren().get(1).hashCode());
-  }
-}
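
The TestTypeDescription cases removed above are a compact reference for the fluent schema builder and its pre-order id assignment. As a quick illustration, and using only calls that appear in the deleted test, a throwaway sketch:

import org.apache.orc.TypeDescription;

public class TypeDescriptionSketch {
  public static void main(String[] args) {
    // build struct<a:int,b:struct<x:string,y:binary,z:double>,c:string>
    TypeDescription schema = TypeDescription.createStruct()
        .addField("a", TypeDescription.createInt())
        .addField("b", TypeDescription.createStruct()
            .addField("x", TypeDescription.createString())
            .addField("y", TypeDescription.createBinary())
            .addField("z", TypeDescription.createDouble()))
        .addField("c", TypeDescription.createString());

    // ids are assigned in a pre-order walk: the root struct is 0 and the
    // deepest child here is 6, matching the assertions in testEquals
    System.out.println(schema.toString());       // struct<a:int,b:struct<...>,c:string>
    System.out.println(schema.getId());          // 0
    System.out.println(schema.getMaximumId());   // 6
    System.out.println(schema.toJson());
  }
}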

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/orc/TestUnrolledBitPack.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/orc/TestUnrolledBitPack.java b/orc/src/test/org/apache/orc/TestUnrolledBitPack.java
deleted file mode 100644
index ef8fcd0..0000000
--- a/orc/src/test/org/apache/orc/TestUnrolledBitPack.java
+++ /dev/null
@@ -1,114 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc;
-
-import static org.junit.Assert.assertEquals;
-
-import java.io.File;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.List;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TestName;
-import org.junit.runner.RunWith;
-import org.junit.runners.Parameterized;
-import org.junit.runners.Parameterized.Parameters;
-
-import com.google.common.collect.Lists;
-import com.google.common.primitives.Longs;
-
-@RunWith(value = Parameterized.class)
-public class TestUnrolledBitPack {
-
-  private long val;
-
-  public TestUnrolledBitPack(long val) {
-    this.val = val;
-  }
-
-  @Parameters
-  public static Collection<Object[]> data() {
-    Object[][] data = new Object[][] { { -1 }, { 1 }, { 7 }, { -128 }, { 32000 }, { 8300000 },
-        { Integer.MAX_VALUE }, { 540000000000L }, { 140000000000000L }, { 36000000000000000L },
-        { Long.MAX_VALUE } };
-    return Arrays.asList(data);
-  }
-
-  Path workDir = new Path(System.getProperty("test.tmp.dir", "target" + File.separator + "test"
-      + File.separator + "tmp"));
-
-  Configuration conf;
-  FileSystem fs;
-  Path testFilePath;
-
-  @Rule
-  public TestName testCaseName = new TestName();
-
-  @Before
-  public void openFileSystem() throws Exception {
-    conf = new Configuration();
-    fs = FileSystem.getLocal(conf);
-    testFilePath = new Path(workDir, "TestOrcFile." + testCaseName.getMethodName() + ".orc");
-    fs.delete(testFilePath, false);
-  }
-
-  @Test
-  public void testBitPacking() throws Exception {
-    TypeDescription schema = TypeDescription.createLong();
-
-    long[] inp = new long[] { val, 0, val, val, 0, val, 0, val, val, 0, val, 0, val, val, 0, 0,
-        val, val, 0, val, 0, 0, val, 0, val, 0, val, 0, 0, val, 0, val, 0, val, 0, 0, val, 0, val,
-        0, val, 0, 0, val, 0, val, 0, val, 0, 0, val, 0, val, 0, val, 0, 0, val, 0, val, 0, val, 0,
-        0, val, 0, val, 0, val, 0, 0, val, 0, val, 0, val, 0, 0, val, 0, val, 0, val, 0, 0, val, 0,
-        val, 0, val, 0, 0, val, 0, val, 0, 0, val, val };
-    List<Long> input = Lists.newArrayList(Longs.asList(inp));
-
-    Writer writer = OrcFile.createWriter(
-        testFilePath,
-        OrcFile.writerOptions(conf).setSchema(schema).stripeSize(100000)
-            .compress(CompressionKind.NONE).bufferSize(10000));
-    VectorizedRowBatch batch = schema.createRowBatch();
-    for (Long l : input) {
-      int row = batch.size++;
-      ((LongColumnVector) batch.cols[0]).vector[row] = l;
-    }
-    writer.addRowBatch(batch);
-    writer.close();
-
-    Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs));
-    RecordReader rows = reader.rows();
-    batch = reader.getSchema().createRowBatch();
-    int idx = 0;
-    while (rows.nextBatch(batch)) {
-      for(int r=0; r < batch.size; ++r) {
-        assertEquals(input.get(idx++).longValue(),
-            ((LongColumnVector) batch.cols[0]).vector[r]);
-      }
-    }
-  }
-
-}
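
Besides the bit-packing coverage, the deleted test above also shows the standard vectorized read loop: call nextBatch until it returns false and walk batch.size rows each time. A stripped-down sketch of just that loop, assuming an existing ORC file whose single column is a bigint (the path argument and the sum/count reporting are illustrative only):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.orc.OrcFile;
import org.apache.orc.Reader;
import org.apache.orc.RecordReader;

public class ReadLongColumnSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);
    Path path = new Path(args[0]);   // path to an ORC file with a single bigint column

    Reader reader = OrcFile.createReader(path, OrcFile.readerOptions(conf).filesystem(fs));
    RecordReader rows = reader.rows();
    VectorizedRowBatch batch = reader.getSchema().createRowBatch();
    long sum = 0;
    long count = 0;
    // nextBatch fills the batch and returns false once the file is exhausted
    while (rows.nextBatch(batch)) {
      LongColumnVector col = (LongColumnVector) batch.cols[0];
      for (int r = 0; r < batch.size; ++r) {
        sum += col.vector[r];
        ++count;
      }
    }
    rows.close();
    System.out.println(count + " rows, sum = " + sum);
  }
}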


[14/37] hive git commit: HIVE-17118. Move the hive-orc source files to make the package names unique.

Posted by om...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/impl/WriterImpl.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/WriterImpl.java b/orc/src/java/org/apache/orc/impl/WriterImpl.java
deleted file mode 100644
index d703563..0000000
--- a/orc/src/java/org/apache/orc/impl/WriterImpl.java
+++ /dev/null
@@ -1,2446 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc.impl;
-
-import static com.google.common.base.Preconditions.checkArgument;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.sql.Timestamp;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.Map;
-import java.util.TimeZone;
-import java.util.TreeMap;
-
-import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
-import org.apache.hadoop.hive.ql.util.JavaDataModel;
-import org.apache.orc.BinaryColumnStatistics;
-import org.apache.orc.BloomFilterIO;
-import org.apache.orc.OrcConf;
-import org.apache.orc.OrcFile;
-import org.apache.orc.OrcProto;
-import org.apache.orc.OrcProto.BloomFilterIndex;
-import org.apache.orc.OrcProto.RowIndex;
-import org.apache.orc.OrcProto.Stream;
-import org.apache.orc.OrcUtils;
-import org.apache.orc.StringColumnStatistics;
-import org.apache.orc.StripeInformation;
-import org.apache.orc.TypeDescription;
-import org.apache.orc.Writer;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.UnionColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.hadoop.io.Text;
-
-import com.google.common.annotations.VisibleForTesting;
-import com.google.common.collect.Lists;
-import com.google.common.primitives.Longs;
-import com.google.protobuf.ByteString;
-
-/**
- * An ORC file writer. The file is divided into stripes, which is the natural
- * unit of work when reading. Each stripe is buffered in memory until the
- * memory reaches the stripe size and then it is written out broken down by
- * columns. Each column is written by a TreeWriter that is specific to that
- * type of column. TreeWriters may have children TreeWriters that handle the
- * sub-types. Each of the TreeWriters writes the column's data as a set of
- * streams.
- *
- * This class is unsynchronized like most Stream objects, so the creation of
- * an OrcFile and all access to a single instance must happen from a single
- * thread.
- *
- * There are no known cases today where these operations happen from different
- * threads.
- *
- * Caveat: the MemoryManager is created when the WriterOptions are created and
- * has to be confined to a single thread as well.
- *
- */
-public class WriterImpl implements Writer, MemoryManager.Callback {
-
-  private static final Logger LOG = LoggerFactory.getLogger(WriterImpl.class);
-
-  private static final int MIN_ROW_INDEX_STRIDE = 1000;
-
-  private final Path path;
-  private final int rowIndexStride;
-  private final TypeDescription schema;
-
-  @VisibleForTesting
-  protected final PhysicalWriter physWriter;
-  private int columnCount;
-  private long rowCount = 0;
-  private long rowsInStripe = 0;
-  private long rawDataSize = 0;
-  private int rowsInIndex = 0;
-  private int stripesAtLastFlush = -1;
-  private final List<OrcProto.StripeInformation> stripes =
-    new ArrayList<OrcProto.StripeInformation>();
-  private final Map<String, ByteString> userMetadata =
-    new TreeMap<String, ByteString>();
-  private final StreamFactory streamFactory = new StreamFactory();
-  private final TreeWriter treeWriter;
-  private final boolean buildIndex;
-  private final MemoryManager memoryManager;
-  private final OrcFile.Version version;
-  private final Configuration conf;
-  private final OrcFile.WriterCallback callback;
-  private final OrcFile.WriterContext callbackContext;
-  private final OrcFile.EncodingStrategy encodingStrategy;
-  private final boolean[] bloomFilterColumns;
-  private final double bloomFilterFpp;
-  private boolean writeTimeZone;
-
-  public WriterImpl(FileSystem fs,
-                    Path path,
-                    OrcFile.WriterOptions opts) throws IOException {
-    this(new PhysicalFsWriter(fs, path, opts.getSchema().getMaximumId() + 1, opts), path, opts);
-  }
-
-  public WriterImpl(PhysicalWriter writer,
-                    Path pathForMem,
-                    OrcFile.WriterOptions opts) throws IOException {
-    this.physWriter = writer;
-    this.path = pathForMem;
-    this.conf = opts.getConfiguration();
-    this.schema = opts.getSchema();
-    this.callback = opts.getCallback();
-    if (callback != null) {
-      callbackContext = new OrcFile.WriterContext(){
-
-        @Override
-        public Writer getWriter() {
-          return WriterImpl.this;
-        }
-      };
-    } else {
-      callbackContext = null;
-    }
-    this.version = opts.getVersion();
-    this.encodingStrategy = opts.getEncodingStrategy();
-    this.rowIndexStride = opts.getRowIndexStride();
-    this.memoryManager = opts.getMemoryManager();
-    buildIndex = rowIndexStride > 0;
-    if (version == OrcFile.Version.V_0_11) {
-      /* do not write bloom filters for ORC v11 */
-      this.bloomFilterColumns = new boolean[schema.getMaximumId() + 1];
-    } else {
-      this.bloomFilterColumns =
-          OrcUtils.includeColumns(opts.getBloomFilterColumns(), schema);
-    }
-    this.bloomFilterFpp = opts.getBloomFilterFpp();
-    treeWriter = createTreeWriter(schema, streamFactory, false);
-    if (buildIndex && rowIndexStride < MIN_ROW_INDEX_STRIDE) {
-      throw new IllegalArgumentException("Row stride must be at least " +
-          MIN_ROW_INDEX_STRIDE);
-    }
-
-    // ensure that we are able to handle callbacks before we register ourselves
-    if (path != null) {
-      memoryManager.addWriter(path, opts.getStripeSize(), this);
-    }
-  }
-
-  @Override
-  public boolean checkMemory(double newScale) throws IOException {
-    long limit = (long) Math.round(physWriter.getPhysicalStripeSize() * newScale);
-    long size = estimateStripeSize();
-    if (LOG.isDebugEnabled()) {
-      LOG.debug("ORC writer " + path + " size = " + size + " limit = " +
-                limit);
-    }
-    if (size > limit) {
-      flushStripe();
-      return true;
-    }
-    return false;
-  }
-
-  private static class RowIndexPositionRecorder implements PositionRecorder {
-    private final OrcProto.RowIndexEntry.Builder builder;
-
-    RowIndexPositionRecorder(OrcProto.RowIndexEntry.Builder builder) {
-      this.builder = builder;
-    }
-
-    @Override
-    public void addPosition(long position) {
-      builder.addPositions(position);
-    }
-  }
-
-  /**
-   * Interface from the Writer to the TreeWriters. This limits the visibility
-   * that the TreeWriters have into the Writer.
-   */
-  private class StreamFactory {
-    /**
-     * Create a stream to store part of a column.
-     * @param column the column id for the stream
-     * @param kind the kind of stream
-     * @return the OutStream that the section should be written to.
-     * @throws IOException
-     */
-    public OutStream createStream(int column,
-                                  OrcProto.Stream.Kind kind
-                                  ) throws IOException {
-      final StreamName name = new StreamName(column, kind);
-      return physWriter.getOrCreatePhysicalStream(name);
-    }
-
-    public void writeIndex(int column, RowIndex.Builder rowIndex) throws IOException {
-      physWriter.writeIndexStream(new StreamName(column, Stream.Kind.ROW_INDEX), rowIndex);
-    }
-
-    public void writeBloomFilter(
-        int column, BloomFilterIndex.Builder bloomFilterIndex) throws IOException {
-      physWriter.writeBloomFilterStream(
-          new StreamName(column, Stream.Kind.BLOOM_FILTER), bloomFilterIndex);
-    }
-
-    /**
-     * Get the next column id.
-     * @return a number from 0 to the number of columns - 1
-     */
-    public int getNextColumnId() {
-      return columnCount++;
-    }
-
-    /**
-     * Get the stride rate of the row index.
-     */
-    public int getRowIndexStride() {
-      return rowIndexStride;
-    }
-
-    /**
-     * Should the writer be building the row index?
-     * @return true if we are building the index
-     */
-    public boolean buildIndex() {
-      return buildIndex;
-    }
-
-    /**
-     * Is the ORC file compressed?
-     * @return are the streams compressed
-     */
-    public boolean isCompressed() {
-      return physWriter.isCompressed();
-    }
-
-    /**
-     * Get the encoding strategy to use.
-     * @return encoding strategy
-     */
-    public OrcFile.EncodingStrategy getEncodingStrategy() {
-      return encodingStrategy;
-    }
-
-    /**
-     * Get the bloom filter columns
-     * @return bloom filter columns
-     */
-    public boolean[] getBloomFilterColumns() {
-      return bloomFilterColumns;
-    }
-
-    /**
-     * Get bloom filter false positive percentage.
-     * @return fpp
-     */
-    public double getBloomFilterFPP() {
-      return bloomFilterFpp;
-    }
-
-    /**
-     * Get the writer's configuration.
-     * @return configuration
-     */
-    public Configuration getConfiguration() {
-      return conf;
-    }
-
-    /**
-     * Get the version of the file to write.
-     */
-    public OrcFile.Version getVersion() {
-      return version;
-    }
-
-    public void useWriterTimeZone(boolean val) {
-      writeTimeZone = val;
-    }
-
-    public boolean hasWriterTimeZone() {
-      return writeTimeZone;
-    }
-  }
-
-  /**
-   * The parent class of all of the writers for each column. Each column
-   * is written by an instance of this class. The compound types (struct,
-   * list, map, and union) have child tree writers that write the child
-   * types.
-   */
-  private abstract static class TreeWriter {
-    protected final int id;
-    protected final BitFieldWriter isPresent;
-    private final boolean isCompressed;
-    protected final ColumnStatisticsImpl indexStatistics;
-    protected final ColumnStatisticsImpl stripeColStatistics;
-    private final ColumnStatisticsImpl fileStatistics;
-    protected TreeWriter[] childrenWriters;
-    protected final RowIndexPositionRecorder rowIndexPosition;
-    private final OrcProto.RowIndex.Builder rowIndex;
-    private final OrcProto.RowIndexEntry.Builder rowIndexEntry;
-    protected final BloomFilterIO bloomFilter;
-    protected final boolean createBloomFilter;
-    private final OrcProto.BloomFilterIndex.Builder bloomFilterIndex;
-    private final OrcProto.BloomFilter.Builder bloomFilterEntry;
-    private boolean foundNulls;
-    private OutStream isPresentOutStream;
-    private final List<OrcProto.StripeStatistics.Builder> stripeStatsBuilders;
-    private final StreamFactory streamFactory;
-
-    /**
-     * Create a tree writer.
-     * @param columnId the column id of the column to write
-     * @param schema the row schema
-     * @param streamFactory limited access to the Writer's data.
-     * @param nullable can the value be null?
-     * @throws IOException
-     */
-    TreeWriter(int columnId,
-               TypeDescription schema,
-               StreamFactory streamFactory,
-               boolean nullable) throws IOException {
-      this.streamFactory = streamFactory;
-      this.isCompressed = streamFactory.isCompressed();
-      this.id = columnId;
-      if (nullable) {
-        isPresentOutStream = streamFactory.createStream(id,
-            OrcProto.Stream.Kind.PRESENT);
-        isPresent = new BitFieldWriter(isPresentOutStream, 1);
-      } else {
-        isPresent = null;
-      }
-      this.foundNulls = false;
-      createBloomFilter = streamFactory.getBloomFilterColumns()[columnId];
-      indexStatistics = ColumnStatisticsImpl.create(schema);
-      stripeColStatistics = ColumnStatisticsImpl.create(schema);
-      fileStatistics = ColumnStatisticsImpl.create(schema);
-      childrenWriters = new TreeWriter[0];
-      rowIndex = OrcProto.RowIndex.newBuilder();
-      rowIndexEntry = OrcProto.RowIndexEntry.newBuilder();
-      rowIndexPosition = new RowIndexPositionRecorder(rowIndexEntry);
-      stripeStatsBuilders = Lists.newArrayList();
-      if (createBloomFilter) {
-        bloomFilterEntry = OrcProto.BloomFilter.newBuilder();
-        bloomFilterIndex = OrcProto.BloomFilterIndex.newBuilder();
-        bloomFilter = new BloomFilterIO(streamFactory.getRowIndexStride(),
-            streamFactory.getBloomFilterFPP());
-      } else {
-        bloomFilterEntry = null;
-        bloomFilterIndex = null;
-        bloomFilter = null;
-      }
-    }
-
-    protected OrcProto.RowIndex.Builder getRowIndex() {
-      return rowIndex;
-    }
-
-    protected ColumnStatisticsImpl getStripeStatistics() {
-      return stripeColStatistics;
-    }
-
-    protected OrcProto.RowIndexEntry.Builder getRowIndexEntry() {
-      return rowIndexEntry;
-    }
-
-    IntegerWriter createIntegerWriter(PositionedOutputStream output,
-                                      boolean signed, boolean isDirectV2,
-                                      StreamFactory writer) {
-      if (isDirectV2) {
-        boolean alignedBitpacking = false;
-        if (writer.getEncodingStrategy().equals(OrcFile.EncodingStrategy.SPEED)) {
-          alignedBitpacking = true;
-        }
-        return new RunLengthIntegerWriterV2(output, signed, alignedBitpacking);
-      } else {
-        return new RunLengthIntegerWriter(output, signed);
-      }
-    }
-
-    boolean isNewWriteFormat(StreamFactory writer) {
-      return writer.getVersion() != OrcFile.Version.V_0_11;
-    }
-
-    /**
-     * Handle the top level object write.
-     *
-     * This default method is used for all types except structs, which are the
-     * typical case and override it. VectorizedRowBatch assumes the top level
-     * object is a struct, so we use the first column for all other types.
-     * @param batch the batch to write from
-     * @param offset the row to start on
-     * @param length the number of rows to write
-     * @throws IOException
-     */
-    void writeRootBatch(VectorizedRowBatch batch, int offset,
-                        int length) throws IOException {
-      writeBatch(batch.cols[0], offset, length);
-    }
-
-    /**
-     * Write the values from the given vector from offset for length elements.
-     * @param vector the vector to write from
-     * @param offset the first value from the vector to write
-     * @param length the number of values from the vector to write
-     * @throws IOException
-     */
-    void writeBatch(ColumnVector vector, int offset,
-                    int length) throws IOException {
-      if (vector.noNulls) {
-        indexStatistics.increment(length);
-        if (isPresent != null) {
-          for (int i = 0; i < length; ++i) {
-            isPresent.write(1);
-          }
-        }
-      } else {
-        if (vector.isRepeating) {
-          boolean isNull = vector.isNull[0];
-          if (isPresent != null) {
-            for (int i = 0; i < length; ++i) {
-              isPresent.write(isNull ? 0 : 1);
-            }
-          }
-          if (isNull) {
-            foundNulls = true;
-            indexStatistics.setNull();
-          } else {
-            indexStatistics.increment(length);
-          }
-        } else {
-          // count the number of non-null values
-          int nonNullCount = 0;
-          for(int i = 0; i < length; ++i) {
-            boolean isNull = vector.isNull[i + offset];
-            if (!isNull) {
-              nonNullCount += 1;
-            }
-            if (isPresent != null) {
-              isPresent.write(isNull ? 0 : 1);
-            }
-          }
-          indexStatistics.increment(nonNullCount);
-          if (nonNullCount != length) {
-            foundNulls = true;
-            indexStatistics.setNull();
-          }
-        }
-      }
-    }
-
-    private void removeIsPresentPositions() {
-      for(int i=0; i < rowIndex.getEntryCount(); ++i) {
-        OrcProto.RowIndexEntry.Builder entry = rowIndex.getEntryBuilder(i);
-        List<Long> positions = entry.getPositionsList();
-        // bit streams use 3 positions if uncompressed, 4 if compressed
-        positions = positions.subList(isCompressed ? 4 : 3, positions.size());
-        entry.clearPositions();
-        entry.addAllPositions(positions);
-      }
-    }
-
-    /**
-     * Write the stripe out to the file.
-     * @param builder the stripe footer that contains the information about the
-     *                layout of the stripe. The TreeWriter is required to update
-     *                the footer with its information.
-     * @param requiredIndexEntries the number of index entries that are
-     *                             required. This is used to check that the
-     *                             row index is well formed.
-     * @throws IOException
-     */
-    void writeStripe(OrcProto.StripeFooter.Builder builder,
-                     int requiredIndexEntries) throws IOException {
-      if (isPresent != null) {
-        isPresent.flush();
-
-        // if no nulls are found in a stream, then suppress the stream
-        if (!foundNulls) {
-          isPresentOutStream.suppress();
-          // since isPresent bitstream is suppressed, update the index to
-          // remove the positions of the isPresent stream
-          if (streamFactory.buildIndex()) {
-            removeIsPresentPositions();
-          }
-        }
-      }
-
-      // merge stripe-level column statistics to file statistics and write it to
-      // stripe statistics
-      OrcProto.StripeStatistics.Builder stripeStatsBuilder = OrcProto.StripeStatistics.newBuilder();
-      writeStripeStatistics(stripeStatsBuilder, this);
-      stripeStatsBuilders.add(stripeStatsBuilder);
-
-      // reset the flag for next stripe
-      foundNulls = false;
-
-      builder.addColumns(getEncoding());
-      if (streamFactory.hasWriterTimeZone()) {
-        builder.setWriterTimezone(TimeZone.getDefault().getID());
-      }
-      if (streamFactory.buildIndex()) {
-        if (rowIndex.getEntryCount() != requiredIndexEntries) {
-          throw new IllegalArgumentException("Column has wrong number of " +
-               "index entries found: " + rowIndex.getEntryCount() + " expected: " +
-               requiredIndexEntries);
-        }
-        streamFactory.writeIndex(id, rowIndex);
-      }
-
-      rowIndex.clear();
-      rowIndexEntry.clear();
-
-      // write the bloom filter to out stream
-      if (createBloomFilter) {
-        streamFactory.writeBloomFilter(id, bloomFilterIndex);
-        bloomFilterIndex.clear();
-        bloomFilterEntry.clear();
-      }
-    }
-
-    private void writeStripeStatistics(OrcProto.StripeStatistics.Builder builder,
-        TreeWriter treeWriter) {
-      treeWriter.fileStatistics.merge(treeWriter.stripeColStatistics);
-      builder.addColStats(treeWriter.stripeColStatistics.serialize().build());
-      treeWriter.stripeColStatistics.reset();
-      for (TreeWriter child : treeWriter.getChildrenWriters()) {
-        writeStripeStatistics(builder, child);
-      }
-    }
-
-    TreeWriter[] getChildrenWriters() {
-      return childrenWriters;
-    }
-
-    /**
-     * Get the encoding for this column.
-     * @return the information about the encoding of this column
-     */
-    OrcProto.ColumnEncoding getEncoding() {
-      return OrcProto.ColumnEncoding.newBuilder().setKind(
-          OrcProto.ColumnEncoding.Kind.DIRECT).build();
-    }
-
-    /**
-     * Create a row index entry with the previous location and the current
-     * index statistics. Also merges the index statistics into the file
-     * statistics before they are cleared. Finally, it records the start of the
-     * next index and ensures all of the children columns also create an entry.
-     * @throws IOException
-     */
-    void createRowIndexEntry() throws IOException {
-      stripeColStatistics.merge(indexStatistics);
-      rowIndexEntry.setStatistics(indexStatistics.serialize());
-      indexStatistics.reset();
-      rowIndex.addEntry(rowIndexEntry);
-      rowIndexEntry.clear();
-      addBloomFilterEntry();
-      recordPosition(rowIndexPosition);
-      for(TreeWriter child: childrenWriters) {
-        child.createRowIndexEntry();
-      }
-    }
-
-    void addBloomFilterEntry() {
-      if (createBloomFilter) {
-        bloomFilterEntry.setNumHashFunctions(bloomFilter.getNumHashFunctions());
-        bloomFilterEntry.addAllBitset(Longs.asList(bloomFilter.getBitSet()));
-        bloomFilterIndex.addBloomFilter(bloomFilterEntry.build());
-        bloomFilter.reset();
-        bloomFilterEntry.clear();
-      }
-    }
-
-    /**
-     * Record the current position in each of this column's streams.
-     * @param recorder where should the locations be recorded
-     * @throws IOException
-     */
-    void recordPosition(PositionRecorder recorder) throws IOException {
-      if (isPresent != null) {
-        isPresent.getPosition(recorder);
-      }
-    }
-
-    /**
-     * Estimate how much memory the writer is consuming excluding the streams.
-     * @return the number of bytes.
-     */
-    long estimateMemory() {
-      long result = 0;
-      for (TreeWriter child: childrenWriters) {
-        result += child.estimateMemory();
-      }
-      return result;
-    }
-  }
-
-  private static class BooleanTreeWriter extends TreeWriter {
-    private final BitFieldWriter writer;
-
-    BooleanTreeWriter(int columnId,
-                      TypeDescription schema,
-                      StreamFactory writer,
-                      boolean nullable) throws IOException {
-      super(columnId, schema, writer, nullable);
-      PositionedOutputStream out = writer.createStream(id,
-          OrcProto.Stream.Kind.DATA);
-      this.writer = new BitFieldWriter(out, 1);
-      recordPosition(rowIndexPosition);
-    }
-
-    @Override
-    void writeBatch(ColumnVector vector, int offset,
-                    int length) throws IOException {
-      super.writeBatch(vector, offset, length);
-      LongColumnVector vec = (LongColumnVector) vector;
-      if (vector.isRepeating) {
-        if (vector.noNulls || !vector.isNull[0]) {
-          int value = vec.vector[0] == 0 ? 0 : 1;
-          indexStatistics.updateBoolean(value != 0, length);
-          for(int i=0; i < length; ++i) {
-            writer.write(value);
-          }
-        }
-      } else {
-        for(int i=0; i < length; ++i) {
-          if (vec.noNulls || !vec.isNull[i + offset]) {
-            int value = vec.vector[i + offset] == 0 ? 0 : 1;
-            writer.write(value);
-            indexStatistics.updateBoolean(value != 0, 1);
-          }
-        }
-      }
-    }
-
-    @Override
-    void writeStripe(OrcProto.StripeFooter.Builder builder,
-                     int requiredIndexEntries) throws IOException {
-      super.writeStripe(builder, requiredIndexEntries);
-      writer.flush();
-      recordPosition(rowIndexPosition);
-    }
-
-    @Override
-    void recordPosition(PositionRecorder recorder) throws IOException {
-      super.recordPosition(recorder);
-      writer.getPosition(recorder);
-    }
-  }
-
-  private static class ByteTreeWriter extends TreeWriter {
-    private final RunLengthByteWriter writer;
-
-    ByteTreeWriter(int columnId,
-                      TypeDescription schema,
-                      StreamFactory writer,
-                      boolean nullable) throws IOException {
-      super(columnId, schema, writer, nullable);
-      this.writer = new RunLengthByteWriter(writer.createStream(id,
-          OrcProto.Stream.Kind.DATA));
-      recordPosition(rowIndexPosition);
-    }
-
-    @Override
-    void writeBatch(ColumnVector vector, int offset,
-                    int length) throws IOException {
-      super.writeBatch(vector, offset, length);
-      LongColumnVector vec = (LongColumnVector) vector;
-      if (vector.isRepeating) {
-        if (vector.noNulls || !vector.isNull[0]) {
-          byte value = (byte) vec.vector[0];
-          indexStatistics.updateInteger(value, length);
-          if (createBloomFilter) {
-            bloomFilter.addLong(value);
-          }
-          for(int i=0; i < length; ++i) {
-            writer.write(value);
-          }
-        }
-      } else {
-        for(int i=0; i < length; ++i) {
-          if (vec.noNulls || !vec.isNull[i + offset]) {
-            byte value = (byte) vec.vector[i + offset];
-            writer.write(value);
-            indexStatistics.updateInteger(value, 1);
-            if (createBloomFilter) {
-              bloomFilter.addLong(value);
-            }
-          }
-        }
-      }
-    }
-
-    @Override
-    void writeStripe(OrcProto.StripeFooter.Builder builder,
-                     int requiredIndexEntries) throws IOException {
-      super.writeStripe(builder, requiredIndexEntries);
-      writer.flush();
-      recordPosition(rowIndexPosition);
-    }
-
-    @Override
-    void recordPosition(PositionRecorder recorder) throws IOException {
-      super.recordPosition(recorder);
-      writer.getPosition(recorder);
-    }
-  }
-
-  private static class IntegerTreeWriter extends TreeWriter {
-    private final IntegerWriter writer;
-    private boolean isDirectV2 = true;
-
-    IntegerTreeWriter(int columnId,
-                      TypeDescription schema,
-                      StreamFactory writer,
-                      boolean nullable) throws IOException {
-      super(columnId, schema, writer, nullable);
-      OutStream out = writer.createStream(id,
-          OrcProto.Stream.Kind.DATA);
-      this.isDirectV2 = isNewWriteFormat(writer);
-      this.writer = createIntegerWriter(out, true, isDirectV2, writer);
-      recordPosition(rowIndexPosition);
-    }
-
-    @Override
-    OrcProto.ColumnEncoding getEncoding() {
-      if (isDirectV2) {
-        return OrcProto.ColumnEncoding.newBuilder()
-            .setKind(OrcProto.ColumnEncoding.Kind.DIRECT_V2).build();
-      }
-      return OrcProto.ColumnEncoding.newBuilder()
-          .setKind(OrcProto.ColumnEncoding.Kind.DIRECT).build();
-    }
-
-    @Override
-    void writeBatch(ColumnVector vector, int offset,
-                    int length) throws IOException {
-      super.writeBatch(vector, offset, length);
-      LongColumnVector vec = (LongColumnVector) vector;
-      if (vector.isRepeating) {
-        if (vector.noNulls || !vector.isNull[0]) {
-          long value = vec.vector[0];
-          indexStatistics.updateInteger(value, length);
-          if (createBloomFilter) {
-            bloomFilter.addLong(value);
-          }
-          for(int i=0; i < length; ++i) {
-            writer.write(value);
-          }
-        }
-      } else {
-        for(int i=0; i < length; ++i) {
-          if (vec.noNulls || !vec.isNull[i + offset]) {
-            long value = vec.vector[i + offset];
-            writer.write(value);
-            indexStatistics.updateInteger(value, 1);
-            if (createBloomFilter) {
-              bloomFilter.addLong(value);
-            }
-          }
-        }
-      }
-    }
-
-    @Override
-    void writeStripe(OrcProto.StripeFooter.Builder builder,
-                     int requiredIndexEntries) throws IOException {
-      super.writeStripe(builder, requiredIndexEntries);
-      writer.flush();
-      recordPosition(rowIndexPosition);
-    }
-
-    @Override
-    void recordPosition(PositionRecorder recorder) throws IOException {
-      super.recordPosition(recorder);
-      writer.getPosition(recorder);
-    }
-  }
-
-  private static class FloatTreeWriter extends TreeWriter {
-    private final PositionedOutputStream stream;
-    private final SerializationUtils utils;
-
-    FloatTreeWriter(int columnId,
-                      TypeDescription schema,
-                      StreamFactory writer,
-                      boolean nullable) throws IOException {
-      super(columnId, schema, writer, nullable);
-      this.stream = writer.createStream(id,
-          OrcProto.Stream.Kind.DATA);
-      this.utils = new SerializationUtils();
-      recordPosition(rowIndexPosition);
-    }
-
-    @Override
-    void writeBatch(ColumnVector vector, int offset,
-                    int length) throws IOException {
-      super.writeBatch(vector, offset, length);
-      DoubleColumnVector vec = (DoubleColumnVector) vector;
-      if (vector.isRepeating) {
-        if (vector.noNulls || !vector.isNull[0]) {
-          float value = (float) vec.vector[0];
-          indexStatistics.updateDouble(value);
-          if (createBloomFilter) {
-            bloomFilter.addDouble(value);
-          }
-          for(int i=0; i < length; ++i) {
-            utils.writeFloat(stream, value);
-          }
-        }
-      } else {
-        for(int i=0; i < length; ++i) {
-          if (vec.noNulls || !vec.isNull[i + offset]) {
-            float value = (float) vec.vector[i + offset];
-            utils.writeFloat(stream, value);
-            indexStatistics.updateDouble(value);
-            if (createBloomFilter) {
-              bloomFilter.addDouble(value);
-            }
-          }
-        }
-      }
-    }
-
-
-    @Override
-    void writeStripe(OrcProto.StripeFooter.Builder builder,
-                     int requiredIndexEntries) throws IOException {
-      super.writeStripe(builder, requiredIndexEntries);
-      stream.flush();
-      recordPosition(rowIndexPosition);
-    }
-
-    @Override
-    void recordPosition(PositionRecorder recorder) throws IOException {
-      super.recordPosition(recorder);
-      stream.getPosition(recorder);
-    }
-  }
-
-  private static class DoubleTreeWriter extends TreeWriter {
-    private final PositionedOutputStream stream;
-    private final SerializationUtils utils;
-
-    DoubleTreeWriter(int columnId,
-                    TypeDescription schema,
-                    StreamFactory writer,
-                    boolean nullable) throws IOException {
-      super(columnId, schema, writer, nullable);
-      this.stream = writer.createStream(id,
-          OrcProto.Stream.Kind.DATA);
-      this.utils = new SerializationUtils();
-      recordPosition(rowIndexPosition);
-    }
-
-    @Override
-    void writeBatch(ColumnVector vector, int offset,
-                    int length) throws IOException {
-      super.writeBatch(vector, offset, length);
-      DoubleColumnVector vec = (DoubleColumnVector) vector;
-      if (vector.isRepeating) {
-        if (vector.noNulls || !vector.isNull[0]) {
-          double value = vec.vector[0];
-          indexStatistics.updateDouble(value);
-          if (createBloomFilter) {
-            bloomFilter.addDouble(value);
-          }
-          for(int i=0; i < length; ++i) {
-            utils.writeDouble(stream, value);
-          }
-        }
-      } else {
-        for(int i=0; i < length; ++i) {
-          if (vec.noNulls || !vec.isNull[i + offset]) {
-            double value = vec.vector[i + offset];
-            utils.writeDouble(stream, value);
-            indexStatistics.updateDouble(value);
-            if (createBloomFilter) {
-              bloomFilter.addDouble(value);
-            }
-          }
-        }
-      }
-    }
-
-    @Override
-    void writeStripe(OrcProto.StripeFooter.Builder builder,
-                     int requiredIndexEntries) throws IOException {
-      super.writeStripe(builder, requiredIndexEntries);
-      stream.flush();
-      recordPosition(rowIndexPosition);
-    }
-
-    @Override
-    void recordPosition(PositionRecorder recorder) throws IOException {
-      super.recordPosition(recorder);
-      stream.getPosition(recorder);
-    }
-  }
-
-  private static abstract class StringBaseTreeWriter extends TreeWriter {
-    private static final int INITIAL_DICTIONARY_SIZE = 4096;
-    private final OutStream stringOutput;
-    private final IntegerWriter lengthOutput;
-    private final IntegerWriter rowOutput;
-    protected final StringRedBlackTree dictionary =
-        new StringRedBlackTree(INITIAL_DICTIONARY_SIZE);
-    protected final DynamicIntArray rows = new DynamicIntArray();
-    protected final PositionedOutputStream directStreamOutput;
-    protected final IntegerWriter directLengthOutput;
-    private final List<OrcProto.RowIndexEntry> savedRowIndex =
-        new ArrayList<OrcProto.RowIndexEntry>();
-    private final boolean buildIndex;
-    private final List<Long> rowIndexValueCount = new ArrayList<Long>();
-    // If the number of keys in a dictionary is greater than this fraction of
-    // the total number of non-null rows, turn off dictionary encoding
-    private final double dictionaryKeySizeThreshold;
-    protected boolean useDictionaryEncoding;
-    private boolean isDirectV2 = true;
-    private boolean doneDictionaryCheck;
-    private final boolean strideDictionaryCheck;
-
-    StringBaseTreeWriter(int columnId,
-                     TypeDescription schema,
-                     StreamFactory writer,
-                     boolean nullable) throws IOException {
-      super(columnId, schema, writer, nullable);
-      this.isDirectV2 = isNewWriteFormat(writer);
-      stringOutput = writer.createStream(id,
-          OrcProto.Stream.Kind.DICTIONARY_DATA);
-      lengthOutput = createIntegerWriter(writer.createStream(id,
-          OrcProto.Stream.Kind.LENGTH), false, isDirectV2, writer);
-      rowOutput = createIntegerWriter(writer.createStream(id,
-          OrcProto.Stream.Kind.DATA), false, isDirectV2, writer);
-      recordPosition(rowIndexPosition);
-      rowIndexValueCount.add(0L);
-      buildIndex = writer.buildIndex();
-      directStreamOutput = writer.createStream(id, OrcProto.Stream.Kind.DATA);
-      directLengthOutput = createIntegerWriter(writer.createStream(id,
-          OrcProto.Stream.Kind.LENGTH), false, isDirectV2, writer);
-      Configuration conf = writer.getConfiguration();
-      dictionaryKeySizeThreshold =
-          OrcConf.DICTIONARY_KEY_SIZE_THRESHOLD.getDouble(conf);
-      strideDictionaryCheck =
-          OrcConf.ROW_INDEX_STRIDE_DICTIONARY_CHECK.getBoolean(conf);
-      useDictionaryEncoding =  dictionaryKeySizeThreshold >= 0.000001; // Epsilon.
-      doneDictionaryCheck = !useDictionaryEncoding;
-    }
-
-    private boolean checkDictionaryEncoding() {
-      if (!doneDictionaryCheck) {
-        // Set the flag indicating whether to use dictionary encoding based on
-        // whether the fraction of distinct keys over the number of non-null
-        // rows is less than the configured threshold.
-        float ratio = rows.size() > 0 ? (float) (dictionary.size()) / rows.size() : 0.0f;
-        useDictionaryEncoding = !isDirectV2 || ratio <= dictionaryKeySizeThreshold;
-        doneDictionaryCheck = true;
-      }
-      return useDictionaryEncoding;
-    }
-
-    @Override
-    void writeStripe(OrcProto.StripeFooter.Builder builder,
-                     int requiredIndexEntries) throws IOException {
-      // if the number of rows in the stripe is less than dictionaryCheckAfterRows,
-      // the dictionary check would not have happened, so do it again here.
-      checkDictionaryEncoding();
-
-      if (useDictionaryEncoding) {
-        flushDictionary();
-      } else {
-        // flush out any leftover entries from the dictionary
-        if (rows.size() > 0) {
-          flushDictionary();
-        }
-
-        // suppress the stream for every stripe if dictionary is disabled
-        stringOutput.suppress();
-      }
-
-      // we need to build the row index before calling super, since the
-      // super method writes it out.
-      super.writeStripe(builder, requiredIndexEntries);
-      stringOutput.flush();
-      lengthOutput.flush();
-      rowOutput.flush();
-      directStreamOutput.flush();
-      directLengthOutput.flush();
-      // reset all of the fields to be ready for the next stripe.
-      dictionary.clear();
-      savedRowIndex.clear();
-      rowIndexValueCount.clear();
-      recordPosition(rowIndexPosition);
-      rowIndexValueCount.add(0L);
-
-      if (!useDictionaryEncoding) {
-        // record the start positions of the first index stride of the next
-        // stripe, i.e. the beginning of the direct streams, when the
-        // dictionary is disabled
-        recordDirectStreamPosition();
-      }
-    }
-
-    private void flushDictionary() throws IOException {
-      final int[] dumpOrder = new int[dictionary.size()];
-
-      if (useDictionaryEncoding) {
-        // Write the dictionary by traversing the red-black tree, writing out
-        // the bytes and lengths, and creating the map from the original order
-        // to the final sorted order.
-
-        dictionary.visit(new StringRedBlackTree.Visitor() {
-          private int currentId = 0;
-          @Override
-          public void visit(StringRedBlackTree.VisitorContext context
-                           ) throws IOException {
-            context.writeBytes(stringOutput);
-            lengthOutput.write(context.getLength());
-            dumpOrder[context.getOriginalPosition()] = currentId++;
-          }
-        });
-      } else {
-        // for direct encoding, we don't want the dictionary data stream
-        stringOutput.suppress();
-      }
-      int length = rows.size();
-      int rowIndexEntry = 0;
-      OrcProto.RowIndex.Builder rowIndex = getRowIndex();
-      Text text = new Text();
-      // write the values translated into the dump order.
-      for(int i = 0; i <= length; ++i) {
-        // now that we are writing out the row values, we can finalize the
-        // row index
-        if (buildIndex) {
-          while (i == rowIndexValueCount.get(rowIndexEntry) &&
-              rowIndexEntry < savedRowIndex.size()) {
-            OrcProto.RowIndexEntry.Builder base =
-                savedRowIndex.get(rowIndexEntry++).toBuilder();
-            if (useDictionaryEncoding) {
-              rowOutput.getPosition(new RowIndexPositionRecorder(base));
-            } else {
-              PositionRecorder posn = new RowIndexPositionRecorder(base);
-              directStreamOutput.getPosition(posn);
-              directLengthOutput.getPosition(posn);
-            }
-            rowIndex.addEntry(base.build());
-          }
-        }
-        if (i != length) {
-          if (useDictionaryEncoding) {
-            rowOutput.write(dumpOrder[rows.get(i)]);
-          } else {
-            dictionary.getText(text, rows.get(i));
-            directStreamOutput.write(text.getBytes(), 0, text.getLength());
-            directLengthOutput.write(text.getLength());
-          }
-        }
-      }
-      rows.clear();
-    }
-
-    @Override
-    OrcProto.ColumnEncoding getEncoding() {
-      // Returns the encoding used for the last call to writeStripe
-      if (useDictionaryEncoding) {
-        if(isDirectV2) {
-          return OrcProto.ColumnEncoding.newBuilder().setKind(
-              OrcProto.ColumnEncoding.Kind.DICTIONARY_V2).
-              setDictionarySize(dictionary.size()).build();
-        }
-        return OrcProto.ColumnEncoding.newBuilder().setKind(
-            OrcProto.ColumnEncoding.Kind.DICTIONARY).
-            setDictionarySize(dictionary.size()).build();
-      } else {
-        if(isDirectV2) {
-          return OrcProto.ColumnEncoding.newBuilder().setKind(
-              OrcProto.ColumnEncoding.Kind.DIRECT_V2).build();
-        }
-        return OrcProto.ColumnEncoding.newBuilder().setKind(
-            OrcProto.ColumnEncoding.Kind.DIRECT).build();
-      }
-    }
-
-    /**
-     * This method doesn't call the super method, because unlike most of the
-     * other TreeWriters, this one can't record the position in the streams
-     * until the stripe is being flushed. Therefore it saves all of the entries
-     * and augments them with the final information as the stripe is written.
-     * @throws IOException
-     */
-    @Override
-    void createRowIndexEntry() throws IOException {
-      getStripeStatistics().merge(indexStatistics);
-      OrcProto.RowIndexEntry.Builder rowIndexEntry = getRowIndexEntry();
-      rowIndexEntry.setStatistics(indexStatistics.serialize());
-      indexStatistics.reset();
-      OrcProto.RowIndexEntry base = rowIndexEntry.build();
-      savedRowIndex.add(base);
-      rowIndexEntry.clear();
-      addBloomFilterEntry();
-      recordPosition(rowIndexPosition);
-      rowIndexValueCount.add(Long.valueOf(rows.size()));
-      if (strideDictionaryCheck) {
-        checkDictionaryEncoding();
-      }
-      if (!useDictionaryEncoding) {
-        if (rows.size() > 0) {
-          flushDictionary();
-          // just record the start positions of next index stride
-          recordDirectStreamPosition();
-        } else {
-          // record the start positions of next index stride
-          recordDirectStreamPosition();
-          getRowIndex().addEntry(base);
-        }
-      }
-    }
-
-    private void recordDirectStreamPosition() throws IOException {
-      directStreamOutput.getPosition(rowIndexPosition);
-      directLengthOutput.getPosition(rowIndexPosition);
-    }
-
-    @Override
-    long estimateMemory() {
-      return rows.getSizeInBytes() + dictionary.getSizeInBytes();
-    }
-  }
-
-  private static class StringTreeWriter extends StringBaseTreeWriter {
-    StringTreeWriter(int columnId,
-                   TypeDescription schema,
-                   StreamFactory writer,
-                   boolean nullable) throws IOException {
-      super(columnId, schema, writer, nullable);
-    }
-
-    @Override
-    void writeBatch(ColumnVector vector, int offset,
-                    int length) throws IOException {
-      super.writeBatch(vector, offset, length);
-      BytesColumnVector vec = (BytesColumnVector) vector;
-      if (vector.isRepeating) {
-        if (vector.noNulls || !vector.isNull[0]) {
-          if (useDictionaryEncoding) {
-            int id = dictionary.add(vec.vector[0], vec.start[0], vec.length[0]);
-            for(int i=0; i < length; ++i) {
-              rows.add(id);
-            }
-          } else {
-            for(int i=0; i < length; ++i) {
-              directStreamOutput.write(vec.vector[0], vec.start[0],
-                  vec.length[0]);
-              directLengthOutput.write(vec.length[0]);
-            }
-          }
-          indexStatistics.updateString(vec.vector[0], vec.start[0],
-              vec.length[0], length);
-          if (createBloomFilter) {
-            bloomFilter.addBytes(vec.vector[0], vec.start[0], vec.length[0]);
-          }
-        }
-      } else {
-        for(int i=0; i < length; ++i) {
-          if (vec.noNulls || !vec.isNull[i + offset]) {
-            if (useDictionaryEncoding) {
-              rows.add(dictionary.add(vec.vector[offset + i],
-                  vec.start[offset + i], vec.length[offset + i]));
-            } else {
-              directStreamOutput.write(vec.vector[offset + i],
-                  vec.start[offset + i], vec.length[offset + i]);
-              directLengthOutput.write(vec.length[offset + i]);
-            }
-            indexStatistics.updateString(vec.vector[offset + i],
-                vec.start[offset + i], vec.length[offset + i], 1);
-            if (createBloomFilter) {
-              bloomFilter.addBytes(vec.vector[offset + i],
-                  vec.start[offset + i], vec.length[offset + i]);
-            }
-          }
-        }
-      }
-    }
-  }
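
The two code paths above (dictionary ids versus direct bytes plus lengths) can be pictured with a small self-contained sketch; the class and field names here are invented for illustration and only JDK types are used.

import java.io.ByteArrayOutputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

final class StringEncodingSketch {
  private final Map<String, Integer> dictionary = new LinkedHashMap<>();
  private final List<Integer> rows = new ArrayList<>();                    // dictionary path: one id per row
  private final ByteArrayOutputStream data = new ByteArrayOutputStream();  // direct path: raw bytes
  private final List<Integer> lengths = new ArrayList<>();                 // direct path: one length per row
  private final boolean useDictionary;

  StringEncodingSketch(boolean useDictionary) { this.useDictionary = useDictionary; }

  void write(String value) {
    if (useDictionary) {
      Integer id = dictionary.get(value);
      if (id == null) {                      // first time we see this value: assign the next id
        id = dictionary.size();
        dictionary.put(value, id);
      }
      rows.add(id);
    } else {
      byte[] utf8 = value.getBytes(StandardCharsets.UTF_8);
      data.write(utf8, 0, utf8.length);      // value bytes go to the data stream
      lengths.add(utf8.length);              // its length goes to the length stream
    }
  }
}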
-
-  /**
-   * Under the covers, char is written to ORC the same way as string.
-   */
-  private static class CharTreeWriter extends StringBaseTreeWriter {
-    private final int itemLength;
-    private final byte[] padding;
-
-    CharTreeWriter(int columnId,
-        TypeDescription schema,
-        StreamFactory writer,
-        boolean nullable) throws IOException {
-      super(columnId, schema, writer, nullable);
-      itemLength = schema.getMaxLength();
-      padding = new byte[itemLength];
-    }
-
-    @Override
-    void writeBatch(ColumnVector vector, int offset,
-                    int length) throws IOException {
-      super.writeBatch(vector, offset, length);
-      BytesColumnVector vec = (BytesColumnVector) vector;
-      if (vector.isRepeating) {
-        if (vector.noNulls || !vector.isNull[0]) {
-          byte[] ptr;
-          int ptrOffset;
-          if (vec.length[0] >= itemLength) {
-            ptr = vec.vector[0];
-            ptrOffset = vec.start[0];
-          } else {
-            ptr = padding;
-            ptrOffset = 0;
-            System.arraycopy(vec.vector[0], vec.start[0], ptr, 0,
-                vec.length[0]);
-            Arrays.fill(ptr, vec.length[0], itemLength, (byte) ' ');
-          }
-          if (useDictionaryEncoding) {
-            int id = dictionary.add(ptr, ptrOffset, itemLength);
-            for(int i=0; i < length; ++i) {
-              rows.add(id);
-            }
-          } else {
-            for(int i=0; i < length; ++i) {
-              directStreamOutput.write(ptr, ptrOffset, itemLength);
-              directLengthOutput.write(itemLength);
-            }
-          }
-          indexStatistics.updateString(ptr, ptrOffset, itemLength, length);
-          if (createBloomFilter) {
-            bloomFilter.addBytes(ptr, ptrOffset, itemLength);
-          }
-        }
-      } else {
-        for(int i=0; i < length; ++i) {
-          if (vec.noNulls || !vec.isNull[i + offset]) {
-            byte[] ptr;
-            int ptrOffset;
-            if (vec.length[offset + i] >= itemLength) {
-              ptr = vec.vector[offset + i];
-              ptrOffset = vec.start[offset + i];
-            } else {
-              // it is shorter than itemLength, so copy it and pad with spaces
-              ptr = padding;
-              ptrOffset = 0;
-              System.arraycopy(vec.vector[offset + i], vec.start[offset + i],
-                  ptr, 0, vec.length[offset + i]);
-              Arrays.fill(ptr, vec.length[offset + i], itemLength, (byte) ' ');
-            }
-            if (useDictionaryEncoding) {
-              rows.add(dictionary.add(ptr, ptrOffset, itemLength));
-            } else {
-              directStreamOutput.write(ptr, ptrOffset, itemLength);
-              directLengthOutput.write(itemLength);
-            }
-            indexStatistics.updateString(ptr, ptrOffset, itemLength, 1);
-            if (createBloomFilter) {
-              bloomFilter.addBytes(ptr, ptrOffset, itemLength);
-            }
-          }
-        }
-      }
-    }
-  }
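
The padding rule in CharTreeWriter can be restated as a tiny helper, assuming the byte-length semantics used above: values shorter than the declared length are space-padded, longer values contribute only their leading bytes. The VarcharTreeWriter below differs only in that it truncates to maxLength without padding.

import java.util.Arrays;

final class CharPadSketch {
  static byte[] padToLength(byte[] value, int itemLength) {
    if (value.length >= itemLength) {
      return Arrays.copyOf(value, itemLength);                  // keep only the leading itemLength bytes
    }
    byte[] padded = Arrays.copyOf(value, itemLength);           // copy the value, rest is zero-filled
    Arrays.fill(padded, value.length, itemLength, (byte) ' ');  // then pad the tail with spaces
    return padded;
  }
}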
-
-  /**
-   * Under the covers, varchar is written to ORC the same way as string.
-   */
-  private static class VarcharTreeWriter extends StringBaseTreeWriter {
-    private final int maxLength;
-
-    VarcharTreeWriter(int columnId,
-        TypeDescription schema,
-        StreamFactory writer,
-        boolean nullable) throws IOException {
-      super(columnId, schema, writer, nullable);
-      maxLength = schema.getMaxLength();
-    }
-
-    @Override
-    void writeBatch(ColumnVector vector, int offset,
-                    int length) throws IOException {
-      super.writeBatch(vector, offset, length);
-      BytesColumnVector vec = (BytesColumnVector) vector;
-      if (vector.isRepeating) {
-        if (vector.noNulls || !vector.isNull[0]) {
-          int itemLength = Math.min(vec.length[0], maxLength);
-          if (useDictionaryEncoding) {
-            int id = dictionary.add(vec.vector[0], vec.start[0], itemLength);
-            for(int i=0; i < length; ++i) {
-              rows.add(id);
-            }
-          } else {
-            for(int i=0; i < length; ++i) {
-              directStreamOutput.write(vec.vector[0], vec.start[0],
-                  itemLength);
-              directLengthOutput.write(itemLength);
-            }
-          }
-          indexStatistics.updateString(vec.vector[0], vec.start[0],
-              itemLength, length);
-          if (createBloomFilter) {
-            bloomFilter.addBytes(vec.vector[0], vec.start[0], itemLength);
-          }
-        }
-      } else {
-        for(int i=0; i < length; ++i) {
-          if (vec.noNulls || !vec.isNull[i + offset]) {
-            int itemLength = Math.min(vec.length[offset + i], maxLength);
-            if (useDictionaryEncoding) {
-              rows.add(dictionary.add(vec.vector[offset + i],
-                  vec.start[offset + i], itemLength));
-            } else {
-              directStreamOutput.write(vec.vector[offset + i],
-                  vec.start[offset + i], itemLength);
-              directLengthOutput.write(itemLength);
-            }
-            indexStatistics.updateString(vec.vector[offset + i],
-                vec.start[offset + i], itemLength, 1);
-            if (createBloomFilter) {
-              bloomFilter.addBytes(vec.vector[offset + i],
-                  vec.start[offset + i], itemLength);
-            }
-          }
-        }
-      }
-    }
-  }
-
-  private static class BinaryTreeWriter extends TreeWriter {
-    private final PositionedOutputStream stream;
-    private final IntegerWriter length;
-    private boolean isDirectV2 = true;
-
-    BinaryTreeWriter(int columnId,
-                     TypeDescription schema,
-                     StreamFactory writer,
-                     boolean nullable) throws IOException {
-      super(columnId, schema, writer, nullable);
-      this.stream = writer.createStream(id,
-          OrcProto.Stream.Kind.DATA);
-      this.isDirectV2 = isNewWriteFormat(writer);
-      this.length = createIntegerWriter(writer.createStream(id,
-          OrcProto.Stream.Kind.LENGTH), false, isDirectV2, writer);
-      recordPosition(rowIndexPosition);
-    }
-
-    @Override
-    OrcProto.ColumnEncoding getEncoding() {
-      if (isDirectV2) {
-        return OrcProto.ColumnEncoding.newBuilder()
-            .setKind(OrcProto.ColumnEncoding.Kind.DIRECT_V2).build();
-      }
-      return OrcProto.ColumnEncoding.newBuilder()
-          .setKind(OrcProto.ColumnEncoding.Kind.DIRECT).build();
-    }
-
-    @Override
-    void writeBatch(ColumnVector vector, int offset,
-                    int length) throws IOException {
-      super.writeBatch(vector, offset, length);
-      BytesColumnVector vec = (BytesColumnVector) vector;
-      if (vector.isRepeating) {
-        if (vector.noNulls || !vector.isNull[0]) {
-          for(int i=0; i < length; ++i) {
-            stream.write(vec.vector[0], vec.start[0],
-                  vec.length[0]);
-            this.length.write(vec.length[0]);
-          }
-          indexStatistics.updateBinary(vec.vector[0], vec.start[0],
-              vec.length[0], length);
-          if (createBloomFilter) {
-            bloomFilter.addBytes(vec.vector[0], vec.start[0], vec.length[0]);
-          }
-        }
-      } else {
-        for(int i=0; i < length; ++i) {
-          if (vec.noNulls || !vec.isNull[i + offset]) {
-            stream.write(vec.vector[offset + i],
-                vec.start[offset + i], vec.length[offset + i]);
-            this.length.write(vec.length[offset + i]);
-            indexStatistics.updateBinary(vec.vector[offset + i],
-                vec.start[offset + i], vec.length[offset + i], 1);
-            if (createBloomFilter) {
-              bloomFilter.addBytes(vec.vector[offset + i],
-                  vec.start[offset + i], vec.length[offset + i]);
-            }
-          }
-        }
-      }
-    }
-
-
-    @Override
-    void writeStripe(OrcProto.StripeFooter.Builder builder,
-                     int requiredIndexEntries) throws IOException {
-      super.writeStripe(builder, requiredIndexEntries);
-      stream.flush();
-      length.flush();
-      recordPosition(rowIndexPosition);
-    }
-
-    @Override
-    void recordPosition(PositionRecorder recorder) throws IOException {
-      super.recordPosition(recorder);
-      stream.getPosition(recorder);
-      length.getPosition(recorder);
-    }
-  }
-
-  public static long MILLIS_PER_DAY = 24 * 60 * 60 * 1000;
-  public static long NANOS_PER_MILLI = 1000000;
-  public static final int MILLIS_PER_SECOND = 1000;
-  static final int NANOS_PER_SECOND = 1000000000;
-  public static final String BASE_TIMESTAMP_STRING = "2015-01-01 00:00:00";
-
-  private static class TimestampTreeWriter extends TreeWriter {
-    private final IntegerWriter seconds;
-    private final IntegerWriter nanos;
-    private final boolean isDirectV2;
-    private final long base_timestamp;
-
-    TimestampTreeWriter(int columnId,
-                     TypeDescription schema,
-                     StreamFactory writer,
-                     boolean nullable) throws IOException {
-      super(columnId, schema, writer, nullable);
-      this.isDirectV2 = isNewWriteFormat(writer);
-      this.seconds = createIntegerWriter(writer.createStream(id,
-          OrcProto.Stream.Kind.DATA), true, isDirectV2, writer);
-      this.nanos = createIntegerWriter(writer.createStream(id,
-          OrcProto.Stream.Kind.SECONDARY), false, isDirectV2, writer);
-      recordPosition(rowIndexPosition);
-      // for unit tests to set different time zones
-      this.base_timestamp = Timestamp.valueOf(BASE_TIMESTAMP_STRING).getTime() / MILLIS_PER_SECOND;
-      writer.useWriterTimeZone(true);
-    }
-
-    @Override
-    OrcProto.ColumnEncoding getEncoding() {
-      if (isDirectV2) {
-        return OrcProto.ColumnEncoding.newBuilder()
-            .setKind(OrcProto.ColumnEncoding.Kind.DIRECT_V2).build();
-      }
-      return OrcProto.ColumnEncoding.newBuilder()
-          .setKind(OrcProto.ColumnEncoding.Kind.DIRECT).build();
-    }
-
-    @Override
-    void writeBatch(ColumnVector vector, int offset,
-                    int length) throws IOException {
-      super.writeBatch(vector, offset, length);
-      TimestampColumnVector vec = (TimestampColumnVector) vector;
-      Timestamp val;
-      if (vector.isRepeating) {
-        if (vector.noNulls || !vector.isNull[0]) {
-          val = vec.asScratchTimestamp(0);
-          long millis = val.getTime();
-          indexStatistics.updateTimestamp(millis);
-          if (createBloomFilter) {
-            bloomFilter.addLong(millis);
-          }
-          final long secs = millis / MILLIS_PER_SECOND - base_timestamp;
-          final long nano = formatNanos(val.getNanos());
-          for(int i=0; i < length; ++i) {
-            seconds.write(secs);
-            nanos.write(nano);
-          }
-        }
-      } else {
-        for(int i=0; i < length; ++i) {
-          if (vec.noNulls || !vec.isNull[i + offset]) {
-            val = vec.asScratchTimestamp(i + offset);
-            long millis = val.getTime();
-            long secs = millis / MILLIS_PER_SECOND - base_timestamp;
-            seconds.write(secs);
-            nanos.write(formatNanos(val.getNanos()));
-            indexStatistics.updateTimestamp(millis);
-            if (createBloomFilter) {
-              bloomFilter.addLong(millis);
-            }
-          }
-        }
-      }
-    }
-
-    @Override
-    void writeStripe(OrcProto.StripeFooter.Builder builder,
-                     int requiredIndexEntries) throws IOException {
-      super.writeStripe(builder, requiredIndexEntries);
-      seconds.flush();
-      nanos.flush();
-      recordPosition(rowIndexPosition);
-    }
-
-    private static long formatNanos(int nanos) {
-      if (nanos == 0) {
-        return 0;
-      } else if (nanos % 100 != 0) {
-        return ((long) nanos) << 3;
-      } else {
-        nanos /= 100;
-        int trailingZeros = 1;
-        while (nanos % 10 == 0 && trailingZeros < 7) {
-          nanos /= 10;
-          trailingZeros += 1;
-        }
-        return ((long) nanos) << 3 | trailingZeros;
-      }
-    }
-
-    @Override
-    void recordPosition(PositionRecorder recorder) throws IOException {
-      super.recordPosition(recorder);
-      seconds.getPosition(recorder);
-      nanos.getPosition(recorder);
-    }
-  }
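
The formatNanos trick above is easier to see with a round trip: trailing decimal zeros are stripped from the nanosecond value and their count (minus one) is packed into the low three bits, so common fractions like .123 seconds serialize as small integers. This is a self-contained restatement with an illustrative decode, not the ORC reader or writer code.

final class NanoEncodingSketch {
  static long encode(int nanos) {
    if (nanos == 0) return 0;
    if (nanos % 100 != 0) return ((long) nanos) << 3;   // fewer than two trailing zeros: store as-is
    nanos /= 100;
    int removedMinusOne = 1;                            // two zeros removed so far
    while (nanos % 10 == 0 && removedMinusOne < 7) {
      nanos /= 10;
      removedMinusOne += 1;
    }
    return ((long) nanos) << 3 | removedMinusOne;
  }

  static int decode(long encoded) {
    int zeros = (int) (encoded & 7);
    long value = encoded >>> 3;
    if (zeros != 0) {
      for (int i = 0; i <= zeros; ++i) {
        value *= 10;                                    // restore zeros + 1 decimal zeros
      }
    }
    return (int) value;
  }

  public static void main(String[] args) {
    // 123456000 ns -> three zeros stripped, 123456 stored with a count of 2 in the low bits
    long e = encode(123456000);
    System.out.println(e + " -> " + decode(e));         // prints 987650 -> 123456000
  }
}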
-
-  private static class DateTreeWriter extends TreeWriter {
-    private final IntegerWriter writer;
-    private final boolean isDirectV2;
-
-    DateTreeWriter(int columnId,
-                   TypeDescription schema,
-                   StreamFactory writer,
-                   boolean nullable) throws IOException {
-      super(columnId, schema, writer, nullable);
-      OutStream out = writer.createStream(id,
-          OrcProto.Stream.Kind.DATA);
-      this.isDirectV2 = isNewWriteFormat(writer);
-      this.writer = createIntegerWriter(out, true, isDirectV2, writer);
-      recordPosition(rowIndexPosition);
-    }
-
-    @Override
-    void writeBatch(ColumnVector vector, int offset,
-                    int length) throws IOException {
-      super.writeBatch(vector, offset, length);
-      LongColumnVector vec = (LongColumnVector) vector;
-      if (vector.isRepeating) {
-        if (vector.noNulls || !vector.isNull[0]) {
-          int value = (int) vec.vector[0];
-          indexStatistics.updateDate(value);
-          if (createBloomFilter) {
-            bloomFilter.addLong(value);
-          }
-          for(int i=0; i < length; ++i) {
-            writer.write(value);
-          }
-        }
-      } else {
-        for(int i=0; i < length; ++i) {
-          if (vec.noNulls || !vec.isNull[i + offset]) {
-            int value = (int) vec.vector[i + offset];
-            writer.write(value);
-            indexStatistics.updateDate(value);
-            if (createBloomFilter) {
-              bloomFilter.addLong(value);
-            }
-          }
-        }
-      }
-    }
-
-    @Override
-    void writeStripe(OrcProto.StripeFooter.Builder builder,
-                     int requiredIndexEntries) throws IOException {
-      super.writeStripe(builder, requiredIndexEntries);
-      writer.flush();
-      recordPosition(rowIndexPosition);
-    }
-
-    @Override
-    void recordPosition(PositionRecorder recorder) throws IOException {
-      super.recordPosition(recorder);
-      writer.getPosition(recorder);
-    }
-
-    @Override
-    OrcProto.ColumnEncoding getEncoding() {
-      if (isDirectV2) {
-        return OrcProto.ColumnEncoding.newBuilder()
-            .setKind(OrcProto.ColumnEncoding.Kind.DIRECT_V2).build();
-      }
-      return OrcProto.ColumnEncoding.newBuilder()
-          .setKind(OrcProto.ColumnEncoding.Kind.DIRECT).build();
-    }
-  }
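
For context, the integer written by DateTreeWriter is the day number of the date; assuming the usual days-since-epoch representation of Hive's vectorized DATE columns, it corresponds to LocalDate#toEpochDay in plain Java.

import java.time.LocalDate;

final class DateValueSketch {
  static int toDayNumber(LocalDate d) {
    return (int) d.toEpochDay();                              // days since 1970-01-01
  }

  public static void main(String[] args) {
    System.out.println(toDayNumber(LocalDate.of(1970, 1, 2)));  // prints 1
  }
}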
-
-  private static class DecimalTreeWriter extends TreeWriter {
-    private final PositionedOutputStream valueStream;
-
-    // These scratch buffers allow us to serialize decimals much faster.
-    private final long[] scratchLongs;
-    private final byte[] scratchBuffer;
-
-    private final IntegerWriter scaleStream;
-    private final boolean isDirectV2;
-
-    DecimalTreeWriter(int columnId,
-                        TypeDescription schema,
-                        StreamFactory writer,
-                        boolean nullable) throws IOException {
-      super(columnId, schema, writer, nullable);
-      this.isDirectV2 = isNewWriteFormat(writer);
-      valueStream = writer.createStream(id, OrcProto.Stream.Kind.DATA);
-      scratchLongs = new long[HiveDecimal.SCRATCH_LONGS_LEN];
-      scratchBuffer = new byte[HiveDecimal.SCRATCH_BUFFER_LEN_TO_BYTES];
-      this.scaleStream = createIntegerWriter(writer.createStream(id,
-          OrcProto.Stream.Kind.SECONDARY), true, isDirectV2, writer);
-      recordPosition(rowIndexPosition);
-    }
-
-    @Override
-    OrcProto.ColumnEncoding getEncoding() {
-      if (isDirectV2) {
-        return OrcProto.ColumnEncoding.newBuilder()
-            .setKind(OrcProto.ColumnEncoding.Kind.DIRECT_V2).build();
-      }
-      return OrcProto.ColumnEncoding.newBuilder()
-          .setKind(OrcProto.ColumnEncoding.Kind.DIRECT).build();
-    }
-
-    @Override
-    void writeBatch(ColumnVector vector, int offset,
-                    int length) throws IOException {
-      super.writeBatch(vector, offset, length);
-      DecimalColumnVector vec = (DecimalColumnVector) vector;
-      if (vector.isRepeating) {
-        if (vector.noNulls || !vector.isNull[0]) {
-          HiveDecimalWritable value = vec.vector[0];
-          indexStatistics.updateDecimal(value);
-          if (createBloomFilter) {
-
-            // Use the HiveDecimalWritable toString() method with a scratch buffer for good
-            // performance when creating the String.  We need a String hash code, not a
-            // UTF-8 byte[] hash code, in order to get the right hash code.
-            bloomFilter.addString(value.toString(scratchBuffer));
-          }
-          for(int i=0; i < length; ++i) {
-
-            // Use the fast ORC serialization method that emulates SerializationUtils.writeBigInteger
-            // provided by HiveDecimalWritable.
-            value.serializationUtilsWrite(
-                valueStream,
-                scratchLongs);
-            scaleStream.write(value.scale());
-          }
-        }
-      } else {
-        for(int i=0; i < length; ++i) {
-          if (vec.noNulls || !vec.isNull[i + offset]) {
-            HiveDecimalWritable value = vec.vector[i + offset];
-            value.serializationUtilsWrite(
-                valueStream,
-                scratchLongs);
-            scaleStream.write(value.scale());
-            indexStatistics.updateDecimal(value);
-            if (createBloomFilter) {
-              bloomFilter.addString(value.toString(scratchBuffer));
-            }
-          }
-        }
-      }
-    }
-
-    @Override
-    void writeStripe(OrcProto.StripeFooter.Builder builder,
-                     int requiredIndexEntries) throws IOException {
-      super.writeStripe(builder, requiredIndexEntries);
-      valueStream.flush();
-      scaleStream.flush();
-      recordPosition(rowIndexPosition);
-    }
-
-    @Override
-    void recordPosition(PositionRecorder recorder) throws IOException {
-      super.recordPosition(recorder);
-      valueStream.getPosition(recorder);
-      scaleStream.getPosition(recorder);
-    }
-  }
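
DecimalTreeWriter splits every value into an unscaled integer (written to the DATA stream) and a scale (written to the SECONDARY stream). A minimal sketch of that split using java.math types, purely for illustration:

import java.math.BigDecimal;
import java.math.BigInteger;

final class DecimalSplitSketch {
  public static void main(String[] args) {
    BigDecimal d = new BigDecimal("123.45");
    BigInteger unscaled = d.unscaledValue();  // 12345 -> goes to the value stream
    int scale = d.scale();                    // 2     -> goes to the scale stream
    System.out.println(unscaled + " scale=" + scale);
  }
}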
-
-  private static class StructTreeWriter extends TreeWriter {
-    StructTreeWriter(int columnId,
-                     TypeDescription schema,
-                     StreamFactory writer,
-                     boolean nullable) throws IOException {
-      super(columnId, schema, writer, nullable);
-      List<TypeDescription> children = schema.getChildren();
-      childrenWriters = new TreeWriter[children.size()];
-      for(int i=0; i < childrenWriters.length; ++i) {
-        childrenWriters[i] = createTreeWriter(
-          children.get(i), writer,
-          true);
-      }
-      recordPosition(rowIndexPosition);
-    }
-
-    @Override
-    void writeRootBatch(VectorizedRowBatch batch, int offset,
-                        int length) throws IOException {
-      // update the statistics for the root column
-      indexStatistics.increment(length);
-      // I'm assuming that the root column isn't nullable so that I don't need
-      // to update isPresent.
-      for(int i=0; i < childrenWriters.length; ++i) {
-        childrenWriters[i].writeBatch(batch.cols[i], offset, length);
-      }
-    }
-
-    private static void writeFields(StructColumnVector vector,
-                                    TreeWriter[] childrenWriters,
-                                    int offset, int length) throws IOException {
-      for(int field=0; field < childrenWriters.length; ++field) {
-        childrenWriters[field].writeBatch(vector.fields[field], offset, length);
-      }
-    }
-
-    @Override
-    void writeBatch(ColumnVector vector, int offset,
-                    int length) throws IOException {
-      super.writeBatch(vector, offset, length);
-      StructColumnVector vec = (StructColumnVector) vector;
-      if (vector.isRepeating) {
-        if (vector.noNulls || !vector.isNull[0]) {
-          writeFields(vec, childrenWriters, offset, length);
-        }
-      } else if (vector.noNulls) {
-        writeFields(vec, childrenWriters, offset, length);
-      } else {
-        // write the records in runs
-        int currentRun = 0;
-        boolean started = false;
-        for(int i=0; i < length; ++i) {
-          if (!vec.isNull[i + offset]) {
-            if (!started) {
-              started = true;
-              currentRun = i;
-            }
-          } else if (started) {
-            started = false;
-            writeFields(vec, childrenWriters, offset + currentRun,
-                i - currentRun);
-          }
-        }
-        if (started) {
-          writeFields(vec, childrenWriters, offset + currentRun,
-              length - currentRun);
-        }
-      }
-    }
-
-    @Override
-    void writeStripe(OrcProto.StripeFooter.Builder builder,
-                     int requiredIndexEntries) throws IOException {
-      super.writeStripe(builder, requiredIndexEntries);
-      for(TreeWriter child: childrenWriters) {
-        child.writeStripe(builder, requiredIndexEntries);
-      }
-      recordPosition(rowIndexPosition);
-    }
-  }
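
The null-handling loop above batches consecutive non-null rows into single writeBatch calls on the children. Here is a stand-alone restatement of that run detection; writeRange is a hypothetical stand-in for childrenWriters[i].writeBatch.

import java.util.function.BiConsumer;

final class NullRunSketch {
  static void forEachNonNullRun(boolean[] isNull, int offset, int length,
                                BiConsumer<Integer, Integer> writeRange) {
    int runStart = 0;
    boolean started = false;
    for (int i = 0; i < length; ++i) {
      if (!isNull[offset + i]) {
        if (!started) { started = true; runStart = i; }   // a new run of non-null rows begins
      } else if (started) {
        started = false;
        writeRange.accept(offset + runStart, i - runStart);
      }
    }
    if (started) {                                        // flush the trailing run, if any
      writeRange.accept(offset + runStart, length - runStart);
    }
  }

  public static void main(String[] args) {
    boolean[] isNull = {false, false, true, false, true};
    forEachNonNullRun(isNull, 0, isNull.length,
        (start, len) -> System.out.println("range " + start + " len " + len));
    // prints: range 0 len 2, then range 3 len 1
  }
}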
-
-  private static class ListTreeWriter extends TreeWriter {
-    private final IntegerWriter lengths;
-    private final boolean isDirectV2;
-
-    ListTreeWriter(int columnId,
-                   TypeDescription schema,
-                   StreamFactory writer,
-                   boolean nullable) throws IOException {
-      super(columnId, schema, writer, nullable);
-      this.isDirectV2 = isNewWriteFormat(writer);
-      childrenWriters = new TreeWriter[1];
-      childrenWriters[0] =
-        createTreeWriter(schema.getChildren().get(0), writer, true);
-      lengths = createIntegerWriter(writer.createStream(columnId,
-          OrcProto.Stream.Kind.LENGTH), false, isDirectV2, writer);
-      recordPosition(rowIndexPosition);
-    }
-
-    @Override
-    OrcProto.ColumnEncoding getEncoding() {
-      if (isDirectV2) {
-        return OrcProto.ColumnEncoding.newBuilder()
-            .setKind(OrcProto.ColumnEncoding.Kind.DIRECT_V2).build();
-      }
-      return OrcProto.ColumnEncoding.newBuilder()
-          .setKind(OrcProto.ColumnEncoding.Kind.DIRECT).build();
-    }
-
-    @Override
-    void writeBatch(ColumnVector vector, int offset,
-                    int length) throws IOException {
-      super.writeBatch(vector, offset, length);
-      ListColumnVector vec = (ListColumnVector) vector;
-      if (vector.isRepeating) {
-        if (vector.noNulls || !vector.isNull[0]) {
-          int childOffset = (int) vec.offsets[0];
-          int childLength = (int) vec.lengths[0];
-          for(int i=0; i < length; ++i) {
-            lengths.write(childLength);
-            childrenWriters[0].writeBatch(vec.child, childOffset, childLength);
-          }
-          if (createBloomFilter) {
-            bloomFilter.addLong(childLength);
-          }
-        }
-      } else {
-        // write the elements in runs
-        int currentOffset = 0;
-        int currentLength = 0;
-        for(int i=0; i < length; ++i) {
-          if (!vec.isNull[i + offset]) {
-            int nextLength = (int) vec.lengths[offset + i];
-            int nextOffset = (int) vec.offsets[offset + i];
-            lengths.write(nextLength);
-            if (currentLength == 0) {
-              currentOffset = nextOffset;
-              currentLength = nextLength;
-            } else if (currentOffset + currentLength != nextOffset) {
-              childrenWriters[0].writeBatch(vec.child, currentOffset,
-                  currentLength);
-              currentOffset = nextOffset;
-              currentLength = nextLength;
-            } else {
-              currentLength += nextLength;
-            }
-          }
-        }
-        if (currentLength != 0) {
-          childrenWriters[0].writeBatch(vec.child, currentOffset,
-              currentLength);
-        }
-      }
-    }
-
-    @Override
-    void writeStripe(OrcProto.StripeFooter.Builder builder,
-                     int requiredIndexEntries) throws IOException {
-      super.writeStripe(builder, requiredIndexEntries);
-      lengths.flush();
-      for(TreeWriter child: childrenWriters) {
-        child.writeStripe(builder, requiredIndexEntries);
-      }
-      recordPosition(rowIndexPosition);
-    }
-
-    @Override
-    void recordPosition(PositionRecorder recorder) throws IOException {
-      super.recordPosition(recorder);
-      lengths.getPosition(recorder);
-    }
-  }
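
The else branch above coalesces per-row child ranges that happen to be contiguous, so the element writer sees fewer, larger batches. A minimal sketch of that loop (nulls omitted, names invented):

import java.util.ArrayList;
import java.util.List;

final class RangeCoalesceSketch {
  static List<int[]> coalesce(long[] offsets, long[] lengths) {
    List<int[]> ranges = new ArrayList<>();
    int curOffset = 0, curLength = 0;
    for (int i = 0; i < offsets.length; ++i) {
      int nextOffset = (int) offsets[i], nextLength = (int) lengths[i];
      if (curLength == 0) {
        curOffset = nextOffset; curLength = nextLength;        // start the first range
      } else if (curOffset + curLength == nextOffset) {
        curLength += nextLength;                               // contiguous: extend the current range
      } else {
        ranges.add(new int[]{curOffset, curLength});           // gap: flush and start over
        curOffset = nextOffset; curLength = nextLength;
      }
    }
    if (curLength != 0) ranges.add(new int[]{curOffset, curLength});
    return ranges;
  }

  public static void main(String[] args) {
    // rows point at child elements [0,3), [3,5), [7,8): the first two merge
    for (int[] r : coalesce(new long[]{0, 3, 7}, new long[]{3, 2, 1})) {
      System.out.println(r[0] + " len " + r[1]);               // prints 0 len 5, then 7 len 1
    }
  }
}

MapTreeWriter below applies the same coalescing, just forwarding each merged range to both the key and the value writer.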
-
-  private static class MapTreeWriter extends TreeWriter {
-    private final IntegerWriter lengths;
-    private final boolean isDirectV2;
-
-    MapTreeWriter(int columnId,
-                  TypeDescription schema,
-                  StreamFactory writer,
-                  boolean nullable) throws IOException {
-      super(columnId, schema, writer, nullable);
-      this.isDirectV2 = isNewWriteFormat(writer);
-      childrenWriters = new TreeWriter[2];
-      List<TypeDescription> children = schema.getChildren();
-      childrenWriters[0] =
-        createTreeWriter(children.get(0), writer, true);
-      childrenWriters[1] =
-        createTreeWriter(children.get(1), writer, true);
-      lengths = createIntegerWriter(writer.createStream(columnId,
-          OrcProto.Stream.Kind.LENGTH), false, isDirectV2, writer);
-      recordPosition(rowIndexPosition);
-    }
-
-    @Override
-    OrcProto.ColumnEncoding getEncoding() {
-      if (isDirectV2) {
-        return OrcProto.ColumnEncoding.newBuilder()
-            .setKind(OrcProto.ColumnEncoding.Kind.DIRECT_V2).build();
-      }
-      return OrcProto.ColumnEncoding.newBuilder()
-          .setKind(OrcProto.ColumnEncoding.Kind.DIRECT).build();
-    }
-
-    @Override
-    void writeBatch(ColumnVector vector, int offset,
-                    int length) throws IOException {
-      super.writeBatch(vector, offset, length);
-      MapColumnVector vec = (MapColumnVector) vector;
-      if (vector.isRepeating) {
-        if (vector.noNulls || !vector.isNull[0]) {
-          int childOffset = (int) vec.offsets[0];
-          int childLength = (int) vec.lengths[0];
-          for(int i=0; i < length; ++i) {
-            lengths.write(childLength);
-            childrenWriters[0].writeBatch(vec.keys, childOffset, childLength);
-            childrenWriters[1].writeBatch(vec.values, childOffset, childLength);
-          }
-          if (createBloomFilter) {
-            bloomFilter.addLong(childLength);
-          }
-        }
-      } else {
-        // write the elements in runs
-        int currentOffset = 0;
-        int currentLength = 0;
-        for(int i=0; i < length; ++i) {
-          if (!vec.isNull[i + offset]) {
-            int nextLength = (int) vec.lengths[offset + i];
-            int nextOffset = (int) vec.offsets[offset + i];
-            lengths.write(nextLength);
-            if (currentLength == 0) {
-              currentOffset = nextOffset;
-              currentLength = nextLength;
-            } else if (currentOffset + currentLength != nextOffset) {
-              childrenWriters[0].writeBatch(vec.keys, currentOffset,
-                  currentLength);
-              childrenWriters[1].writeBatch(vec.values, currentOffset,
-                  currentLength);
-              currentOffset = nextOffset;
-              currentLength = nextLength;
-            } else {
-              currentLength += nextLength;
-            }
-          }
-        }
-        if (currentLength != 0) {
-          childrenWriters[0].writeBatch(vec.keys, currentOffset,
-              currentLength);
-          childrenWriters[1].writeBatch(vec.values, currentOffset,
-              currentLength);
-        }
-      }
-    }
-
-    @Override
-    void writeStripe(OrcProto.StripeFooter.Builder builder,
-                     int requiredIndexEntries) throws IOException {
-      super.writeStripe(builder, requiredIndexEntries);
-      lengths.flush();
-      for(TreeWriter child: childrenWriters) {
-        child.writeStripe(builder, requiredIndexEntries);
-      }
-      recordPosition(rowIndexPosition);
-    }
-
-    @Override
-    void recordPosition(PositionRecorder recorder) throws IOException {
-      super.recordPosition(recorder);
-      lengths.getPosition(recorder);
-    }
-  }
-
-  private static class UnionTreeWriter extends TreeWriter {
-    private final RunLengthByteWriter tags;
-
-    UnionTreeWriter(int columnId,
-                  TypeDescription schema,
-                  StreamFactory writer,
-                  boolean nullable) throws IOException {
-      super(columnId, schema, writer, nullable);
-      List<TypeDescription> children = schema.getChildren();
-      childrenWriters = new TreeWriter[children.size()];
-      for(int i=0; i < childrenWriters.length; ++i) {
-        childrenWriters[i] =
-            createTreeWriter(children.get(i), writer, true);
-      }
-      tags =
-        new RunLengthByteWriter(writer.createStream(columnId,
-            OrcProto.Stream.Kind.DATA));
-      recordPosition(rowIndexPosition);
-    }
-
-    @Override
-    void writeBatch(ColumnVector vector, int offset,
-                    int length) throws IOException {
-      super.writeBatch(vector, offset, length);
-      UnionColumnVector vec = (UnionColumnVector) vector;
-      if (vector.isRepeating) {
-        if (vector.noNulls || !vector.isNull[0]) {
-          byte tag = (byte) vec.tags[0];
-          for(int i=0; i < length; ++i) {
-            tags.write(tag);
-          }
-          if (createBloomFilter) {
-            bloomFilter.addLong(tag);
-          }
-          childrenWriters[tag].writeBatch(vec.fields[tag], offset, length);
-        }
-      } else {
-        // write the records in runs of the same tag
-        int[] currentStart = new int[vec.fields.length];
-        int[] currentLength = new int[vec.fields.length];
-        for(int i=0; i < length; ++i) {
-          // only need to deal with the non-nulls, since the nulls were dealt
-          // with in the super method.
-          if (vec.noNulls || !vec.isNull[i + offset]) {
-            byte tag = (byte) vec.tags[offset + i];
-            tags.write(tag);
-            if (currentLength[tag] == 0) {
-              // start a new sequence
-              currentStart[tag] = i + offset;
-              currentLength[tag] = 1;
-            } else if (currentStart[tag] + currentLength[tag] == i + offset) {
-              // ok, we are extending the current run for that tag.
-              currentLength[tag] += 1;
-            } else {
-              // otherwise, we need to close off the old run and start a new one
-              childrenWriters[tag].writeBatch(vec.fields[tag],
-                  currentStart[tag], currentLength[tag]);
-              currentStart[tag] = i + offset;
-              currentLength[tag] = 1;
-            }
-          }
-        }
-        // write out any left over sequences
-        for(int tag=0; tag < currentStart.length; ++tag) {
-          if (currentLength[tag] != 0) {
-            childrenWriters[tag].writeBatch(vec.fields[tag], currentStart[tag],
-                currentLength[tag]);
-          }
-        }
-      }
-    }
-
-    @Override
-    void writeStripe(OrcProto.StripeFooter.Builder builder,
-                     int requiredIndexEntries) throws IOException {
-      super.writeStripe(builder, requiredIndexEntries);
-      tags.flush();
-      for(TreeWriter child: childrenWriters) {
-        child.writeStripe(builder, requiredIndexEntries);
-      }
-      recordPosition(rowIndexPosition);
-    }
-
-    @Override
-    void recordPosition(PositionRecorder recorder) throws IOException {
-      super.recordPosition(recorder);
-      tags.getPosition(recorder);
-    }
-  }
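
UnionTreeWriter groups rows into runs per tag so that each variant's child writer receives contiguous ranges. A compact, self-contained restatement, with println standing in for childrenWriters[tag].writeBatch:

final class UnionRunSketch {
  static void writeRuns(int numTags, byte[] tags) {
    int[] start = new int[numTags];
    int[] len = new int[numTags];
    for (int i = 0; i < tags.length; ++i) {
      int tag = tags[i];
      if (len[tag] == 0) {
        start[tag] = i; len[tag] = 1;                    // first row for this tag
      } else if (start[tag] + len[tag] == i) {
        len[tag] += 1;                                   // extend the current run for this tag
      } else {
        System.out.println("tag " + tag + ": rows " + start[tag] + ".." + (start[tag] + len[tag] - 1));
        start[tag] = i; len[tag] = 1;                    // close the old run, start a new one
      }
    }
    for (int tag = 0; tag < numTags; ++tag) {            // flush whatever is left per tag
      if (len[tag] != 0) {
        System.out.println("tag " + tag + ": rows " + start[tag] + ".." + (start[tag] + len[tag] - 1));
      }
    }
  }

  public static void main(String[] args) {
    writeRuns(2, new byte[]{0, 0, 1, 0, 1, 1});
    // prints four runs: rows 0..1 and 3..3 for tag 0, rows 2..2 and 4..5 for tag 1
  }
}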
-
-  private static TreeWriter createTreeWriter(TypeDescription schema,
-                                             StreamFactory streamFactory,
-                                             boolean nullable) throws IOException {
-    switch (schema.getCategory()) {
-      case BOOLEAN:
-        return new BooleanTreeWriter(streamFactory.getNextColumnId(),
-            schema, streamFactory, nullable);
-      case BYTE:
-        return new ByteTreeWriter(streamFactory.getNextColumnId(),
-            schema, streamFactory, nullable);
-      case SHORT:
-      case INT:
-      case LONG:
-        return new IntegerTreeWriter(streamFactory.getNextColumnId(),
-            schema, streamFactory, nullable);
-      case FLOAT:
-        return new FloatTreeWriter(streamFactory.getNextColumnId(),
-            schema, streamFactory, nullable);
-      case DOUBLE:
-        return new DoubleTreeWriter(streamFactory.getNextColumnId(),
-            schema, streamFactory, nullable);
-      case STRING:
-        return new StringTreeWriter(streamFactory.getNextColumnId(),
-            schema, streamFactory, nullable);
-      case CHAR:
-        return new CharTreeWriter(streamFactory.getNextColumnId(),
-            schema, streamFactory, nullable);
-      case VARCHAR:
-        return new VarcharTreeWriter(streamFactory.getNextColumnId(),
-            schema, streamFactory, nullable);
-      case BINARY:
-        return new BinaryTreeWriter(streamFactory.getNextColumnId(),
-            schema, streamFactory, nullable);
-      case TIMESTAMP:
-        return new TimestampTreeWriter(streamFactory.getNextColumnId(),
-            schema, streamFactory, nullable);
-      case DATE:
-        return new DateTreeWriter(streamFactory.getNextColumnId(),
-            schema, streamFactory, nullable);
-      case DECIMAL:
-        return new DecimalTreeWriter(streamFactory.getNextColumnId(),
-            schema, streamFactory,  nullable);
-      case STRUCT:
-        return new StructTreeWriter(streamFactory.getNextColumnId(),
-            schema, streamFactory, nullable);
-      case MAP:
-        return new MapTreeWriter(streamFactory.getNextColumnId(),
-            schema, streamFactory, nullable);
-      case LIST:
-        return new ListTreeWriter(streamFactory.getNextColumnId(),
-            schema, streamFactory, nullable);
-      case UNION:
-        return new UnionTreeWriter(streamFactory.getNextColumnId(),
-            schema, streamFactory, nullable);
-      default:
-        throw new IllegalArgumentException("Bad category: " +
-            schema.getCategory());
-    }
-  }
-
-  private static void writeTypes(OrcProto.Footer.Builder builder,
-                                 TypeDescription schema) {
-    OrcProto.Type.Builder type = OrcProto.Type.newBuilder();
-    List<TypeDescription> children = OrcUtils.setTypeBuilderFromSchema(type, schema);
-    builder.addTypes(type);
-    if (children != null) {
-      for(TypeDescription child: children) {
-        writeTypes(builder, child);
-      }
-    }
-  }
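
writeTypes above emits the schema in pre-order, which is also how ORC assigns column ids (the root is 0, then depth-first). A toy flattening with an invented TypeNode class shows the ordering:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

final class PreOrderSketch {
  static final class TypeNode {
    final String name;
    final List<TypeNode> children;
    TypeNode(String name, TypeNode... children) {
      this.name = name;
      this.children = Arrays.asList(children);
    }
  }

  static void flatten(TypeNode node, List<String> out) {
    out.add(node.name);                        // the parent takes the next id, then its children
    for (TypeNode child : node.children) {
      flatten(child, out);
    }
  }

  public static void main(String[] args) {
    TypeNode schema = new TypeNode("struct",
        new TypeNode("int"),
        new TypeNode("list", new TypeNode("string")));
    List<String> flat = new ArrayList<>();
    flatten(schema, flat);
    System.out.println(flat);                  // [struct, int, list, string] -> ids 0..3
  }
}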
-
-  @VisibleForTesting
-  public void ensureStream() throws IOException {
-    physWriter.initialize();
-  }
-
-  private void createRowIndexEntry() throws IOException {
-    treeWriter.createRowIndexEntry();
-    rowsInIndex = 0;
-  }
-
-  private void flushStripe() throws IOException {
-    ensureStream();
-    if (buildIndex && rowsInIndex != 0) {
-      createRowIndexEntry();
-    }
-    if (rowsInStripe != 0) {
-      if (callback != null) {
-        callback.preStripeWrite(callbackContext);
-      }
-      // finalize the data for the stripe
-      int requiredIndexEntries = rowIndexStride == 0 ? 0 :
-          (int) ((rowsInStripe + rowIndexStride - 1) / rowIndexStride);
-      OrcProto.StripeFooter.Builder builder = OrcProto.StripeFooter.newBuilder();
-      OrcProto.StripeInformation.Builder dirEntry = OrcProto.StripeInformation
-          .newBuilder().setNumberOfRows(rowsInStripe);
-      treeWriter.writeStripe(builder, requiredIndexEntries);
-      physWriter.finalizeStripe(builder, dirEntry);
-      stripes.add(dirEntry.build());
-      rowCount += rowsInStripe;
-      rowsInStripe = 0;
-    }
-  }
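
A side note on the arithmetic in flushStripe: (rowsInStripe + rowIndexStride - 1) / rowIndexStride is integer ceiling division, i.e. one index entry per started stride. For example:

final class CeilDivSketch {
  static int requiredIndexEntries(long rowsInStripe, int rowIndexStride) {
    return rowIndexStride == 0 ? 0
        : (int) ((rowsInStripe + rowIndexStride - 1) / rowIndexStride);  // ceiling of rows / stride
  }

  public static void main(String[] args) {
    System.out.println(requiredIndexEntries(25000, 10000));  // prints 3
  }
}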
-
-  private long computeRawDataSize() {
-    return getRawDataSize(treeWriter, schema);
-  }
-
-  private long getRawDataSize(TreeWriter child,
-                              TypeDescription schema) {
-    long total = 0;
-    long numVals = child.fileStatistics.getNumberOfValues();
-    switch (schema.getCategory()) {
-      case BOOLEAN:
-      case BYTE:
-      case SHORT:
-      case INT:
-      case FLOAT:
-        return numVals * JavaDataModel.get().primitive1();
-      case LONG:
-      case DOUBLE:
-        return numVals * JavaDataModel.get().primitive2();
-      case STRING:
-      case VARCHAR:
-      case CHAR:
-        // ORC strings are converted to java Strings, so use JavaDataModel to
-        // compute the overall size of the strings
-        StringColumnStatistics scs = (StringColumnStatistics) child.fileStatistics;
-        numVals = numVals == 0 ? 1 : numVals;
-        int avgStringLen = (int) (scs.getSum() / numVals);
-        return numVals * JavaDataModel.get().lengthForStringOfLength(avgStringLen);
-      case DECIMAL:
-        return numVals * JavaDataModel.get().lengthOfDecimal();
-      case DATE:
-        return numVals * JavaDataModel.get().lengthOfDate();
-      case BINARY:
-        // get total length of binary blob
-        BinaryColumnStatistics bcs = (BinaryColumnStatistics) child.fileStatistics;
-        return bcs.getSum();
-      case TIMESTAMP:
-        return numVals * JavaDataModel.get().lengthOfTimestamp();
-      case LIST:
-      case MAP:
-      case UNION:
-      case STRUCT: {
-        TreeWriter[] childWriters = child.getChildrenWriters();
-        List<TypeDescription> childTypes = schema.getChildren();
-        for (int i=0; i < childWriters.length; ++i) {
-          total += getRawDataSize(childWriters[i], childTypes.get(i));
-        }
-        break;
-      }
-      default:
-        LOG.debug("Unknown object inspector category.");
-        break;
-    }
-    return total;
-  }
-
-  private void writeFileStatistics(OrcProto.Footer.Builder builder,
-                                   TreeWriter writer) throws IOException {
-    builder.addStatistics(writer.fileStatistics.serialize());
-    for(TreeWriter child: writer.getChildrenWriters()) {
-      writeFileStatistics(builder, child);
-    }
-  }
-
-  private void writeMetadata() throws IOException {
-    ensureStream();
-    OrcProto.Metadata.Builder builder = OrcProto.Metadata.newBuilder();
-    for(OrcProto.StripeStatistics.Builder ssb : treeWriter.stripeStatsBuilders) {
-      builder.addStripeStats(ssb.build());
-    }
-
-    physWriter.writeFileMetadata(builder);
-  }
-
-  private void writeFooter() throws IOException {
-    ensureStream();
-    OrcProto.Footer.Builder builder = OrcProto.Footer.newBuilder();
-    builder.setNumberOfRows(rowCount);
-    builder.setRowIndexStride(rowIndexStride);
-    // populate raw data size
-    rawDataSize = computeRawDataSize();
-    // serialize the types
-    writeTypes(builder, schema);
-    // add the stripe information
-    for(OrcProto.StripeInformation stripe: stripes) {
-      builder.addStripes(stripe);
-    }
-    // add the column statistics
-    writeFileStatistics(builder, treeWriter);
-    // add all of the user metadata
-    for(Map.Entry<String, ByteString> entry: userMetadata.entrySet()) {
-      builder.addMetadata(OrcProto.UserMetadataItem.newBuilder()
-        .setName(entry.getKey()).setValue(entry.getValue()));
-    }
-    physWriter.writeFileFooter(builder);
-  }
-
-  private void writePostScript() throws IOException {
-    OrcProto.PostScript.Builder builder =
-      OrcProto.PostScript.newBuilder()
-        .setMagic(OrcFile.MAGIC)
-        .addVersion(version.getMajor())
-        .addVersion(version.getMinor())
-        .setWriterVersion(OrcFile.CURRENT_WRITER.getId());
-    physWriter.writePostScript(builder);
-  }
-
-  private long estimateStripeSize() {
-    return physWriter.estimateMemory() + treeWriter.estimateMemory();
-  }
-
-  @Override
-  public TypeDescription getSchema() {
-    return schema;
-  }
-
-  @Override
-  public void addUserMetadata(String name, ByteBuffer value) {
-    userMetadata.put(name, ByteString.copyFrom(value));
-  }
-
-  @Override
-  public void addRowBatch(VectorizedRowBatch batch) throws IOException {
-    if (buildIndex) {
-      // Batch the writes up to the rowIndexStride so that we can get the
-      // right size indexes.
-      int posn = 0;
-      while (posn < batch.size) {
-        int chunkSize = Math.min(batch.size - posn,
-            rowIndexStride - rowsInIndex);
-        treeWriter.writeRootBatch(batch, posn, chunkSize);
-        posn += chunkSize;
-        rowsInIndex += chunkSize;
-        rowsInStripe += chunkSize;
-        if (rowsInIndex >= rowIndexStride) {
-          createRowIndexEntry();
-        }
-      }
-    } else {
-      rowsInStripe += batch.size;
-      treeWriter.writeRootBatch(batch, 0, batch.size);
-    }
-    if (path != null) {
-      memoryManager.addedRow(batch.size);
-    }
-  }
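
addRowBatch splits each incoming batch at row-index-stride boundaries so that every index entry covers exactly rowIndexStride rows (except possibly the last). A self-contained sketch of that loop, with println standing in for writeRootBatch:

final class StrideChunkSketch {
  public static void main(String[] args) {
    int batchSize = 2500, rowIndexStride = 1000, rowsInIndex = 600;
    int posn = 0;
    while (posn < batchSize) {
      int chunk = Math.min(batchSize - posn, rowIndexStride - rowsInIndex);  // stop at the stride boundary
      System.out.println("write rows " + posn + ".." + (posn + chunk - 1));
      posn += chunk;
      rowsInIndex += chunk;
      if (rowsInIndex >= rowIndexStride) {
        System.out.println("createRowIndexEntry()");                         // one entry per full stride
        rowsInIndex = 0;
      }
    }
  }
}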
-
-  @Override
-  public void close() throws IOException {
-    if (callback != null) {
-      callback.preFooterWrite(callbackContext);
-    }
-    // remove us from the memory manager so that we don't get any callbacks
-    if (path != null) {
-      memoryManager.removeWriter(path);
-    }
-    // actually close the file
-    flushStripe();
-    writeMetadata();
-    writeFooter();
-    writePostScript();
-    physWriter.close();
-  }
-
-  /**
-   * Raw data size is computed when the file footer is written, so the raw data
-   * size value is available only after the writer has been closed.
-   */
-  @Override
-  public long getRawDataSize() {
-    return rawDataSize;
-  }
-
-  /**
-   * Row count is updated when the stripes are flushed. For an accurate row
-   * count, call this method after the writer is closed.
-   */
-  @Override
-  public long getNumberOfRows() {
-    return rowCount;
-  }
-
-  @Override
-  public long writeIntermediateFooter() throws IOException {
-    // flush any buffered rows
-    flushStripe();
-    // write a footer
-    if (stripesAtLastFlush != stripes.size()) {
-      if (callback != null) {
-        callback.preFooterWrite(callbackContext);
-      }
-      writeMetadata();
-      writeFooter();
-      writePostScript();
-      stripesAtLastFlush = stripes.size();
-      physWriter.flush();
-    }
-    return physWriter.getRawWriterPosition();
-  }
-
-  @Override
-  public void appendStripe(byte[] stripe, int offset, int length,
-      StripeInformation stripeInfo,
-      OrcProto.StripeStatistics stripeStatistics) throws IOException {
-    checkArgument(stripe != null, "Stripe must not be null");
-    checkArgument(length <= stripe.length,
-        "Specified length must not be greater than the specified array length");
-    checkArgument(stripeInfo != null, "Stripe information must not be null");
-    checkArgument(stripeStatistics != null,
-        "Stripe statistics must not be null");
-
-    ensureStream();
-    OrcProto.StripeInformation.Builder dirEntry = OrcProto.StripeInformation.newBuilder();
-    physWriter.appendRawStripe(stripe, offset, length, dirEntry);
-
-    rowsInStripe = stripeStatistics.getColStats(0).getNumberOfValues();
-    rowCount += rowsInStripe;
-
-    // since we have already written the stripe, just update stripe statistics
-    treeWriter.stripeStatsBuilders.add(stripeStatistics.toBuilder());
-
-    // update file level statistics
-    updateFileStatistics(stripeStatistics);
-
-    // update stripe information
-    stripes.add(dirEntry.setNumberOfRows(rowsInStripe)
-        .setIndexLength(stripeInfo.getIndexLength())
-        .setDataLength(stripeInfo.getDataLength())
-        .setFooterLength(stripeInfo.getFooterLength())
-        .build());
-
-    // reset it after writing the stripe
-    rowsInStripe = 0;
-  }
-
-  private void updateFileStatistics(OrcProto.StripeStatistics stripeStatistics) {
-    List<OrcProto.ColumnStatistics> cs = 

<TRUNCATED>

[37/37] hive git commit: HIVE-17118. Move the hive-orc source files to make the package names unique.

Posted by om...@apache.org.
HIVE-17118. Move the hive-orc source files to make the package names unique.


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/df8921d8
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/df8921d8
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/df8921d8

Branch: refs/heads/branch-2.2
Commit: df8921d851744c7c0aca956f5f3420fd08ee2134
Parents: 3e42672
Author: Owen O'Malley <om...@apache.org>
Authored: Tue Jul 18 12:57:07 2017 -0700
Committer: Owen O'Malley <om...@apache.org>
Committed: Tue Jul 18 14:15:51 2017 -0700

----------------------------------------------------------------------
 bin/ext/orcfiledump.cmd                         |     2 +-
 bin/ext/orcfiledump.sh                          |     4 +-
 orc/pom.xml                                     |    32 -
 .../org/apache/hive/orc/OrcProto.java           | 20179 +++++++++++++++++
 .../protobuf-java/org/apache/orc/OrcProto.java  | 20179 -----------------
 .../apache/hive/orc/BinaryColumnStatistics.java |    25 +
 .../java/org/apache/hive/orc/BloomFilterIO.java |    43 +
 .../hive/orc/BooleanColumnStatistics.java       |    27 +
 .../org/apache/hive/orc/ColumnStatistics.java   |    36 +
 .../org/apache/hive/orc/CompressionCodec.java   |    69 +
 .../org/apache/hive/orc/CompressionKind.java    |    27 +
 .../java/org/apache/hive/orc/DataReader.java    |    76 +
 .../apache/hive/orc/DateColumnStatistics.java   |    37 +
 .../hive/orc/DecimalColumnStatistics.java       |    45 +
 .../apache/hive/orc/DoubleColumnStatistics.java |    44 +
 .../apache/hive/orc/FileFormatException.java    |    30 +
 .../java/org/apache/hive/orc/FileMetadata.java  |    60 +
 .../hive/orc/IntegerColumnStatistics.java       |    50 +
 orc/src/java/org/apache/hive/orc/OrcConf.java   |   193 +
 orc/src/java/org/apache/hive/orc/OrcFile.java   |   574 +
 orc/src/java/org/apache/hive/orc/OrcUtils.java  |   623 +
 orc/src/java/org/apache/hive/orc/Reader.java    |   375 +
 .../java/org/apache/hive/orc/RecordReader.java  |    64 +
 .../apache/hive/orc/StringColumnStatistics.java |    41 +
 .../org/apache/hive/orc/StripeInformation.java  |    59 +
 .../org/apache/hive/orc/StripeStatistics.java   |    44 +
 .../hive/orc/TimestampColumnStatistics.java     |    38 +
 .../org/apache/hive/orc/TypeDescription.java    |   870 +
 orc/src/java/org/apache/hive/orc/Writer.java    |   110 +
 .../org/apache/hive/orc/impl/AcidStats.java     |    60 +
 .../apache/hive/orc/impl/BitFieldReader.java    |   214 +
 .../apache/hive/orc/impl/BitFieldWriter.java    |    69 +
 .../org/apache/hive/orc/impl/BufferChunk.java   |    85 +
 .../hive/orc/impl/ColumnStatisticsImpl.java     |  1101 +
 .../hive/orc/impl/ConvertTreeReaderFactory.java |  2892 +++
 .../hive/orc/impl/DataReaderProperties.java     |   122 +
 .../hive/orc/impl/DirectDecompressionCodec.java |    28 +
 .../apache/hive/orc/impl/DynamicByteArray.java  |   303 +
 .../apache/hive/orc/impl/DynamicIntArray.java   |   142 +
 .../org/apache/hive/orc/impl/HadoopShims.java   |   143 +
 .../hive/orc/impl/HadoopShimsCurrent.java       |    92 +
 .../apache/hive/orc/impl/HadoopShims_2_2.java   |   101 +
 .../java/org/apache/hive/orc/impl/InStream.java |   498 +
 .../org/apache/hive/orc/impl/IntegerReader.java |    82 +
 .../org/apache/hive/orc/impl/IntegerWriter.java |    47 +
 .../org/apache/hive/orc/impl/MemoryManager.java |   212 +
 .../org/apache/hive/orc/impl/OrcAcidUtils.java  |    88 +
 .../java/org/apache/hive/orc/impl/OrcIndex.java |    43 +
 .../java/org/apache/hive/orc/impl/OrcTail.java  |   138 +
 .../org/apache/hive/orc/impl/OutStream.java     |   289 +
 .../apache/hive/orc/impl/PhysicalFsWriter.java  |   529 +
 .../apache/hive/orc/impl/PhysicalWriter.java    |   122 +
 .../apache/hive/orc/impl/PositionProvider.java  |    26 +
 .../apache/hive/orc/impl/PositionRecorder.java  |    25 +
 .../hive/orc/impl/PositionedOutputStream.java   |    39 +
 .../org/apache/hive/orc/impl/ReaderImpl.java    |   763 +
 .../apache/hive/orc/impl/RecordReaderImpl.java  |  1238 +
 .../apache/hive/orc/impl/RecordReaderUtils.java |   578 +
 .../org/apache/hive/orc/impl/RedBlackTree.java  |   309 +
 .../hive/orc/impl/RunLengthByteReader.java      |   174 +
 .../hive/orc/impl/RunLengthByteWriter.java      |   106 +
 .../hive/orc/impl/RunLengthIntegerReader.java   |   173 +
 .../hive/orc/impl/RunLengthIntegerReaderV2.java |   406 +
 .../hive/orc/impl/RunLengthIntegerWriter.java   |   143 +
 .../hive/orc/impl/RunLengthIntegerWriterV2.java |   831 +
 .../apache/hive/orc/impl/SchemaEvolution.java   |   399 +
 .../hive/orc/impl/SerializationUtils.java       |  1311 ++
 .../orc/impl/SettableUncompressedStream.java    |    43 +
 .../org/apache/hive/orc/impl/SnappyCodec.java   |   108 +
 .../org/apache/hive/orc/impl/StreamName.java    |    97 +
 .../hive/orc/impl/StringRedBlackTree.java       |   207 +
 .../apache/hive/orc/impl/TreeReaderFactory.java |  2162 ++
 .../org/apache/hive/orc/impl/WriterImpl.java    |  2443 ++
 .../org/apache/hive/orc/impl/ZeroCopyShims.java |    89 +
 .../org/apache/hive/orc/impl/ZlibCodec.java     |   168 +
 .../org/apache/hive/orc/tools/FileDump.java     |   946 +
 .../org/apache/hive/orc/tools/JsonFileDump.java |   411 +
 .../org/apache/orc/BinaryColumnStatistics.java  |    27 -
 orc/src/java/org/apache/orc/BloomFilterIO.java  |    43 -
 .../org/apache/orc/BooleanColumnStatistics.java |    29 -
 .../java/org/apache/orc/ColumnStatistics.java   |    36 -
 .../java/org/apache/orc/CompressionCodec.java   |    69 -
 .../java/org/apache/orc/CompressionKind.java    |    27 -
 orc/src/java/org/apache/orc/DataReader.java     |    76 -
 .../org/apache/orc/DateColumnStatistics.java    |    39 -
 .../org/apache/orc/DecimalColumnStatistics.java |    46 -
 .../org/apache/orc/DoubleColumnStatistics.java  |    46 -
 .../org/apache/orc/FileFormatException.java     |    30 -
 orc/src/java/org/apache/orc/FileMetadata.java   |    64 -
 .../org/apache/orc/IntegerColumnStatistics.java |    52 -
 orc/src/java/org/apache/orc/OrcConf.java        |   193 -
 orc/src/java/org/apache/orc/OrcFile.java        |   574 -
 orc/src/java/org/apache/orc/OrcUtils.java       |   624 -
 orc/src/java/org/apache/orc/Reader.java         |   375 -
 orc/src/java/org/apache/orc/RecordReader.java   |    64 -
 .../org/apache/orc/StringColumnStatistics.java  |    43 -
 .../java/org/apache/orc/StripeInformation.java  |    59 -
 .../java/org/apache/orc/StripeStatistics.java   |    44 -
 .../apache/orc/TimestampColumnStatistics.java   |    38 -
 .../java/org/apache/orc/TypeDescription.java    |   870 -
 orc/src/java/org/apache/orc/Writer.java         |   114 -
 orc/src/java/org/apache/orc/impl/AcidStats.java |    60 -
 .../org/apache/orc/impl/BitFieldReader.java     |   217 -
 .../org/apache/orc/impl/BitFieldWriter.java     |    73 -
 .../java/org/apache/orc/impl/BufferChunk.java   |    85 -
 .../apache/orc/impl/ColumnStatisticsImpl.java   |  1101 -
 .../orc/impl/ConvertTreeReaderFactory.java      |  2893 ---
 .../apache/orc/impl/DataReaderProperties.java   |   124 -
 .../orc/impl/DirectDecompressionCodec.java      |    28 -
 .../org/apache/orc/impl/DynamicByteArray.java   |   303 -
 .../org/apache/orc/impl/DynamicIntArray.java    |   142 -
 .../java/org/apache/orc/impl/HadoopShims.java   |   143 -
 .../org/apache/orc/impl/HadoopShimsCurrent.java |    92 -
 .../org/apache/orc/impl/HadoopShims_2_2.java    |   101 -
 orc/src/java/org/apache/orc/impl/InStream.java  |   498 -
 .../java/org/apache/orc/impl/IntegerReader.java |    82 -
 .../java/org/apache/orc/impl/IntegerWriter.java |    47 -
 .../java/org/apache/orc/impl/MemoryManager.java |   214 -
 .../java/org/apache/orc/impl/OrcAcidUtils.java  |    88 -
 orc/src/java/org/apache/orc/impl/OrcIndex.java  |    43 -
 orc/src/java/org/apache/orc/impl/OrcTail.java   |   140 -
 orc/src/java/org/apache/orc/impl/OutStream.java |   289 -
 .../org/apache/orc/impl/PhysicalFsWriter.java   |   529 -
 .../org/apache/orc/impl/PhysicalWriter.java     |   122 -
 .../org/apache/orc/impl/PositionProvider.java   |    26 -
 .../org/apache/orc/impl/PositionRecorder.java   |    25 -
 .../apache/orc/impl/PositionedOutputStream.java |    39 -
 .../java/org/apache/orc/impl/ReaderImpl.java    |   764 -
 .../org/apache/orc/impl/RecordReaderImpl.java   |  1238 -
 .../org/apache/orc/impl/RecordReaderUtils.java  |   578 -
 .../java/org/apache/orc/impl/RedBlackTree.java  |   311 -
 .../apache/orc/impl/RunLengthByteReader.java    |   174 -
 .../apache/orc/impl/RunLengthByteWriter.java    |   106 -
 .../apache/orc/impl/RunLengthIntegerReader.java |   173 -
 .../orc/impl/RunLengthIntegerReaderV2.java      |   406 -
 .../apache/orc/impl/RunLengthIntegerWriter.java |   143 -
 .../orc/impl/RunLengthIntegerWriterV2.java      |   831 -
 .../org/apache/orc/impl/SchemaEvolution.java    |   399 -
 .../org/apache/orc/impl/SerializationUtils.java |  1311 --
 .../orc/impl/SettableUncompressedStream.java    |    44 -
 .../java/org/apache/orc/impl/SnappyCodec.java   |   108 -
 .../java/org/apache/orc/impl/StreamName.java    |    97 -
 .../org/apache/orc/impl/StringRedBlackTree.java |   210 -
 .../org/apache/orc/impl/TreeReaderFactory.java  |  2163 --
 .../java/org/apache/orc/impl/WriterImpl.java    |  2446 --
 .../java/org/apache/orc/impl/ZeroCopyShims.java |    89 -
 orc/src/java/org/apache/orc/impl/ZlibCodec.java |   169 -
 orc/src/java/org/apache/orc/tools/FileDump.java |   946 -
 .../java/org/apache/orc/tools/JsonFileDump.java |   412 -
 orc/src/protobuf/orc_proto.proto                |     2 +-
 .../apache/hive/orc/TestColumnStatistics.java   |   364 +
 .../apache/hive/orc/TestNewIntegerEncoding.java |  1373 ++
 .../hive/orc/TestOrcNullOptimization.java       |   415 +
 .../org/apache/hive/orc/TestOrcTimezone1.java   |   189 +
 .../org/apache/hive/orc/TestOrcTimezone2.java   |   143 +
 .../org/apache/hive/orc/TestOrcTimezone3.java   |   124 +
 .../apache/hive/orc/TestStringDictionary.java   |   291 +
 .../apache/hive/orc/TestTypeDescription.java    |    90 +
 .../apache/hive/orc/TestUnrolledBitPack.java    |   114 +
 .../org/apache/hive/orc/TestVectorOrcFile.java  |  2789 +++
 .../hive/orc/impl/TestBitFieldReader.java       |   145 +
 .../org/apache/hive/orc/impl/TestBitPack.java   |   279 +
 .../hive/orc/impl/TestColumnStatisticsImpl.java |    64 +
 .../hive/orc/impl/TestDataReaderProperties.java |    85 +
 .../apache/hive/orc/impl/TestDynamicArray.java  |    88 +
 .../org/apache/hive/orc/impl/TestInStream.java  |   314 +
 .../orc/impl/TestIntegerCompressionReader.java  |   130 +
 .../apache/hive/orc/impl/TestMemoryManager.java |   132 +
 .../apache/hive/orc/impl/TestOrcWideTable.java  |    64 +
 .../org/apache/hive/orc/impl/TestOutStream.java |    43 +
 .../org/apache/hive/orc/impl/TestRLEv2.java     |   307 +
 .../apache/hive/orc/impl/TestReaderImpl.java    |   152 +
 .../hive/orc/impl/TestRecordReaderImpl.java     |  1708 ++
 .../hive/orc/impl/TestRunLengthByteReader.java  |   143 +
 .../orc/impl/TestRunLengthIntegerReader.java    |   125 +
 .../hive/orc/impl/TestSchemaEvolution.java      |   480 +
 .../hive/orc/impl/TestSerializationUtils.java   |   199 +
 .../apache/hive/orc/impl/TestStreamName.java    |    49 +
 .../hive/orc/impl/TestStringRedBlackTree.java   |   232 +
 .../test/org/apache/hive/orc/impl/TestZlib.java |    56 +
 .../org/apache/hive/orc/tools/TestFileDump.java |   485 +
 .../apache/hive/orc/tools/TestJsonFileDump.java |   150 +
 .../org/apache/orc/TestColumnStatistics.java    |   365 -
 .../org/apache/orc/TestNewIntegerEncoding.java  |  1373 --
 .../org/apache/orc/TestOrcNullOptimization.java |   415 -
 .../test/org/apache/orc/TestOrcTimezone1.java   |   189 -
 .../test/org/apache/orc/TestOrcTimezone2.java   |   143 -
 .../test/org/apache/orc/TestOrcTimezone3.java   |   126 -
 .../org/apache/orc/TestStringDictionary.java    |   290 -
 .../org/apache/orc/TestTypeDescription.java     |    91 -
 .../org/apache/orc/TestUnrolledBitPack.java     |   114 -
 .../test/org/apache/orc/TestVectorOrcFile.java  |  2789 ---
 .../org/apache/orc/impl/TestBitFieldReader.java |   145 -
 .../test/org/apache/orc/impl/TestBitPack.java   |   279 -
 .../orc/impl/TestColumnStatisticsImpl.java      |    64 -
 .../orc/impl/TestDataReaderProperties.java      |    86 -
 .../org/apache/orc/impl/TestDynamicArray.java   |    90 -
 .../test/org/apache/orc/impl/TestInStream.java  |   314 -
 .../orc/impl/TestIntegerCompressionReader.java  |   130 -
 .../org/apache/orc/impl/TestMemoryManager.java  |   133 -
 .../org/apache/orc/impl/TestOrcWideTable.java   |    64 -
 .../test/org/apache/orc/impl/TestOutStream.java |    43 -
 orc/src/test/org/apache/orc/impl/TestRLEv2.java |   307 -
 .../org/apache/orc/impl/TestReaderImpl.java     |   152 -
 .../apache/orc/impl/TestRecordReaderImpl.java   |  1709 --
 .../orc/impl/TestRunLengthByteReader.java       |   143 -
 .../orc/impl/TestRunLengthIntegerReader.java    |   125 -
 .../apache/orc/impl/TestSchemaEvolution.java    |   480 -
 .../apache/orc/impl/TestSerializationUtils.java |   201 -
 .../org/apache/orc/impl/TestStreamName.java     |    49 -
 .../apache/orc/impl/TestStringRedBlackTree.java |   234 -
 orc/src/test/org/apache/orc/impl/TestZlib.java  |    56 -
 .../test/org/apache/orc/tools/TestFileDump.java |   486 -
 .../org/apache/orc/tools/TestJsonFileDump.java  |   150 -
 214 files changed, 55660 insertions(+), 55754 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/bin/ext/orcfiledump.cmd
----------------------------------------------------------------------
diff --git a/bin/ext/orcfiledump.cmd b/bin/ext/orcfiledump.cmd
index ff4b410..2e7e2ca 100644
--- a/bin/ext/orcfiledump.cmd
+++ b/bin/ext/orcfiledump.cmd
@@ -14,7 +14,7 @@
 @rem See the License for the specific language governing permissions and
 @rem limitations under the License.
 
-set CLASS=org.apache.orc.tools.FileDump
+set CLASS=org.apache.hive.orc.tools.FileDump
 set HIVE_OPTS=
 set HADOOP_CLASSPATH=
 

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/bin/ext/orcfiledump.sh
----------------------------------------------------------------------
diff --git a/bin/ext/orcfiledump.sh b/bin/ext/orcfiledump.sh
index c84e61c..3e08280 100644
--- a/bin/ext/orcfiledump.sh
+++ b/bin/ext/orcfiledump.sh
@@ -17,7 +17,7 @@ THISSERVICE=orcfiledump
 export SERVICE_LIST="${SERVICE_LIST}${THISSERVICE} "
 
 orcfiledump () {
-  CLASS=org.apache.orc.tools.FileDump
+  CLASS=org.apache.hive.orc.tools.FileDump
   HIVE_OPTS=''
   execHiveCmd $CLASS "$@"
 }
@@ -34,4 +34,4 @@ orcfiledump_help () {
   echo "  --skip-dump                 Used along with --recover to directly recover files without dumping"
   echo "  --backup-path <new_path>  Specify a backup path to store the corrupted files (default: /tmp)"
   echo "  --help (-h)                 Print help message"
-} 
+}
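
For context, both launcher scripts above now point $CLASS at the relocated tool class. A minimal, hypothetical Java harness (not part of this patch; the wrapper class name is illustrative) that launches the same entry point programmatically on a Hive 2.2 classpath could look like this:

  // Illustrative sketch only: delegates to the relocated FileDump entry point,
  // mirroring what bin/ext/orcfiledump.sh and .cmd now launch via CLASS.
  import org.apache.hive.orc.tools.FileDump;

  public class OrcFileDumpLauncher {
    public static void main(String[] args) throws Exception {
      // Pass through the same arguments the shell service forwards, e.g. one or
      // more ORC file paths plus options such as --recover or --backup-path.
      FileDump.main(args);
    }
  }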

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/pom.xml
----------------------------------------------------------------------
diff --git a/orc/pom.xml b/orc/pom.xml
index de9c417..f75b91c 100644
--- a/orc/pom.xml
+++ b/orc/pom.xml
@@ -179,38 +179,6 @@
           </execution>
         </executions>
       </plugin>
-
-      <!-- Below we shade all of the org.apache.orc class names in hive-orc
-	   to a unique org.apache.hive.orc prefix. This allows clients to
-	   use both Hive 2.2 and the standalone orc project. The uses in
-	   Hive 2.2 have been changed to use the org.apache.hive.orc prefix.
-	   In Hive 2.3 and beyond, hive uses the standalone ORC project. -->
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-shade-plugin</artifactId>
-        <version>3.0.0</version>
-        <executions>
-          <execution>
-            <phase>package</phase>
-            <goals>
-              <goal>shade</goal>
-            </goals>
-            <configuration>
-              <artifactSet>
-                <includes>
-                  <include>org.apache.hive:hive-orc</include>
-                </includes>
-              </artifactSet>
-              <relocations>
-                <relocation>
-                  <pattern>org.apache.orc</pattern>
-                  <shadedPattern>org.apache.hive.orc</shadedPattern>
-                </relocation>
-              </relocations>
-            </configuration>
-          </execution>
-        </executions>
-      </plugin>
     </plugins>
   </build>
 </project>
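
The comment removed above described the intent of the now-dropped shade step; with this patch the org.apache.hive.orc relocation is baked into the sources instead, so no packaging-time rewrite is needed. A small, hypothetical sketch of the coexistence that comment describes (the class below is illustrative and assumes both Hive 2.2's hive-orc and the standalone ORC jar are on the classpath):

  // Illustrative sketch only: the two TypeDescription classes no longer collide,
  // because Hive 2.2's copy now lives under org.apache.hive.orc in the sources.
  public class OrcCoexistenceSketch {
    public static void main(String[] args) {
      // Standalone ORC project class (assumed present on the classpath).
      org.apache.orc.TypeDescription standalone =
          org.apache.orc.TypeDescription.createLong();
      // Hive 2.2's relocated copy, as added by this commit.
      org.apache.hive.orc.TypeDescription bundled =
          org.apache.hive.orc.TypeDescription.createLong();
      System.out.println(standalone + " / " + bundled);  // both print "bigint"
    }
  }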


[10/37] hive git commit: HIVE-17118. Move the hive-orc source files to make the package names unique.

Posted by om...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/TestVectorOrcFile.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/TestVectorOrcFile.java b/orc/src/test/org/apache/hive/orc/TestVectorOrcFile.java
new file mode 100644
index 0000000..d78a2ef
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/TestVectorOrcFile.java
@@ -0,0 +1,2789 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc;
+
+import com.google.common.collect.Lists;
+
+import junit.framework.Assert;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.UnionColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
+import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
+import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hive.orc.impl.DataReaderProperties;
+import org.apache.hive.orc.impl.OrcIndex;
+import org.apache.hive.orc.impl.RecordReaderImpl;
+import org.apache.hive.orc.tools.TestJsonFileDump;
+import org.apache.hive.orc.impl.MemoryManager;
+import org.apache.hive.orc.impl.RecordReaderUtils;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestName;
+
+import java.io.File;
+import java.io.IOException;
+import java.math.BigInteger;
+import java.nio.ByteBuffer;
+import java.sql.Date;
+import java.sql.Timestamp;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+
+import static junit.framework.TestCase.assertNotNull;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Tests for the vectorized reader and writer for ORC files.
+ */
+public class TestVectorOrcFile {
+
+  public static class InnerStruct {
+    int int1;
+    Text string1 = new Text();
+    InnerStruct(int int1, Text string1) {
+      this.int1 = int1;
+      this.string1.set(string1);
+    }
+    InnerStruct(int int1, String string1) {
+      this.int1 = int1;
+      this.string1.set(string1);
+    }
+
+    public String toString() {
+      return "{" + int1 + ", " + string1 + "}";
+    }
+  }
+
+  public static class MiddleStruct {
+    List<InnerStruct> list = new ArrayList<InnerStruct>();
+
+    MiddleStruct(InnerStruct... items) {
+      list.clear();
+      list.addAll(Arrays.asList(items));
+    }
+  }
+
+  private static InnerStruct inner(int i, String s) {
+    return new InnerStruct(i, s);
+  }
+
+  private static Map<String, InnerStruct> map(InnerStruct... items)  {
+    Map<String, InnerStruct> result = new HashMap<String, InnerStruct>();
+    for(InnerStruct i: items) {
+      result.put(i.string1.toString(), i);
+    }
+    return result;
+  }
+
+  private static List<InnerStruct> list(InnerStruct... items) {
+    List<InnerStruct> result = new ArrayList<InnerStruct>();
+    result.addAll(Arrays.asList(items));
+    return result;
+  }
+
+  private static BytesWritable bytes(int... items) {
+    BytesWritable result = new BytesWritable();
+    result.setSize(items.length);
+    for(int i=0; i < items.length; ++i) {
+      result.getBytes()[i] = (byte) items[i];
+    }
+    return result;
+  }
+
+  private static byte[] bytesArray(int... items) {
+    byte[] result = new byte[items.length];
+    for(int i=0; i < items.length; ++i) {
+      result[i] = (byte) items[i];
+    }
+    return result;
+  }
+
+  private static ByteBuffer byteBuf(int... items) {
+    ByteBuffer result = ByteBuffer.allocate(items.length);
+    for(int item: items) {
+      result.put((byte) item);
+    }
+    result.flip();
+    return result;
+  }
+
+  Path workDir = new Path(System.getProperty("test.tmp.dir",
+      "target" + File.separator + "test" + File.separator + "tmp"));
+
+  Configuration conf;
+  FileSystem fs;
+  Path testFilePath;
+
+  @Rule
+  public TestName testCaseName = new TestName();
+
+  @Before
+  public void openFileSystem () throws Exception {
+    conf = new Configuration();
+    fs = FileSystem.getLocal(conf);
+    testFilePath = new Path(workDir, "TestVectorOrcFile." +
+        testCaseName.getMethodName() + ".orc");
+    fs.delete(testFilePath, false);
+  }
+
+  @Test
+  public void testReadFormat_0_11() throws Exception {
+    Path oldFilePath =
+        new Path(TestJsonFileDump.getFileFromClasspath("orc-file-11-format.orc"));
+    Reader reader = OrcFile.createReader(oldFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+
+    int stripeCount = 0;
+    int rowCount = 0;
+    long currentOffset = -1;
+    for(StripeInformation stripe : reader.getStripes()) {
+      stripeCount += 1;
+      rowCount += stripe.getNumberOfRows();
+      if (currentOffset < 0) {
+        currentOffset = stripe.getOffset() + stripe.getIndexLength()
+            + stripe.getDataLength() + stripe.getFooterLength();
+      } else {
+        assertEquals(currentOffset, stripe.getOffset());
+        currentOffset += stripe.getIndexLength() + stripe.getDataLength()
+            + stripe.getFooterLength();
+      }
+    }
+    Assert.assertEquals(reader.getNumberOfRows(), rowCount);
+    assertEquals(2, stripeCount);
+
+    // check the stats
+    ColumnStatistics[] stats = reader.getStatistics();
+    assertEquals(7500, stats[1].getNumberOfValues());
+    assertEquals(3750, ((BooleanColumnStatistics) stats[1]).getFalseCount());
+    assertEquals(3750, ((BooleanColumnStatistics) stats[1]).getTrueCount());
+    assertEquals("count: 7500 hasNull: true true: 3750", stats[1].toString());
+
+    assertEquals(2048, ((IntegerColumnStatistics) stats[3]).getMaximum());
+    assertEquals(1024, ((IntegerColumnStatistics) stats[3]).getMinimum());
+    assertEquals(true, ((IntegerColumnStatistics) stats[3]).isSumDefined());
+    assertEquals(11520000, ((IntegerColumnStatistics) stats[3]).getSum());
+    assertEquals("count: 7500 hasNull: true min: 1024 max: 2048 sum: 11520000",
+        stats[3].toString());
+
+    assertEquals(Long.MAX_VALUE,
+        ((IntegerColumnStatistics) stats[5]).getMaximum());
+    assertEquals(Long.MAX_VALUE,
+        ((IntegerColumnStatistics) stats[5]).getMinimum());
+    assertEquals(false, ((IntegerColumnStatistics) stats[5]).isSumDefined());
+    assertEquals(
+        "count: 7500 hasNull: true min: 9223372036854775807 max: 9223372036854775807",
+        stats[5].toString());
+
+    assertEquals(-15.0, ((DoubleColumnStatistics) stats[7]).getMinimum(), 0.0001);
+    assertEquals(-5.0, ((DoubleColumnStatistics) stats[7]).getMaximum(), 0.0001);
+    assertEquals(-75000.0, ((DoubleColumnStatistics) stats[7]).getSum(),
+        0.00001);
+    assertEquals("count: 7500 hasNull: true min: -15.0 max: -5.0 sum: -75000.0",
+        stats[7].toString());
+
+    assertEquals("count: 7500 hasNull: true min: bye max: hi sum: 0", stats[9].toString());
+
+    // check the inspectors
+    TypeDescription schema = reader.getSchema();
+    assertEquals(TypeDescription.Category.STRUCT, schema.getCategory());
+    assertEquals("struct<boolean1:boolean,byte1:tinyint,short1:smallint,"
+        + "int1:int,long1:bigint,float1:float,double1:double,bytes1:"
+        + "binary,string1:string,middle:struct<list:array<struct<int1:int,"
+        + "string1:string>>>,list:array<struct<int1:int,string1:string>>,"
+        + "map:map<string,struct<int1:int,string1:string>>,ts:timestamp,"
+        + "decimal1:decimal(38,10)>", schema.toString());
+    VectorizedRowBatch batch = schema.createRowBatch();
+
+    RecordReader rows = reader.rows();
+    Assert.assertEquals(true, rows.nextBatch(batch));
+    assertEquals(1024, batch.size);
+
+    // check the contents of the first row
+    assertEquals(false, getBoolean(batch, 0));
+    assertEquals(1, getByte(batch, 0));
+    assertEquals(1024, getShort(batch, 0));
+    assertEquals(65536, getInt(batch, 0));
+    assertEquals(Long.MAX_VALUE, getLong(batch, 0));
+    assertEquals(1.0, getFloat(batch, 0), 0.00001);
+    assertEquals(-15.0, getDouble(batch, 0), 0.00001);
+    assertEquals(bytes(0, 1, 2, 3, 4), getBinary(batch, 0));
+    assertEquals("hi", getText(batch, 0).toString());
+
+    StructColumnVector middle = (StructColumnVector) batch.cols[9];
+    ListColumnVector midList = (ListColumnVector) middle.fields[0];
+    StructColumnVector midListStruct = (StructColumnVector) midList.child;
+    LongColumnVector midListInt = (LongColumnVector) midListStruct.fields[0];
+    BytesColumnVector midListStr = (BytesColumnVector) midListStruct.fields[1];
+    ListColumnVector list = (ListColumnVector) batch.cols[10];
+    StructColumnVector listStruct = (StructColumnVector) list.child;
+    LongColumnVector listInts = (LongColumnVector) listStruct.fields[0];
+    BytesColumnVector listStrs = (BytesColumnVector) listStruct.fields[1];
+    MapColumnVector map = (MapColumnVector) batch.cols[11];
+    BytesColumnVector mapKey = (BytesColumnVector) map.keys;
+    StructColumnVector mapValue = (StructColumnVector) map.values;
+    LongColumnVector mapValueInts = (LongColumnVector) mapValue.fields[0];
+    BytesColumnVector mapValueStrs = (BytesColumnVector) mapValue.fields[1];
+    TimestampColumnVector timestamp = (TimestampColumnVector) batch.cols[12];
+    DecimalColumnVector decs = (DecimalColumnVector) batch.cols[13];
+
+    assertEquals(false, middle.isNull[0]);
+    assertEquals(2, midList.lengths[0]);
+    int start = (int) midList.offsets[0];
+    assertEquals(1, midListInt.vector[start]);
+    assertEquals("bye", midListStr.toString(start));
+    assertEquals(2, midListInt.vector[start + 1]);
+    assertEquals("sigh", midListStr.toString(start + 1));
+
+    assertEquals(2, list.lengths[0]);
+    start = (int) list.offsets[0];
+    assertEquals(3, listInts.vector[start]);
+    assertEquals("good", listStrs.toString(start));
+    assertEquals(4, listInts.vector[start + 1]);
+    assertEquals("bad", listStrs.toString(start + 1));
+    assertEquals(0, map.lengths[0]);
+    assertEquals(Timestamp.valueOf("2000-03-12 15:00:00"),
+        timestamp.asScratchTimestamp(0));
+    assertEquals(new HiveDecimalWritable(HiveDecimal.create("12345678.6547456")),
+        decs.vector[0]);
+
+    // check the contents of row 7499
+    rows.seekToRow(7499);
+    Assert.assertEquals(true, rows.nextBatch(batch));
+    assertEquals(true, getBoolean(batch, 0));
+    assertEquals(100, getByte(batch, 0));
+    assertEquals(2048, getShort(batch, 0));
+    assertEquals(65536, getInt(batch, 0));
+    assertEquals(Long.MAX_VALUE, getLong(batch, 0));
+    assertEquals(2.0, getFloat(batch, 0), 0.00001);
+    assertEquals(-5.0, getDouble(batch, 0), 0.00001);
+    assertEquals(bytes(), getBinary(batch, 0));
+    assertEquals("bye", getText(batch, 0).toString());
+    assertEquals(false, middle.isNull[0]);
+    assertEquals(2, midList.lengths[0]);
+    start = (int) midList.offsets[0];
+    assertEquals(1, midListInt.vector[start]);
+    assertEquals("bye", midListStr.toString(start));
+    assertEquals(2, midListInt.vector[start + 1]);
+    assertEquals("sigh", midListStr.toString(start + 1));
+    assertEquals(3, list.lengths[0]);
+    start = (int) list.offsets[0];
+    assertEquals(100000000, listInts.vector[start]);
+    assertEquals("cat", listStrs.toString(start));
+    assertEquals(-100000, listInts.vector[start + 1]);
+    assertEquals("in", listStrs.toString(start + 1));
+    assertEquals(1234, listInts.vector[start + 2]);
+    assertEquals("hat", listStrs.toString(start + 2));
+    assertEquals(2, map.lengths[0]);
+    start = (int) map.offsets[0];
+    assertEquals("chani", mapKey.toString(start));
+    assertEquals(5, mapValueInts.vector[start]);
+    assertEquals("chani", mapValueStrs.toString(start));
+    assertEquals("mauddib", mapKey.toString(start + 1));
+    assertEquals(1, mapValueInts.vector[start + 1]);
+    assertEquals("mauddib", mapValueStrs.toString(start + 1));
+    assertEquals(Timestamp.valueOf("2000-03-12 15:00:01"),
+        timestamp.asScratchTimestamp(0));
+    assertEquals(new HiveDecimalWritable(HiveDecimal.create("12345678.6547457")),
+        decs.vector[0]);
+
+    // handle the close up
+    Assert.assertEquals(false, rows.nextBatch(batch));
+    rows.close();
+  }
+
+  @Test
+  public void testTimestamp() throws Exception {
+    TypeDescription schema = TypeDescription.createTimestamp();
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf).setSchema(schema).stripeSize(100000)
+            .bufferSize(10000).version(OrcFile.Version.V_0_11));
+    List<Timestamp> tslist = Lists.newArrayList();
+    tslist.add(Timestamp.valueOf("2037-01-01 00:00:00.000999"));
+    tslist.add(Timestamp.valueOf("2003-01-01 00:00:00.000000222"));
+    tslist.add(Timestamp.valueOf("1999-01-01 00:00:00.999999999"));
+    tslist.add(Timestamp.valueOf("1995-01-01 00:00:00.688888888"));
+    tslist.add(Timestamp.valueOf("2002-01-01 00:00:00.1"));
+    tslist.add(Timestamp.valueOf("2010-03-02 00:00:00.000009001"));
+    tslist.add(Timestamp.valueOf("2005-01-01 00:00:00.000002229"));
+    tslist.add(Timestamp.valueOf("2006-01-01 00:00:00.900203003"));
+    tslist.add(Timestamp.valueOf("2003-01-01 00:00:00.800000007"));
+    tslist.add(Timestamp.valueOf("1996-08-02 00:00:00.723100809"));
+    tslist.add(Timestamp.valueOf("1998-11-02 00:00:00.857340643"));
+    tslist.add(Timestamp.valueOf("2008-10-02 00:00:00"));
+
+    VectorizedRowBatch batch = new VectorizedRowBatch(1, 1024);
+    TimestampColumnVector vec = new TimestampColumnVector(1024);
+    batch.cols[0] = vec;
+    batch.reset();
+    batch.size = tslist.size();
+    for (int i=0; i < tslist.size(); ++i) {
+      Timestamp ts = tslist.get(i);
+      vec.set(i, ts);
+    }
+    writer.addRowBatch(batch);
+    writer.close();
+
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+    RecordReader rows = reader.rows();
+    batch = reader.getSchema().createRowBatch();
+    TimestampColumnVector timestamps = (TimestampColumnVector) batch.cols[0];
+    int idx = 0;
+    while (rows.nextBatch(batch)) {
+      for(int r=0; r < batch.size; ++r) {
+        assertEquals(tslist.get(idx++).getNanos(),
+            timestamps.asScratchTimestamp(r).getNanos());
+      }
+    }
+    Assert.assertEquals(tslist.size(), rows.getRowNumber());
+    assertEquals(0, writer.getSchema().getMaximumId());
+    boolean[] expected = new boolean[] {false};
+    boolean[] included = OrcUtils.includeColumns("", writer.getSchema());
+    assertEquals(true, Arrays.equals(expected, included));
+  }
+
+  @Test
+  public void testStringAndBinaryStatistics() throws Exception {
+
+    TypeDescription schema = TypeDescription.createStruct()
+        .addField("bytes1", TypeDescription.createBinary())
+        .addField("string1", TypeDescription.createString());
+    Writer writer = OrcFile.createWriter(testFilePath,
+                                         OrcFile.writerOptions(conf)
+                                         .setSchema(schema)
+                                         .stripeSize(100000)
+                                         .bufferSize(10000));
+    VectorizedRowBatch batch = schema.createRowBatch();
+    batch.size = 4;
+    BytesColumnVector field1 = (BytesColumnVector) batch.cols[0];
+    BytesColumnVector field2 = (BytesColumnVector) batch.cols[1];
+    field1.setVal(0, bytesArray(0, 1, 2, 3, 4));
+    field1.setVal(1, bytesArray(0, 1, 2, 3));
+    field1.setVal(2, bytesArray(0, 1, 2, 3, 4, 5));
+    field1.noNulls = false;
+    field1.isNull[3] = true;
+    field2.setVal(0, "foo".getBytes());
+    field2.setVal(1, "bar".getBytes());
+    field2.noNulls = false;
+    field2.isNull[2] = true;
+    field2.setVal(3, "hi".getBytes());
+    writer.addRowBatch(batch);
+    writer.close();
+    schema = writer.getSchema();
+    assertEquals(2, schema.getMaximumId());
+
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+
+    boolean[] expected = new boolean[] {false, false, true};
+    boolean[] included = OrcUtils.includeColumns("string1", schema);
+    assertEquals(true, Arrays.equals(expected, included));
+
+    expected = new boolean[] {false, false, false};
+    included = OrcUtils.includeColumns("", schema);
+    assertEquals(true, Arrays.equals(expected, included));
+
+    expected = new boolean[] {false, false, false};
+    included = OrcUtils.includeColumns(null, schema);
+    assertEquals(true, Arrays.equals(expected, included));
+
+    // check the stats
+    ColumnStatistics[] stats = reader.getStatistics();
+    assertEquals(4, stats[0].getNumberOfValues());
+    assertEquals("count: 4 hasNull: false", stats[0].toString());
+
+    assertEquals(3, stats[1].getNumberOfValues());
+    assertEquals(15, ((BinaryColumnStatistics) stats[1]).getSum());
+    assertEquals("count: 3 hasNull: true sum: 15", stats[1].toString());
+
+    assertEquals(3, stats[2].getNumberOfValues());
+    assertEquals("bar", ((StringColumnStatistics) stats[2]).getMinimum());
+    assertEquals("hi", ((StringColumnStatistics) stats[2]).getMaximum());
+    assertEquals(8, ((StringColumnStatistics) stats[2]).getSum());
+    assertEquals("count: 3 hasNull: true min: bar max: hi sum: 8",
+        stats[2].toString());
+
+    // check the inspectors
+    batch = reader.getSchema().createRowBatch();
+    BytesColumnVector bytes = (BytesColumnVector) batch.cols[0];
+    BytesColumnVector strs = (BytesColumnVector) batch.cols[1];
+    RecordReader rows = reader.rows();
+    Assert.assertEquals(true, rows.nextBatch(batch));
+    assertEquals(4, batch.size);
+
+    // check the contents of the first row
+    assertEquals(bytes(0,1,2,3,4), getBinary(bytes, 0));
+    assertEquals("foo", strs.toString(0));
+
+    // check the contents of second row
+    assertEquals(bytes(0,1,2,3), getBinary(bytes, 1));
+    assertEquals("bar", strs.toString(1));
+
+    // check the contents of third row
+    assertEquals(bytes(0,1,2,3,4,5), getBinary(bytes, 2));
+    assertNull(strs.toString(2));
+
+    // check the contents of fourth row
+    assertNull(getBinary(bytes, 3));
+    assertEquals("hi", strs.toString(3));
+
+    // handle the close up
+    Assert.assertEquals(false, rows.nextBatch(batch));
+    rows.close();
+  }
+
+
+  @Test
+  public void testStripeLevelStats() throws Exception {
+    TypeDescription schema = TypeDescription.createStruct()
+        .addField("int1", TypeDescription.createInt())
+        .addField("string1", TypeDescription.createString());
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .setSchema(schema)
+            .stripeSize(100000)
+            .bufferSize(10000));
+    VectorizedRowBatch batch = schema.createRowBatch();
+    batch.size = 1000;
+    LongColumnVector field1 = (LongColumnVector) batch.cols[0];
+    BytesColumnVector field2 = (BytesColumnVector) batch.cols[1];
+    field1.isRepeating = true;
+    field2.isRepeating = true;
+    for (int b = 0; b < 11; b++) {
+      if (b >= 5) {
+        if (b >= 10) {
+          field1.vector[0] = 3;
+          field2.setVal(0, "three".getBytes());
+        } else {
+          field1.vector[0] = 2;
+          field2.setVal(0, "two".getBytes());
+        }
+      } else {
+        field1.vector[0] = 1;
+        field2.setVal(0, "one".getBytes());
+      }
+      writer.addRowBatch(batch);
+    }
+
+    writer.close();
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+
+    schema = writer.getSchema();
+    assertEquals(2, schema.getMaximumId());
+    boolean[] expected = new boolean[] {false, true, false};
+    boolean[] included = OrcUtils.includeColumns("int1", schema);
+    assertEquals(true, Arrays.equals(expected, included));
+
+    List<StripeStatistics> stats = reader.getStripeStatistics();
+    int numStripes = stats.size();
+    assertEquals(3, numStripes);
+    StripeStatistics ss1 = stats.get(0);
+    StripeStatistics ss2 = stats.get(1);
+    StripeStatistics ss3 = stats.get(2);
+
+    assertEquals(5000, ss1.getColumnStatistics()[0].getNumberOfValues());
+    assertEquals(5000, ss2.getColumnStatistics()[0].getNumberOfValues());
+    assertEquals(1000, ss3.getColumnStatistics()[0].getNumberOfValues());
+
+    assertEquals(5000, (ss1.getColumnStatistics()[1]).getNumberOfValues());
+    assertEquals(5000, (ss2.getColumnStatistics()[1]).getNumberOfValues());
+    assertEquals(1000, (ss3.getColumnStatistics()[1]).getNumberOfValues());
+    assertEquals(1, ((IntegerColumnStatistics)ss1.getColumnStatistics()[1]).getMinimum());
+    assertEquals(2, ((IntegerColumnStatistics)ss2.getColumnStatistics()[1]).getMinimum());
+    assertEquals(3, ((IntegerColumnStatistics)ss3.getColumnStatistics()[1]).getMinimum());
+    assertEquals(1, ((IntegerColumnStatistics)ss1.getColumnStatistics()[1]).getMaximum());
+    assertEquals(2, ((IntegerColumnStatistics)ss2.getColumnStatistics()[1]).getMaximum());
+    assertEquals(3, ((IntegerColumnStatistics)ss3.getColumnStatistics()[1]).getMaximum());
+    assertEquals(5000, ((IntegerColumnStatistics)ss1.getColumnStatistics()[1]).getSum());
+    assertEquals(10000, ((IntegerColumnStatistics)ss2.getColumnStatistics()[1]).getSum());
+    assertEquals(3000, ((IntegerColumnStatistics)ss3.getColumnStatistics()[1]).getSum());
+
+    assertEquals(5000, (ss1.getColumnStatistics()[2]).getNumberOfValues());
+    assertEquals(5000, (ss2.getColumnStatistics()[2]).getNumberOfValues());
+    assertEquals(1000, (ss3.getColumnStatistics()[2]).getNumberOfValues());
+    assertEquals("one", ((StringColumnStatistics)ss1.getColumnStatistics()[2]).getMinimum());
+    assertEquals("two", ((StringColumnStatistics)ss2.getColumnStatistics()[2]).getMinimum());
+    assertEquals("three", ((StringColumnStatistics)ss3.getColumnStatistics()[2]).getMinimum());
+    assertEquals("one", ((StringColumnStatistics)ss1.getColumnStatistics()[2]).getMaximum());
+    assertEquals("two", ((StringColumnStatistics) ss2.getColumnStatistics()[2]).getMaximum());
+    assertEquals("three", ((StringColumnStatistics)ss3.getColumnStatistics()[2]).getMaximum());
+    assertEquals(15000, ((StringColumnStatistics)ss1.getColumnStatistics()[2]).getSum());
+    assertEquals(15000, ((StringColumnStatistics)ss2.getColumnStatistics()[2]).getSum());
+    assertEquals(5000, ((StringColumnStatistics)ss3.getColumnStatistics()[2]).getSum());
+
+    RecordReaderImpl recordReader = (RecordReaderImpl) reader.rows();
+    OrcProto.RowIndex[] index = recordReader.readRowIndex(0, null, null).getRowGroupIndex();
+    assertEquals(3, index.length);
+    List<OrcProto.RowIndexEntry> items = index[1].getEntryList();
+    assertEquals(1, items.size());
+    assertEquals(3, items.get(0).getPositionsCount());
+    assertEquals(0, items.get(0).getPositions(0));
+    assertEquals(0, items.get(0).getPositions(1));
+    assertEquals(0, items.get(0).getPositions(2));
+    assertEquals(1,
+                 items.get(0).getStatistics().getIntStatistics().getMinimum());
+    index = recordReader.readRowIndex(1, null, null).getRowGroupIndex();
+    assertEquals(3, index.length);
+    items = index[1].getEntryList();
+    assertEquals(2,
+        items.get(0).getStatistics().getIntStatistics().getMaximum());
+  }
+
+  private static void setInner(StructColumnVector inner, int rowId,
+                               int i, String value) {
+    ((LongColumnVector) inner.fields[0]).vector[rowId] = i;
+    if (value != null) {
+      ((BytesColumnVector) inner.fields[1]).setVal(rowId, value.getBytes());
+    } else {
+      inner.fields[1].isNull[rowId] = true;
+      inner.fields[1].noNulls = false;
+    }
+  }
+
+  private static void checkInner(StructColumnVector inner, int rowId,
+                                 int rowInBatch, int i, String value) {
+    assertEquals("row " + rowId, i,
+        ((LongColumnVector) inner.fields[0]).vector[rowInBatch]);
+    if (value != null) {
+      assertEquals("row " + rowId, value,
+          ((BytesColumnVector) inner.fields[1]).toString(rowInBatch));
+    } else {
+      assertEquals("row " + rowId, true, inner.fields[1].isNull[rowInBatch]);
+      assertEquals("row " + rowId, false, inner.fields[1].noNulls);
+    }
+  }
+
+  private static void setInnerList(ListColumnVector list, int rowId,
+                                   List<InnerStruct> value) {
+    if (value != null) {
+      if (list.childCount + value.size() > list.child.isNull.length) {
+        list.child.ensureSize(list.childCount * 2, true);
+      }
+      list.lengths[rowId] = value.size();
+      list.offsets[rowId] = list.childCount;
+      for (int i = 0; i < list.lengths[rowId]; ++i) {
+        InnerStruct inner = value.get(i);
+        setInner((StructColumnVector) list.child, i + list.childCount,
+            inner.int1, inner.string1.toString());
+      }
+      list.childCount += value.size();
+    } else {
+      list.isNull[rowId] = true;
+      list.noNulls = false;
+    }
+  }
+
+  private static void checkInnerList(ListColumnVector list, int rowId,
+                                     int rowInBatch, List<InnerStruct> value) {
+    if (value != null) {
+      assertEquals("row " + rowId, value.size(), list.lengths[rowInBatch]);
+      int start = (int) list.offsets[rowInBatch];
+      for (int i = 0; i < list.lengths[rowInBatch]; ++i) {
+        InnerStruct inner = value.get(i);
+        checkInner((StructColumnVector) list.child, rowId, i + start,
+            inner.int1, inner.string1.toString());
+      }
+      list.childCount += value.size();
+    } else {
+      assertEquals("row " + rowId, true, list.isNull[rowInBatch]);
+      assertEquals("row " + rowId, false, list.noNulls);
+    }
+  }
+
+  private static void setInnerMap(MapColumnVector map, int rowId,
+                                  Map<String, InnerStruct> value) {
+    if (value != null) {
+      if (map.childCount >= map.keys.isNull.length) {
+        map.keys.ensureSize(map.childCount * 2, true);
+        map.values.ensureSize(map.childCount * 2, true);
+      }
+      map.lengths[rowId] = value.size();
+      int offset = map.childCount;
+      map.offsets[rowId] = offset;
+
+      for (Map.Entry<String, InnerStruct> entry : value.entrySet()) {
+        ((BytesColumnVector) map.keys).setVal(offset, entry.getKey().getBytes());
+        InnerStruct inner = entry.getValue();
+        setInner((StructColumnVector) map.values, offset, inner.int1,
+            inner.string1.toString());
+        offset += 1;
+      }
+      map.childCount = offset;
+    } else {
+      map.isNull[rowId] = true;
+      map.noNulls = false;
+    }
+  }
+
+  private static void checkInnerMap(MapColumnVector map, int rowId,
+                                    int rowInBatch,
+                                    Map<String, InnerStruct> value) {
+    if (value != null) {
+      assertEquals("row " + rowId, value.size(), map.lengths[rowInBatch]);
+      int offset = (int) map.offsets[rowInBatch];
+      for(int i=0; i < value.size(); ++i) {
+        String key = ((BytesColumnVector) map.keys).toString(offset + i);
+        InnerStruct expected = value.get(key);
+        checkInner((StructColumnVector) map.values, rowId, offset + i,
+            expected.int1, expected.string1.toString());
+      }
+    } else {
+      assertEquals("row " + rowId, true, map.isNull[rowId]);
+      assertEquals("row " + rowId, false, map.noNulls);
+    }
+  }
+
+  private static void setMiddleStruct(StructColumnVector middle, int rowId,
+                                      MiddleStruct value) {
+    if (value != null) {
+      setInnerList((ListColumnVector) middle.fields[0], rowId, value.list);
+    } else {
+      middle.isNull[rowId] = true;
+      middle.noNulls = false;
+    }
+  }
+
+  private static void checkMiddleStruct(StructColumnVector middle, int rowId,
+                                        int rowInBatch, MiddleStruct value) {
+    if (value != null) {
+      checkInnerList((ListColumnVector) middle.fields[0], rowId, rowInBatch,
+          value.list);
+    } else {
+      assertEquals("row " + rowId, true, middle.isNull[rowInBatch]);
+      assertEquals("row " + rowId, false, middle.noNulls);
+    }
+  }
+
+  private static void setBigRow(VectorizedRowBatch batch, int rowId,
+                                Boolean b1, Byte b2, Short s1,
+                                Integer i1, Long l1, Float f1,
+                                Double d1, BytesWritable b3, String s2,
+                                MiddleStruct m1, List<InnerStruct> l2,
+                                Map<String, InnerStruct> m2) {
+    ((LongColumnVector) batch.cols[0]).vector[rowId] = b1 ? 1 : 0;
+    ((LongColumnVector) batch.cols[1]).vector[rowId] = b2;
+    ((LongColumnVector) batch.cols[2]).vector[rowId] = s1;
+    ((LongColumnVector) batch.cols[3]).vector[rowId] = i1;
+    ((LongColumnVector) batch.cols[4]).vector[rowId] = l1;
+    ((DoubleColumnVector) batch.cols[5]).vector[rowId] = f1;
+    ((DoubleColumnVector) batch.cols[6]).vector[rowId] = d1;
+    if (b3 != null) {
+      ((BytesColumnVector) batch.cols[7]).setVal(rowId, b3.getBytes(), 0,
+          b3.getLength());
+    } else {
+      batch.cols[7].isNull[rowId] = true;
+      batch.cols[7].noNulls = false;
+    }
+    if (s2 != null) {
+      ((BytesColumnVector) batch.cols[8]).setVal(rowId, s2.getBytes());
+    } else {
+      batch.cols[8].isNull[rowId] = true;
+      batch.cols[8].noNulls = false;
+    }
+    setMiddleStruct((StructColumnVector) batch.cols[9], rowId, m1);
+    setInnerList((ListColumnVector) batch.cols[10], rowId, l2);
+    setInnerMap((MapColumnVector) batch.cols[11], rowId, m2);
+  }
+
+  private static void checkBigRow(VectorizedRowBatch batch,
+                                  int rowInBatch,
+                                  int rowId,
+                                  boolean b1, byte b2, short s1,
+                                  int i1, long l1, float f1,
+                                  double d1, BytesWritable b3, String s2,
+                                  MiddleStruct m1, List<InnerStruct> l2,
+                                  Map<String, InnerStruct> m2) {
+    assertEquals("row " + rowId, b1, getBoolean(batch, rowInBatch));
+    assertEquals("row " + rowId, b2, getByte(batch, rowInBatch));
+    assertEquals("row " + rowId, s1, getShort(batch, rowInBatch));
+    assertEquals("row " + rowId, i1, getInt(batch, rowInBatch));
+    assertEquals("row " + rowId, l1, getLong(batch, rowInBatch));
+    assertEquals("row " + rowId, f1, getFloat(batch, rowInBatch), 0.0001);
+    assertEquals("row " + rowId, d1, getDouble(batch, rowInBatch), 0.0001);
+    if (b3 != null) {
+      BytesColumnVector bytes = (BytesColumnVector) batch.cols[7];
+      assertEquals("row " + rowId, b3.getLength(), bytes.length[rowInBatch]);
+      for(int i=0; i < b3.getLength(); ++i) {
+        assertEquals("row " + rowId + " byte " + i, b3.getBytes()[i],
+            bytes.vector[rowInBatch][bytes.start[rowInBatch] + i]);
+      }
+    } else {
+      assertEquals("row " + rowId, true, batch.cols[7].isNull[rowInBatch]);
+      assertEquals("row " + rowId, false, batch.cols[7].noNulls);
+    }
+    if (s2 != null) {
+      assertEquals("row " + rowId, s2, getText(batch, rowInBatch).toString());
+    } else {
+      assertEquals("row " + rowId, true, batch.cols[8].isNull[rowInBatch]);
+      assertEquals("row " + rowId, false, batch.cols[8].noNulls);
+    }
+    checkMiddleStruct((StructColumnVector) batch.cols[9], rowId, rowInBatch,
+        m1);
+    checkInnerList((ListColumnVector) batch.cols[10], rowId, rowInBatch, l2);
+    checkInnerMap((MapColumnVector) batch.cols[11], rowId, rowInBatch, m2);
+  }
+
+  private static boolean getBoolean(VectorizedRowBatch batch, int rowId) {
+    return ((LongColumnVector) batch.cols[0]).vector[rowId] != 0;
+  }
+
+  private static byte getByte(VectorizedRowBatch batch, int rowId) {
+    return (byte) ((LongColumnVector) batch.cols[1]).vector[rowId];
+  }
+
+  private static short getShort(VectorizedRowBatch batch, int rowId) {
+    return (short) ((LongColumnVector) batch.cols[2]).vector[rowId];
+  }
+
+  private static int getInt(VectorizedRowBatch batch, int rowId) {
+    return (int) ((LongColumnVector) batch.cols[3]).vector[rowId];
+  }
+
+  private static long getLong(VectorizedRowBatch batch, int rowId) {
+    return ((LongColumnVector) batch.cols[4]).vector[rowId];
+  }
+
+  private static float getFloat(VectorizedRowBatch batch, int rowId) {
+    return (float) ((DoubleColumnVector) batch.cols[5]).vector[rowId];
+  }
+
+  private static double getDouble(VectorizedRowBatch batch, int rowId) {
+    return ((DoubleColumnVector) batch.cols[6]).vector[rowId];
+  }
+
+  private static BytesWritable getBinary(BytesColumnVector column, int rowId) {
+    if (column.isRepeating) {
+      rowId = 0;
+    }
+    if (column.noNulls || !column.isNull[rowId]) {
+      return new BytesWritable(Arrays.copyOfRange(column.vector[rowId],
+          column.start[rowId], column.start[rowId] + column.length[rowId]));
+    } else {
+      return null;
+    }
+  }
+
+  private static BytesWritable getBinary(VectorizedRowBatch batch, int rowId) {
+    return getBinary((BytesColumnVector) batch.cols[7], rowId);
+  }
+
+  private static Text getText(BytesColumnVector vector, int rowId) {
+    if (vector.isRepeating) {
+      rowId = 0;
+    }
+    if (vector.noNulls || !vector.isNull[rowId]) {
+      return new Text(Arrays.copyOfRange(vector.vector[rowId],
+          vector.start[rowId], vector.start[rowId] + vector.length[rowId]));
+    } else {
+      return null;
+    }
+  }
+
+  private static Text getText(VectorizedRowBatch batch, int rowId) {
+    return getText((BytesColumnVector) batch.cols[8], rowId);
+  }
+
+  private static InnerStruct getInner(StructColumnVector vector,
+                                      int rowId) {
+    return new InnerStruct(
+        (int) ((LongColumnVector) vector.fields[0]).vector[rowId],
+        getText((BytesColumnVector) vector.fields[1], rowId));
+  }
+
+  private static List<InnerStruct> getList(ListColumnVector cv,
+                                           int rowId) {
+    if (cv.isRepeating) {
+      rowId = 0;
+    }
+    if (cv.noNulls || !cv.isNull[rowId]) {
+      List<InnerStruct> result =
+          new ArrayList<InnerStruct>((int) cv.lengths[rowId]);
+      for(long i=cv.offsets[rowId];
+          i < cv.offsets[rowId] + cv.lengths[rowId]; ++i) {
+        result.add(getInner((StructColumnVector) cv.child, (int) i));
+      }
+      return result;
+    } else {
+      return null;
+    }
+  }
+
+  private static List<InnerStruct> getMidList(VectorizedRowBatch batch,
+                                              int rowId) {
+    return getList((ListColumnVector) ((StructColumnVector) batch.cols[9])
+        .fields[0], rowId);
+  }
+
+  private static List<InnerStruct> getList(VectorizedRowBatch batch,
+                                           int rowId) {
+    return getList((ListColumnVector) batch.cols[10], rowId);
+  }
+
+  private static Map<Text, InnerStruct> getMap(VectorizedRowBatch batch,
+                                               int rowId) {
+    MapColumnVector cv = (MapColumnVector) batch.cols[11];
+    if (cv.isRepeating) {
+      rowId = 0;
+    }
+    if (cv.noNulls || !cv.isNull[rowId]) {
+      Map<Text, InnerStruct> result =
+          new HashMap<Text, InnerStruct>((int) cv.lengths[rowId]);
+      for(long i=cv.offsets[rowId];
+          i < cv.offsets[rowId] + cv.lengths[rowId]; ++i) {
+        result.put(getText((BytesColumnVector) cv.keys, (int) i),
+            getInner((StructColumnVector) cv.values, (int) i));
+      }
+      return result;
+    } else {
+      return null;
+    }
+  }
+
+  private static TypeDescription createInnerSchema() {
+    return TypeDescription.createStruct()
+        .addField("int1", TypeDescription.createInt())
+        .addField("string1", TypeDescription.createString());
+  }
+
+  private static TypeDescription createBigRowSchema() {
+    return TypeDescription.createStruct()
+        .addField("boolean1", TypeDescription.createBoolean())
+        .addField("byte1", TypeDescription.createByte())
+        .addField("short1", TypeDescription.createShort())
+        .addField("int1", TypeDescription.createInt())
+        .addField("long1", TypeDescription.createLong())
+        .addField("float1", TypeDescription.createFloat())
+        .addField("double1", TypeDescription.createDouble())
+        .addField("bytes1", TypeDescription.createBinary())
+        .addField("string1", TypeDescription.createString())
+        .addField("middle", TypeDescription.createStruct()
+            .addField("list", TypeDescription.createList(createInnerSchema())))
+        .addField("list", TypeDescription.createList(createInnerSchema()))
+        .addField("map", TypeDescription.createMap(
+            TypeDescription.createString(),
+            createInnerSchema()));
+  }
+
+  static void assertArrayEquals(boolean[] expected, boolean[] actual) {
+    assertEquals(expected.length, actual.length);
+    boolean diff = false;
+    for(int i=0; i < expected.length; ++i) {
+      if (expected[i] != actual[i]) {
+        System.out.println("Difference at " + i + " expected: " + expected[i] +
+          " actual: " + actual[i]);
+        diff = true;
+      }
+    }
+    assertEquals(false, diff);
+  }
+
+  @Test
+  public void test1() throws Exception {
+    TypeDescription schema = createBigRowSchema();
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .setSchema(schema)
+            .stripeSize(100000)
+            .bufferSize(10000));
+    VectorizedRowBatch batch = schema.createRowBatch();
+    batch.size = 2;
+    setBigRow(batch, 0, false, (byte) 1, (short) 1024, 65536,
+        Long.MAX_VALUE, (float) 1.0, -15.0, bytes(0, 1, 2, 3, 4), "hi",
+        new MiddleStruct(inner(1, "bye"), inner(2, "sigh")),
+        list(inner(3, "good"), inner(4, "bad")),
+        map());
+    setBigRow(batch, 1, true, (byte) 100, (short) 2048, 65536,
+        Long.MAX_VALUE, (float) 2.0, -5.0, bytes(), "bye",
+        new MiddleStruct(inner(1, "bye"), inner(2, "sigh")),
+        list(inner(100000000, "cat"), inner(-100000, "in"), inner(1234, "hat")),
+        map(inner(5, "chani"), inner(1, "mauddib")));
+    writer.addRowBatch(batch);
+    writer.close();
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+
+    schema = writer.getSchema();
+    assertEquals(23, schema.getMaximumId());
+    boolean[] expected = new boolean[] {false, false, false, false, false,
+        false, false, false, false, false,
+        false, false, false, false, false,
+        false, false, false, false, false,
+        false, false, false, false};
+    boolean[] included = OrcUtils.includeColumns("", schema);
+    assertEquals(true, Arrays.equals(expected, included));
+
+    expected = new boolean[] {false, true, false, false, false,
+        false, false, false, false, true,
+        true, true, true, true, true,
+        false, false, false, false, true,
+        true, true, true, true};
+    included = OrcUtils.includeColumns("boolean1,string1,middle,map", schema);
+
+    assertArrayEquals(expected, included);
+
+    expected = new boolean[] {false, true, false, false, false,
+        false, false, false, false, true,
+        true, true, true, true, true,
+        false, false, false, false, true,
+        true, true, true, true};
+    included = OrcUtils.includeColumns("boolean1,string1,middle,map", schema);
+    assertArrayEquals(expected, included);
+
+    expected = new boolean[] {false, true, true, true, true,
+        true, true, true, true, true,
+        true, true, true, true, true,
+        true, true, true, true, true,
+        true, true, true, true};
+    included = OrcUtils.includeColumns(
+        "boolean1,byte1,short1,int1,long1,float1,double1,bytes1,string1,middle,list,map",
+        schema);
+    assertEquals(true, Arrays.equals(expected, included));
+
+    // check the stats
+    ColumnStatistics[] stats = reader.getStatistics();
+    assertEquals(2, stats[1].getNumberOfValues());
+    assertEquals(1, ((BooleanColumnStatistics) stats[1]).getFalseCount());
+    assertEquals(1, ((BooleanColumnStatistics) stats[1]).getTrueCount());
+    assertEquals("count: 2 hasNull: false true: 1", stats[1].toString());
+
+    assertEquals(2048, ((IntegerColumnStatistics) stats[3]).getMaximum());
+    assertEquals(1024, ((IntegerColumnStatistics) stats[3]).getMinimum());
+    assertEquals(true, ((IntegerColumnStatistics) stats[3]).isSumDefined());
+    assertEquals(3072, ((IntegerColumnStatistics) stats[3]).getSum());
+    assertEquals("count: 2 hasNull: false min: 1024 max: 2048 sum: 3072",
+        stats[3].toString());
+
+    StripeStatistics ss = reader.getStripeStatistics().get(0);
+    assertEquals(2, ss.getColumnStatistics()[0].getNumberOfValues());
+    assertEquals(1, ((BooleanColumnStatistics) ss.getColumnStatistics()[1]).getTrueCount());
+    assertEquals(1024, ((IntegerColumnStatistics) ss.getColumnStatistics()[3]).getMinimum());
+    assertEquals(2048, ((IntegerColumnStatistics) ss.getColumnStatistics()[3]).getMaximum());
+    assertEquals(3072, ((IntegerColumnStatistics) ss.getColumnStatistics()[3]).getSum());
+    assertEquals(-15.0, ((DoubleColumnStatistics) stats[7]).getMinimum(), 0.0001);
+    assertEquals(-5.0, ((DoubleColumnStatistics) stats[7]).getMaximum(), 0.0001);
+    assertEquals(-20.0, ((DoubleColumnStatistics) stats[7]).getSum(), 0.00001);
+    assertEquals("count: 2 hasNull: false min: -15.0 max: -5.0 sum: -20.0",
+        stats[7].toString());
+
+    assertEquals("count: 2 hasNull: false min: bye max: hi sum: 5", stats[9].toString());
+
+    // check the schema
+    TypeDescription readerSchema = reader.getSchema();
+    assertEquals(TypeDescription.Category.STRUCT, readerSchema.getCategory());
+    assertEquals("struct<boolean1:boolean,byte1:tinyint,short1:smallint,"
+        + "int1:int,long1:bigint,float1:float,double1:double,bytes1:"
+        + "binary,string1:string,middle:struct<list:array<struct<int1:int,"
+        + "string1:string>>>,list:array<struct<int1:int,string1:string>>,"
+        + "map:map<string,struct<int1:int,string1:string>>>",
+        readerSchema.toString());
+    List<String> fieldNames = readerSchema.getFieldNames();
+    List<TypeDescription> fieldTypes = readerSchema.getChildren();
+    assertEquals("boolean1", fieldNames.get(0));
+    assertEquals(TypeDescription.Category.BOOLEAN, fieldTypes.get(0).getCategory());
+    assertEquals("byte1", fieldNames.get(1));
+    assertEquals(TypeDescription.Category.BYTE, fieldTypes.get(1).getCategory());
+    assertEquals("short1", fieldNames.get(2));
+    assertEquals(TypeDescription.Category.SHORT, fieldTypes.get(2).getCategory());
+    assertEquals("int1", fieldNames.get(3));
+    assertEquals(TypeDescription.Category.INT, fieldTypes.get(3).getCategory());
+    assertEquals("long1", fieldNames.get(4));
+    assertEquals(TypeDescription.Category.LONG, fieldTypes.get(4).getCategory());
+    assertEquals("float1", fieldNames.get(5));
+    assertEquals(TypeDescription.Category.FLOAT, fieldTypes.get(5).getCategory());
+    assertEquals("double1", fieldNames.get(6));
+    assertEquals(TypeDescription.Category.DOUBLE, fieldTypes.get(6).getCategory());
+    assertEquals("bytes1", fieldNames.get(7));
+    assertEquals(TypeDescription.Category.BINARY, fieldTypes.get(7).getCategory());
+    assertEquals("string1", fieldNames.get(8));
+    assertEquals(TypeDescription.Category.STRING, fieldTypes.get(8).getCategory());
+    assertEquals("middle", fieldNames.get(9));
+    TypeDescription middle = fieldTypes.get(9);
+    assertEquals(TypeDescription.Category.STRUCT, middle.getCategory());
+    TypeDescription midList = middle.getChildren().get(0);
+    assertEquals(TypeDescription.Category.LIST, midList.getCategory());
+    TypeDescription inner = midList.getChildren().get(0);
+    assertEquals(TypeDescription.Category.STRUCT, inner.getCategory());
+    assertEquals("int1", inner.getFieldNames().get(0));
+    assertEquals("string1", inner.getFieldNames().get(1));
+
+    RecordReader rows = reader.rows();
+    // create a new batch
+    batch = readerSchema.createRowBatch();
+    Assert.assertEquals(true, rows.nextBatch(batch));
+    assertEquals(2, batch.size);
+    Assert.assertEquals(false, rows.nextBatch(batch));
+
+    // check the contents of the first row
+    assertEquals(false, getBoolean(batch, 0));
+    assertEquals(1, getByte(batch, 0));
+    assertEquals(1024, getShort(batch, 0));
+    assertEquals(65536, getInt(batch, 0));
+    assertEquals(Long.MAX_VALUE, getLong(batch, 0));
+    assertEquals(1.0, getFloat(batch, 0), 0.00001);
+    assertEquals(-15.0, getDouble(batch, 0), 0.00001);
+    assertEquals(bytes(0,1,2,3,4), getBinary(batch, 0));
+    assertEquals("hi", getText(batch, 0).toString());
+    List<InnerStruct> midRow = getMidList(batch, 0);
+    assertNotNull(midRow);
+    assertEquals(2, midRow.size());
+    assertEquals(1, midRow.get(0).int1);
+    assertEquals("bye", midRow.get(0).string1.toString());
+    assertEquals(2, midRow.get(1).int1);
+    assertEquals("sigh", midRow.get(1).string1.toString());
+    List<InnerStruct> list = getList(batch, 0);
+    assertEquals(2, list.size());
+    assertEquals(3, list.get(0).int1);
+    assertEquals("good", list.get(0).string1.toString());
+    assertEquals(4, list.get(1).int1);
+    assertEquals("bad", list.get(1).string1.toString());
+    Map<Text, InnerStruct> map = getMap(batch, 0);
+    assertEquals(0, map.size());
+
+    // check the contents of second row
+    assertEquals(true, getBoolean(batch, 1));
+    assertEquals(100, getByte(batch, 1));
+    assertEquals(2048, getShort(batch, 1));
+    assertEquals(65536, getInt(batch, 1));
+    assertEquals(Long.MAX_VALUE, getLong(batch, 1));
+    assertEquals(2.0, getFloat(batch, 1), 0.00001);
+    assertEquals(-5.0, getDouble(batch, 1), 0.00001);
+    assertEquals(bytes(), getBinary(batch, 1));
+    assertEquals("bye", getText(batch, 1).toString());
+    midRow = getMidList(batch, 1);
+    assertNotNull(midRow);
+    assertEquals(2, midRow.size());
+    assertEquals(1, midRow.get(0).int1);
+    assertEquals("bye", midRow.get(0).string1.toString());
+    assertEquals(2, midRow.get(1).int1);
+    assertEquals("sigh", midRow.get(1).string1.toString());
+    list = getList(batch, 1);
+    assertEquals(3, list.size());
+    assertEquals(100000000, list.get(0).int1);
+    assertEquals("cat", list.get(0).string1.toString());
+    assertEquals(-100000, list.get(1).int1);
+    assertEquals("in", list.get(1).string1.toString());
+    assertEquals(1234, list.get(2).int1);
+    assertEquals("hat", list.get(2).string1.toString());
+    map = getMap(batch, 1);
+    assertEquals(2, map.size());
+    InnerStruct value = map.get(new Text("chani"));
+    assertEquals(5, value.int1);
+    assertEquals("chani", value.string1.toString());
+    value = map.get(new Text("mauddib"));
+    assertEquals(1, value.int1);
+    assertEquals("mauddib", value.string1.toString());
+
+    // handle the close up
+    Assert.assertEquals(false, rows.nextBatch(batch));
+    rows.close();
+  }
+
+  @Test
+  public void testColumnProjection() throws Exception {
+    TypeDescription schema = createInnerSchema();
+    Writer writer = OrcFile.createWriter(testFilePath,
+                                         OrcFile.writerOptions(conf)
+                                         .setSchema(schema)
+                                         .stripeSize(1000)
+                                         .compress(CompressionKind.NONE)
+                                         .bufferSize(100)
+                                         .rowIndexStride(1000));
+    VectorizedRowBatch batch = schema.createRowBatch();
+    Random r1 = new Random(1);
+    Random r2 = new Random(2);
+    int x;
+    int minInt=0, maxInt=0;
+    String y;
+    String minStr = null, maxStr = null;
+    batch.size = 1000;
+    boolean first = true;
+    for(int b=0; b < 21; ++b) {
+      for(int r=0; r < 1000; ++r) {
+        x = r1.nextInt();
+        y = Long.toHexString(r2.nextLong());
+        if (first || x < minInt) {
+          minInt = x;
+        }
+        if (first || x > maxInt) {
+          maxInt = x;
+        }
+        if (first || y.compareTo(minStr) < 0) {
+          minStr = y;
+        }
+        if (first || y.compareTo(maxStr) > 0) {
+          maxStr = y;
+        }
+        first = false;
+        ((LongColumnVector) batch.cols[0]).vector[r] = x;
+        ((BytesColumnVector) batch.cols[1]).setVal(r, y.getBytes());
+      }
+      writer.addRowBatch(batch);
+    }
+    writer.close();
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+
+    // check out the statistics
+    ColumnStatistics[] stats = reader.getStatistics();
+    assertEquals(3, stats.length);
+    for(ColumnStatistics s: stats) {
+      assertEquals(21000, s.getNumberOfValues());
+      if (s instanceof IntegerColumnStatistics) {
+        assertEquals(minInt, ((IntegerColumnStatistics) s).getMinimum());
+        assertEquals(maxInt, ((IntegerColumnStatistics) s).getMaximum());
+      } else if (s instanceof  StringColumnStatistics) {
+        assertEquals(maxStr, ((StringColumnStatistics) s).getMaximum());
+        assertEquals(minStr, ((StringColumnStatistics) s).getMinimum());
+      }
+    }
+
+    // check out the types
+    TypeDescription type = reader.getSchema();
+    assertEquals(TypeDescription.Category.STRUCT, type.getCategory());
+    assertEquals(2, type.getChildren().size());
+    TypeDescription type1 = type.getChildren().get(0);
+    TypeDescription type2 = type.getChildren().get(1);
+    assertEquals(TypeDescription.Category.INT, type1.getCategory());
+    assertEquals(TypeDescription.Category.STRING, type2.getCategory());
+    assertEquals("struct<int1:int,string1:string>", type.toString());
+
+    // read the contents and make sure they match
+    RecordReader rows1 = reader.rows(
+        new Reader.Options().include(new boolean[]{true, true, false}));
+    RecordReader rows2 = reader.rows(
+        new Reader.Options().include(new boolean[]{true, false, true}));
+    r1 = new Random(1);
+    r2 = new Random(2);
+    VectorizedRowBatch batch1 = reader.getSchema().createRowBatch(1000);
+    VectorizedRowBatch batch2 = reader.getSchema().createRowBatch(1000);
+    for(int i = 0; i < 21000; i += 1000) {
+      Assert.assertEquals(true, rows1.nextBatch(batch1));
+      Assert.assertEquals(true, rows2.nextBatch(batch2));
+      assertEquals(1000, batch1.size);
+      assertEquals(1000, batch2.size);
+      for(int j=0; j < 1000; ++j) {
+        assertEquals(r1.nextInt(),
+            ((LongColumnVector) batch1.cols[0]).vector[j]);
+        assertEquals(Long.toHexString(r2.nextLong()),
+            ((BytesColumnVector) batch2.cols[1]).toString(j));
+      }
+    }
+    Assert.assertEquals(false, rows1.nextBatch(batch1));
+    Assert.assertEquals(false, rows2.nextBatch(batch2));
+    rows1.close();
+    rows2.close();
+  }
+
+  @Test
+  public void testEmptyFile() throws Exception {
+    TypeDescription schema = createBigRowSchema();
+    Writer writer = OrcFile.createWriter(testFilePath,
+                                         OrcFile.writerOptions(conf)
+                                         .setSchema(schema)
+                                         .stripeSize(1000)
+                                         .compress(CompressionKind.NONE)
+                                         .bufferSize(100));
+    writer.close();
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+    VectorizedRowBatch batch = reader.getSchema().createRowBatch();
+    Assert.assertEquals(false, reader.rows().nextBatch(batch));
+    Assert.assertEquals(CompressionKind.NONE, reader.getCompressionKind());
+    Assert.assertEquals(0, reader.getNumberOfRows());
+    Assert.assertEquals(0, reader.getCompressionSize());
+    Assert.assertEquals(false, reader.getMetadataKeys().iterator().hasNext());
+    Assert.assertEquals(3, reader.getContentLength());
+    Assert.assertEquals(false, reader.getStripes().iterator().hasNext());
+  }
+
+  @Test
+  public void metaData() throws Exception {
+    TypeDescription schema = createBigRowSchema();
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .setSchema(schema)
+            .stripeSize(1000)
+            .compress(CompressionKind.NONE)
+            .bufferSize(100));
+    writer.addUserMetadata("my.meta", byteBuf(1, 2, 3, 4, 5, 6, 7, -1, -2, 127,
+                                              -128));
+    writer.addUserMetadata("clobber", byteBuf(1, 2, 3));
+    writer.addUserMetadata("clobber", byteBuf(4, 3, 2, 1));
+    ByteBuffer bigBuf = ByteBuffer.allocate(40000);
+    Random random = new Random(0);
+    random.nextBytes(bigBuf.array());
+    writer.addUserMetadata("big", bigBuf);
+    bigBuf.position(0);
+    VectorizedRowBatch batch = schema.createRowBatch();
+    batch.size = 1;
+    setBigRow(batch, 0, true, (byte) 127, (short) 1024, 42,
+        42L * 1024 * 1024 * 1024, (float) 3.1415, -2.713, null,
+        null, null, null, null);
+    writer.addRowBatch(batch);
+    writer.addUserMetadata("clobber", byteBuf(5,7,11,13,17,19));
+    writer.close();
+
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+    Assert.assertEquals(byteBuf(5, 7, 11, 13, 17, 19), reader.getMetadataValue("clobber"));
+    Assert.assertEquals(byteBuf(1, 2, 3, 4, 5, 6, 7, -1, -2, 127, -128),
+        reader.getMetadataValue("my.meta"));
+    Assert.assertEquals(bigBuf, reader.getMetadataValue("big"));
+    try {
+      reader.getMetadataValue("unknown");
+      assertTrue(false);
+    } catch (IllegalArgumentException iae) {
+      // PASS
+    }
+    int i = 0;
+    for(String key: reader.getMetadataKeys()) {
+      if ("my.meta".equals(key) ||
+          "clobber".equals(key) ||
+          "big".equals(key)) {
+        i += 1;
+      } else {
+        throw new IllegalArgumentException("unknown key " + key);
+      }
+    }
+    assertEquals(3, i);
+    int numStripes = reader.getStripeStatistics().size();
+    assertEquals(1, numStripes);
+  }
+
+  /**
+   * Generate an ORC file with a range of dates and times.
+   */
+  public void createOrcDateFile(Path file, int minYear, int maxYear
+                                ) throws IOException {
+    TypeDescription schema = TypeDescription.createStruct()
+        .addField("time", TypeDescription.createTimestamp())
+        .addField("date", TypeDescription.createDate());
+    Writer writer = OrcFile.createWriter(file,
+        OrcFile.writerOptions(conf)
+            .setSchema(schema)
+            .stripeSize(100000)
+            .bufferSize(10000)
+            .blockPadding(false));
+    VectorizedRowBatch batch = schema.createRowBatch();
+    batch.size = 1000;
+    for (int year = minYear; year < maxYear; ++year) {
+      for (int ms = 1000; ms < 2000; ++ms) {
+        TimestampColumnVector timestampColVector = (TimestampColumnVector) batch.cols[0];
+        timestampColVector.set(ms - 1000,
+            Timestamp.valueOf(year +
+                "-05-05 12:34:56." + ms));
+        ((LongColumnVector) batch.cols[1]).vector[ms - 1000] =
+            new DateWritable(new Date(year - 1900, 11, 25)).getDays();
+      }
+      writer.addRowBatch(batch);
+    }
+    writer.close();
+    Reader reader = OrcFile.createReader(file,
+        OrcFile.readerOptions(conf));
+    RecordReader rows = reader.rows();
+    batch = reader.getSchema().createRowBatch(1000);
+    TimestampColumnVector times = (TimestampColumnVector) batch.cols[0];
+    LongColumnVector dates = (LongColumnVector) batch.cols[1];
+    for (int year = minYear; year < maxYear; ++year) {
+      rows.nextBatch(batch);
+      assertEquals(1000, batch.size);
+      for(int ms = 1000; ms < 2000; ++ms) {
+        StringBuilder buffer = new StringBuilder();
+        times.stringifyValue(buffer, ms - 1000);
+        String expected = Integer.toString(year) + "-05-05 12:34:56.";
+        // suppress the final zeros on the string by dividing by the largest
+        // power of 10 that divides evenly.
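+        // e.g. (illustrative values) ms=1000 yields ".1", ms=1230 yields ".123",
+        // and ms=1500 yields ".15"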
+        int roundedMs = ms;
+        for(int round = 1000; round > 0; round /= 10) {
+          if (ms % round == 0) {
+            roundedMs = ms / round;
+            break;
+          }
+        }
+        expected += roundedMs;
+        assertEquals(expected, buffer.toString());
+        assertEquals(Integer.toString(year) + "-12-25",
+            new DateWritable((int) dates.vector[ms - 1000]).toString());
+      }
+    }
+    rows.nextBatch(batch);
+    assertEquals(0, batch.size);
+  }
+
+  @Test
+  public void testDate1900() throws Exception {
+    createOrcDateFile(testFilePath, 1900, 1970);
+  }
+
+  @Test
+  public void testDate2038() throws Exception {
+    createOrcDateFile(testFilePath, 2038, 2250);
+  }
+
+  private static void setUnion(VectorizedRowBatch batch, int rowId,
+                               Timestamp ts, Integer tag, Integer i, String s,
+                               HiveDecimalWritable dec) {
+    UnionColumnVector union = (UnionColumnVector) batch.cols[1];
+    if (ts != null) {
+      TimestampColumnVector timestampColVector = (TimestampColumnVector) batch.cols[0];
+      timestampColVector.set(rowId, ts);
+    } else {
+      batch.cols[0].isNull[rowId] = true;
+      batch.cols[0].noNulls = false;
+    }
+    if (tag != null) {
+      union.tags[rowId] = tag;
+      if (tag == 0) {
+        if (i != null) {
+          ((LongColumnVector) union.fields[tag]).vector[rowId] = i;
+        } else {
+          union.fields[tag].isNull[rowId] = true;
+          union.fields[tag].noNulls = false;
+        }
+      } else if (tag == 1) {
+        if (s != null) {
+          ((BytesColumnVector) union.fields[tag]).setVal(rowId, s.getBytes());
+        } else {
+          union.fields[tag].isNull[rowId] = true;
+          union.fields[tag].noNulls = false;
+        }
+      } else {
+        throw new IllegalArgumentException("Bad tag " + tag);
+      }
+    } else {
+      batch.cols[1].isNull[rowId] = true;
+      batch.cols[1].noNulls = false;
+    }
+    if (dec != null) {
+      ((DecimalColumnVector) batch.cols[2]).vector[rowId] = dec;
+    } else {
+      batch.cols[2].isNull[rowId] = true;
+      batch.cols[2].noNulls = false;
+    }
+  }
+
+  /**
+   * We test union, timestamp, and decimal separately since we need to make the
+   * object inspector manually. (The Hive reflection-based object inspector
+   * doesn't handle them properly.)
+   */
+  @Test
+  public void testUnionAndTimestamp() throws Exception {
+    TypeDescription schema = TypeDescription.createStruct()
+        .addField("time", TypeDescription.createTimestamp())
+        .addField("union", TypeDescription.createUnion()
+            .addUnionChild(TypeDescription.createInt())
+            .addUnionChild(TypeDescription.createString()))
+        .addField("decimal", TypeDescription.createDecimal()
+            .withPrecision(38)
+            .withScale(18));
+    HiveDecimal maxValue = HiveDecimal.create("10000000000000000000");
+    Writer writer = OrcFile.createWriter(testFilePath,
+                                         OrcFile.writerOptions(conf)
+                                         .setSchema(schema)
+                                         .stripeSize(1000)
+                                         .compress(CompressionKind.NONE)
+                                         .bufferSize(100)
+                                         .blockPadding(false));
+    VectorizedRowBatch batch = schema.createRowBatch();
+    batch.size = 6;
+    setUnion(batch, 0, Timestamp.valueOf("2000-03-12 15:00:00"), 0, 42, null,
+             new HiveDecimalWritable("12345678.6547456"));
+    setUnion(batch, 1, Timestamp.valueOf("2000-03-20 12:00:00.123456789"),
+        1, null, "hello", new HiveDecimalWritable("-5643.234"));
+
+    setUnion(batch, 2, null, null, null, null, null);
+    setUnion(batch, 3, null, 0, null, null, null);
+    setUnion(batch, 4, null, 1, null, null, null);
+
+    setUnion(batch, 5, Timestamp.valueOf("1970-01-01 00:00:00"), 0, 200000,
+        null, new HiveDecimalWritable("10000000000000000000"));
+    writer.addRowBatch(batch);
+
+    batch.reset();
+    Random rand = new Random(42);
+    for(int i=1970; i < 2038; ++i) {
+      Timestamp ts = Timestamp.valueOf(i + "-05-05 12:34:56." + i);
+      HiveDecimal dec =
+          HiveDecimal.create(new BigInteger(64, rand), rand.nextInt(18));
+      if ((i & 1) == 0) {
+        setUnion(batch, batch.size++, ts, 0, i*i, null,
+            new HiveDecimalWritable(dec));
+      } else {
+        setUnion(batch, batch.size++, ts, 1, null, Integer.toString(i*i),
+            new HiveDecimalWritable(dec));
+      }
+      if (maxValue.compareTo(dec) < 0) {
+        maxValue = dec;
+      }
+    }
+    writer.addRowBatch(batch);
+    batch.reset();
+
+    // let's add a lot of constant rows to test the rle
+    batch.size = 1000;
+    for(int c=0; c < batch.cols.length; ++c) {
+      batch.cols[c].setRepeating(true);
+    }
+    ((UnionColumnVector) batch.cols[1]).fields[0].isRepeating = true;
+    setUnion(batch, 0, null, 0, 1732050807, null, null);
+    for(int i=0; i < 5; ++i) {
+      writer.addRowBatch(batch);
+    }
+
+    batch.reset();
+    batch.size = 3;
+    setUnion(batch, 0, null, 0, 0, null, null);
+    setUnion(batch, 1, null, 0, 10, null, null);
+    setUnion(batch, 2, null, 0, 138, null, null);
+    writer.addRowBatch(batch);
+    writer.close();
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+
+    schema = writer.getSchema();
+    assertEquals(5, schema.getMaximumId());
+    boolean[] expected = new boolean[] {false, false, false, false, false, false};
+    boolean[] included = OrcUtils.includeColumns("", schema);
+    assertEquals(true, Arrays.equals(expected, included));
+
+    expected = new boolean[] {false, true, false, false, false, true};
+    included = OrcUtils.includeColumns("time,decimal", schema);
+    assertEquals(true, Arrays.equals(expected, included));
+
+    expected = new boolean[] {false, false, true, true, true, false};
+    included = OrcUtils.includeColumns("union", schema);
+    assertEquals(true, Arrays.equals(expected, included));
+
+    Assert.assertEquals(false, reader.getMetadataKeys().iterator().hasNext());
+    Assert.assertEquals(5077, reader.getNumberOfRows());
+    DecimalColumnStatistics stats =
+        (DecimalColumnStatistics) reader.getStatistics()[5];
+    assertEquals(71, stats.getNumberOfValues());
+    assertEquals(HiveDecimal.create("-5643.234"), stats.getMinimum());
+    assertEquals(maxValue, stats.getMaximum());
+    // TODO: fix this
+//    assertEquals(null,stats.getSum());
+    int stripeCount = 0;
+    int rowCount = 0;
+    long currentOffset = -1;
+    for(StripeInformation stripe: reader.getStripes()) {
+      stripeCount += 1;
+      rowCount += stripe.getNumberOfRows();
+      if (currentOffset < 0) {
+        currentOffset = stripe.getOffset() + stripe.getLength();
+      } else {
+        assertEquals(currentOffset, stripe.getOffset());
+        currentOffset += stripe.getLength();
+      }
+    }
+    Assert.assertEquals(reader.getNumberOfRows(), rowCount);
+    assertEquals(2, stripeCount);
+    Assert.assertEquals(reader.getContentLength(), currentOffset);
+    RecordReader rows = reader.rows();
+    Assert.assertEquals(0, rows.getRowNumber());
+    Assert.assertEquals(0.0, rows.getProgress(), 0.000001);
+
+    schema = reader.getSchema();
+    batch = schema.createRowBatch(74);
+    Assert.assertEquals(0, rows.getRowNumber());
+    rows.nextBatch(batch);
+    assertEquals(74, batch.size);
+    Assert.assertEquals(74, rows.getRowNumber());
+    TimestampColumnVector ts = (TimestampColumnVector) batch.cols[0];
+    UnionColumnVector union = (UnionColumnVector) batch.cols[1];
+    LongColumnVector longs = (LongColumnVector) union.fields[0];
+    BytesColumnVector strs = (BytesColumnVector) union.fields[1];
+    DecimalColumnVector decs = (DecimalColumnVector) batch.cols[2];
+
+    assertEquals("struct<time:timestamp,union:uniontype<int,string>,decimal:decimal(38,18)>",
+        schema.toString());
+    assertEquals("2000-03-12 15:00:00.0", ts.asScratchTimestamp(0).toString());
+    assertEquals(0, union.tags[0]);
+    assertEquals(42, longs.vector[0]);
+    assertEquals("12345678.6547456", decs.vector[0].toString());
+
+    assertEquals("2000-03-20 12:00:00.123456789", ts.asScratchTimestamp(1).toString());
+    assertEquals(1, union.tags[1]);
+    assertEquals("hello", strs.toString(1));
+    assertEquals("-5643.234", decs.vector[1].toString());
+
+    assertEquals(false, ts.noNulls);
+    assertEquals(false, union.noNulls);
+    assertEquals(false, decs.noNulls);
+    assertEquals(true, ts.isNull[2]);
+    assertEquals(true, union.isNull[2]);
+    assertEquals(true, decs.isNull[2]);
+
+    assertEquals(true, ts.isNull[3]);
+    assertEquals(false, union.isNull[3]);
+    assertEquals(0, union.tags[3]);
+    assertEquals(true, longs.isNull[3]);
+    assertEquals(true, decs.isNull[3]);
+
+    assertEquals(true, ts.isNull[4]);
+    assertEquals(false, union.isNull[4]);
+    assertEquals(1, union.tags[4]);
+    assertEquals(true, strs.isNull[4]);
+    assertEquals(true, decs.isNull[4]);
+
+    assertEquals(false, ts.isNull[5]);
+    assertEquals("1970-01-01 00:00:00.0", ts.asScratchTimestamp(5).toString());
+    assertEquals(false, union.isNull[5]);
+    assertEquals(0, union.tags[5]);
+    assertEquals(false, longs.isNull[5]);
+    assertEquals(200000, longs.vector[5]);
+    assertEquals(false, decs.isNull[5]);
+    assertEquals("10000000000000000000", decs.vector[5].toString());
+
+    rand = new Random(42);
+    for(int i=1970; i < 2038; ++i) {
+      int row = 6 + i - 1970;
+      assertEquals(Timestamp.valueOf(i + "-05-05 12:34:56." + i),
+          ts.asScratchTimestamp(row));
+      if ((i & 1) == 0) {
+        assertEquals(0, union.tags[row]);
+        assertEquals(i*i, longs.vector[row]);
+      } else {
+        assertEquals(1, union.tags[row]);
+        assertEquals(Integer.toString(i * i), strs.toString(row));
+      }
+      assertEquals(new HiveDecimalWritable(HiveDecimal.create(new BigInteger(64, rand),
+                                   rand.nextInt(18))), decs.vector[row]);
+    }
+
+    // rebuild the row batch, so that we can read by 1000 rows
+    batch = schema.createRowBatch(1000);
+    ts = (TimestampColumnVector) batch.cols[0];
+    union = (UnionColumnVector) batch.cols[1];
+    longs = (LongColumnVector) union.fields[0];
+    strs = (BytesColumnVector) union.fields[1];
+    decs = (DecimalColumnVector) batch.cols[2];
+
+    for(int i=0; i < 5; ++i) {
+      rows.nextBatch(batch);
+      assertEquals("batch " + i, 1000, batch.size);
+      assertEquals("batch " + i, false, union.isRepeating);
+      assertEquals("batch " + i, true, union.noNulls);
+      for(int r=0; r < batch.size; ++r) {
+        assertEquals("bad tag at " + i + "." +r, 0, union.tags[r]);
+      }
+      assertEquals("batch " + i, true, longs.isRepeating);
+      assertEquals("batch " + i, 1732050807, longs.vector[0]);
+    }
+
+    rows.nextBatch(batch);
+    assertEquals(3, batch.size);
+    assertEquals(0, union.tags[0]);
+    assertEquals(0, longs.vector[0]);
+    assertEquals(0, union.tags[1]);
+    assertEquals(10, longs.vector[1]);
+    assertEquals(0, union.tags[2]);
+    assertEquals(138, longs.vector[2]);
+
+    rows.nextBatch(batch);
+    assertEquals(0, batch.size);
+    Assert.assertEquals(1.0, rows.getProgress(), 0.00001);
+    Assert.assertEquals(reader.getNumberOfRows(), rows.getRowNumber());
+    rows.seekToRow(1);
+    rows.nextBatch(batch);
+    assertEquals(1000, batch.size);
+    assertEquals(Timestamp.valueOf("2000-03-20 12:00:00.123456789"), ts.asScratchTimestamp(0));
+    assertEquals(1, union.tags[0]);
+    assertEquals("hello", strs.toString(0));
+    assertEquals(new HiveDecimalWritable(HiveDecimal.create("-5643.234")), decs.vector[0]);
+    rows.close();
+  }
+
+  /**
+   * Read and write a randomly generated snappy file.
+   * @throws Exception
+   */
+  @Test
+  public void testSnappy() throws Exception {
+    TypeDescription schema = createInnerSchema();
+    Writer writer = OrcFile.createWriter(testFilePath,
+                                         OrcFile.writerOptions(conf)
+                                         .setSchema(schema)
+                                         .stripeSize(1000)
+                                         .compress(CompressionKind.SNAPPY)
+                                         .bufferSize(100));
+    VectorizedRowBatch batch = schema.createRowBatch();
+    Random rand = new Random(12);
+    batch.size = 1000;
+    for(int b=0; b < 10; ++b) {
+      for (int r=0; r < 1000; ++r) {
+        ((LongColumnVector) batch.cols[0]).vector[r] = rand.nextInt();
+        ((BytesColumnVector) batch.cols[1]).setVal(r,
+            Integer.toHexString(rand.nextInt()).getBytes());
+      }
+      writer.addRowBatch(batch);
+    }
+    writer.close();
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+    Assert.assertEquals(CompressionKind.SNAPPY, reader.getCompressionKind());
+    RecordReader rows = reader.rows();
+    batch = reader.getSchema().createRowBatch(1000);
+    rand = new Random(12);
+    LongColumnVector longs = (LongColumnVector) batch.cols[0];
+    BytesColumnVector strs = (BytesColumnVector) batch.cols[1];
+    for(int b=0; b < 10; ++b) {
+      rows.nextBatch(batch);
+      assertEquals(1000, batch.size);
+      for(int r=0; r < batch.size; ++r) {
+        assertEquals(rand.nextInt(), longs.vector[r]);
+        assertEquals(Integer.toHexString(rand.nextInt()), strs.toString(r));
+      }
+    }
+    rows.nextBatch(batch);
+    assertEquals(0, batch.size);
+    rows.close();
+  }
+
+  /**
+   * Read and write a randomly generated snappy file with the row index disabled.
+   * @throws Exception
+   */
+  @Test
+  public void testWithoutIndex() throws Exception {
+    TypeDescription schema = createInnerSchema();
+    Writer writer = OrcFile.createWriter(testFilePath,
+                                         OrcFile.writerOptions(conf)
+                                         .setSchema(schema)
+                                         .stripeSize(5000)
+                                         .compress(CompressionKind.SNAPPY)
+                                         .bufferSize(1000)
+                                         .rowIndexStride(0));
+    VectorizedRowBatch batch = schema.createRowBatch();
+    Random rand = new Random(24);
+    batch.size = 5;
+    for(int c=0; c < batch.cols.length; ++c) {
+      batch.cols[c].setRepeating(true);
+    }
+    for(int i=0; i < 10000; ++i) {
+      ((LongColumnVector) batch.cols[0]).vector[0] = rand.nextInt();
+      ((BytesColumnVector) batch.cols[1])
+          .setVal(0, Integer.toBinaryString(rand.nextInt()).getBytes());
+      writer.addRowBatch(batch);
+    }
+    writer.close();
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+    Assert.assertEquals(50000, reader.getNumberOfRows());
+    Assert.assertEquals(0, reader.getRowIndexStride());
+    StripeInformation stripe = reader.getStripes().iterator().next();
+    assertEquals(true, stripe.getDataLength() != 0);
+    assertEquals(0, stripe.getIndexLength());
+    RecordReader rows = reader.rows();
+    rand = new Random(24);
+    batch = reader.getSchema().createRowBatch(1000);
+    LongColumnVector longs = (LongColumnVector) batch.cols[0];
+    BytesColumnVector strs = (BytesColumnVector) batch.cols[1];
+    for(int i=0; i < 50; ++i) {
+      rows.nextBatch(batch);
+      assertEquals("batch " + i, 1000, batch.size);
+      for(int j=0; j < 200; ++j) {
+        int intVal = rand.nextInt();
+        String strVal = Integer.toBinaryString(rand.nextInt());
+        for (int k = 0; k < 5; ++k) {
+          assertEquals(intVal, longs.vector[j * 5 + k]);
+          assertEquals(strVal, strs.toString(j * 5 + k));
+        }
+      }
+    }
+    rows.nextBatch(batch);
+    assertEquals(0, batch.size);
+    rows.close();
+  }
+
+  @Test
+  public void testSeek() throws Exception {
+    TypeDescription schema = createBigRowSchema();
+    Writer writer = OrcFile.createWriter(testFilePath,
+                                         OrcFile.writerOptions(conf)
+                                         .setSchema(schema)
+                                         .stripeSize(200000)
+                                         .bufferSize(65536)
+                                         .rowIndexStride(1000));
+    VectorizedRowBatch batch = schema.createRowBatch();
+    Random rand = new Random(42);
+    final int COUNT=32768;
+    long[] intValues= new long[COUNT];
+    double[] doubleValues = new double[COUNT];
+    String[] stringValues = new String[COUNT];
+    BytesWritable[] byteValues = new BytesWritable[COUNT];
+    String[] words = new String[128];
+    for(int i=0; i < words.length; ++i) {
+      words[i] = Integer.toHexString(rand.nextInt());
+    }
+    for(int i=0; i < COUNT/2; ++i) {
+      intValues[2*i] = rand.nextLong();
+      intValues[2*i+1] = intValues[2*i];
+      stringValues[2*i] = words[rand.nextInt(words.length)];
+      stringValues[2*i+1] = stringValues[2*i];
+    }
+    for(int i=0; i < COUNT; ++i) {
+      doubleValues[i] = rand.nextDouble();
+      byte[] buf = new byte[20];
+      rand.nextBytes(buf);
+      byteValues[i] = new BytesWritable(buf);
+    }
+    for(int i=0; i < COUNT; ++i) {
+      appendRandomRow(batch, intValues, doubleValues, stringValues,
+          byteValues, words, i);
+      if (batch.size == 1024) {
+        writer.addRowBatch(batch);
+        batch.reset();
+      }
+    }
+    if (batch.size != 0) {
+      writer.addRowBatch(batch);
+    }
+    writer.close();
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+    Assert.assertEquals(COUNT, reader.getNumberOfRows());
+    RecordReader rows = reader.rows();
+    // get the row index
+    DataReader meta = RecordReaderUtils.createDefaultDataReader(
+        DataReaderProperties.builder()
+            .withBufferSize(reader.getCompressionSize())
+            .withFileSystem(fs)
+            .withPath(testFilePath)
+            .withCompression(reader.getCompressionKind())
+            .withTypeCount(reader.getSchema().getMaximumId() + 1)
+            .withZeroCopy(false)
+            .build());
+    OrcIndex index =
+        meta.readRowIndex(reader.getStripes().get(0), null, null, null, null,
+            null);
+    // check the primitive columns to make sure they have the right number of
+    // items in the first row group
+    for(int c=1; c < 9; ++c) {
+      OrcProto.RowIndex colIndex = index.getRowGroupIndex()[c];
+      assertEquals(1000,
+          colIndex.getEntry(0).getStatistics().getNumberOfValues());
+    }
+    batch = reader.getSchema().createRowBatch();
+    int nextRowInBatch = -1;
+    for(int i=COUNT-1; i >= 0; --i, --nextRowInBatch) {
+      // if we have consumed the previous batch, read a new one
+      if (nextRowInBatch < 0) {
+        long base = Math.max(i - 1023, 0);
+        rows.seekToRow(base);
+        Assert.assertEquals("row " + i, true, rows.nextBatch(batch));
+        nextRowInBatch = batch.size - 1;
+      }
+      checkRandomRow(batch, intValues, doubleValues,
+          stringValues, byteValues, words, i, nextRowInBatch);
+    }
+    rows.close();
+    Iterator<StripeInformation> stripeIterator =
+      reader.getStripes().iterator();
+    long offsetOfStripe2 = 0;
+    long offsetOfStripe4 = 0;
+    long lastRowOfStripe2 = 0;
+    for(int i = 0; i < 5; ++i) {
+      StripeInformation stripe = stripeIterator.next();
+      if (i < 2) {
+        lastRowOfStripe2 += stripe.getNumberOfRows();
+      } else if (i == 2) {
+        offsetOfStripe2 = stripe.getOffset();
+        lastRowOfStripe2 += stripe.getNumberOfRows() - 1;
+      } else if (i == 4) {
+        offsetOfStripe4 = stripe.getOffset();
+      }
+    }
+    boolean[] columns = new boolean[reader.getStatistics().length];
+    columns[5] = true; // long column
+    columns[9] = true; // text column
+    rows = reader.rows(new Reader.Options()
+        .range(offsetOfStripe2, offsetOfStripe4 - offsetOfStripe2)
+        .include(columns));
+    rows.seekToRow(lastRowOfStripe2);
+    // we only want two rows
+    batch = reader.getSchema().createRowBatch(2);
+    Assert.assertEquals(true, rows.nextBatch(batch));
+    assertEquals(1, batch.size);
+    assertEquals(intValues[(int) lastRowOfStripe2], getLong(batch, 0));
+    assertEquals(stringValues[(int) lastRowOfStripe2],
+        getText(batch, 0).toString());
+    Assert.assertEquals(true, rows.nextBatch(batch));
+    assertEquals(intValues[(int) lastRowOfStripe2 + 1], getLong(batch, 0));
+    assertEquals(stringValues[(int) lastRowOfStripe2 + 1],
+        getText(batch, 0).toString());
+    rows.close();
+  }
+
+  private void appendRandomRow(VectorizedRowBatch batch,
+                               long[] intValues, double[] doubleValues,
+                               String[] stringValues,
+                               BytesWritable[] byteValues,
+                               String[] words, int i) {
+    InnerStruct inner = new InnerStruct((int) intValues[i], stringValues[i]);
+    InnerStruct inner2 = new InnerStruct((int) (intValues[i] >> 32),
+        words[i % words.length] + "-x");
+    setBigRow(batch, batch.size++, (intValues[i] & 1) == 0, (byte) intValues[i],
+        (short) intValues[i], (int) intValues[i], intValues[i],
+        (float) doubleValues[i], doubleValues[i], byteValues[i], stringValues[i],
+        new MiddleStruct(inner, inner2), list(), map(inner, inner2));
+  }
+
+  private void checkRandomRow(VectorizedRowBatch batch,
+                              long[] intValues, double[] doubleValues,
+                              String[] stringValues,
+                              BytesWritable[] byteValues,
+                              String[] words, int i, int rowInBatch) {
+    InnerStruct inner = new InnerStruct((int) intValues[i], stringValues[i]);
+    InnerStruct inner2 = new InnerStruct((int) (intValues[i] >> 32),
+        words[i % words.length] + "-x");
+    checkBigRow(batch, rowInBatch, i, (intValues[i] & 1) == 0, (byte) intValues[i],
+        (short) intValues[i], (int) intValues[i], intValues[i],
+        (float) doubleValues[i], doubleValues[i], byteValues[i], stringValues[i],
+        new MiddleStruct(inner, inner2), list(), map(inner, inner2));
+  }
+
+  private static class MyMemoryManager extends MemoryManager {
+    final long totalSpace;
+    double rate;
+    Path path = null;
+    long lastAllocation = 0;
+    int rows = 0;
+    Callback callback;
+
+    MyMemoryManager(Configuration conf, long totalSpace, double rate) {
+      super(conf);
+      this.totalSpace = totalSpace;
+      this.rate = rate;
+    }
+
+    @Override
+    public void addWriter(Path path, long requestedAllocation,
+                   Callback callback) {
+      this.path = path;
+      this.lastAllocation = requestedAllocation;
+      this.callback = callback;
+    }
+
+    @Override
+    public synchronized void removeWriter(Path path) {
+      this.path = null;
+      this.lastAllocation = 0;
+    }
+
+    @Override
+    public long getTotalMemoryPool() {
+      return totalSpace;
+    }
+
+    @Override
+    public double getAllocationScale() {
+      return rate;
+    }
+
+    @Override
+    public void addedRow(int count) throws IOException {
+      rows += count;
+      if (rows % 100 == 0) {
+        callback.checkMemory(rate);
+      }
+    }
+  }
+
+  @Test
+  public void testMemoryManagementV11() throws Exception {
+    TypeDescription schema = createInnerSchema();
+    MyMemoryManager memory = new MyMemoryManager(conf, 10000, 0.1);
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .setSchema(schema)
+            .compress(CompressionKind.NONE)
+            .stripeSize(50000)
+            .bufferSize(100)
+            .rowIndexStride(0)
+            .memory(memory)
+            .version(OrcFile.Version.V_0_11));
+    assertEquals(testFilePath, memory.path);
+    VectorizedRowBatch batch = schema.createRowBatch();
+    batch.size = 1;
+    for(int i=0; i < 2500; ++i) {
+      ((LongColumnVector) batch.cols[0]).vector[0] = i * 300;
+      ((BytesColumnVector) batch.cols[1]).setVal(0,
+          Integer.toHexString(10*i).getBytes());
+      writer.addRowBatch(batch);
+    }
+    writer.close();
+    assertEquals(null, memory.path);
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+    int i = 0;
+    for(StripeInformation stripe: reader.getStripes()) {
+      i += 1;
+      assertTrue("stripe " + i + " is too long at " + stripe.getDataLength(),
+          stripe.getDataLength() < 5000);
+    }
+    assertEquals(25, i);
+    assertEquals(2500, reader.getNumberOfRows());
+  }
+
+  @Test
+  public void testMemoryManagementV12() throws Exception {
+    TypeDescription schema = createInnerSchema();
+    MyMemoryManager memory = new MyMemoryManager(conf, 10000, 0.1);
+    Writer writer = OrcFile.createWriter(testFilePath,
+                                         OrcFile.writerOptions(conf)
+                                         .setSchema(schema)
+                                         .compress(CompressionKind.NONE)
+                                         .stripeSize(50000)
+                                         .bufferSize(100)
+                                         .rowIndexStride(0)
+                                         .memory(memory)
+                                         .version(OrcFile.Version.V_0_12));
+    VectorizedRowBatch batch = schema.createRowBatch();
+    assertEquals(testFilePath, memory.path);
+    batch.size = 1;
+    for(int i=0; i < 2500; ++i) {
+      ((LongColumnVector) batch.cols[0]).vector[0] = i * 300;
+      ((BytesColumnVector) batch.cols[1]).setVal(0,
+          Integer.toHexString(10*i).getBytes());
+      writer.addRowBatch(batch);
+    }
+    writer.close();
+    assertEquals(null, memory.path);
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+    int i = 0;
+    for(StripeInformation stripe: reader.getStripes()) {
+      i += 1;
+      assertTrue("stripe " + i + " is too long at " + stripe.getDataLength(),
+          stripe.getDataLength() < 5000);
+    }
+    // with HIVE-7832, the dictionaries will be disabled after writing the first
+    // stripe as there are too many distinct values. Hence only 3 stripes as
+    // compared to 25 stripes in version 0.11 (above test case)
+    assertEquals(3, i);
+    assertEquals(2500, reader.getNumberOfRows());
+  }
+
+  @Test
+  public void testPredicatePushdown() throws Exception {
+    TypeDescription schema = createInnerSchema();
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .setSchema(schema)
+            .stripeSize(400000L)
+            .compress(CompressionKind.NONE)
+            .bufferSize(500)
+            .rowIndexStride(1000));
+    VectorizedRowBatch batch = schema.createRowBatch();
+    batch.ensureSize(3500);
+    batch.size = 3500;
+    for(int i=0; i < 3500; ++i) {
+      ((LongColumnVector) batch.cols[0]).vector[i] = i * 300;
+      ((BytesColumnVector) batch.cols[1]).setVal(i,
+          Integer.toHexString(10*i).getBytes());
+    }
+    writer.addRowBatch(batch);
+    writer.close();
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+    assertEquals(3500, reader.getNumberOfRows());
+
+    SearchArgument sarg = SearchArgumentFactory.newBuilder()
+        .startAnd()
+          .startNot()
+             .lessThan("int1", PredicateLeaf.Type.LONG, 300000L)
+          .end()
+          .lessThan("int1", PredicateLeaf.Type.LONG, 600000L)
+        .end()
+        .build();
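+    // i.e. 300000 <= int1 < 600000; since int1 == 300 * i above, this should
+    // select only the row group covering rows 1000..1999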
+    RecordReader rows = reader.rows(new Reader.Options()
+        .range(0L, Long.MAX_VALUE)
+        .include(new boolean[]{true, true, true})
+        .searchArgument(sarg, new String[]{null, "int1", "string1"}));
+    batch = reader.getSchema().createRowBatch(2000);
+    LongColumnVector ints = (LongColumnVector) batch.cols[0];
+    BytesColumnVector strs = (BytesColumnVector) batch.cols[1];
+
+    Assert.assertEquals(1000L, rows.getRowNumber());
+    Assert.assertEquals(true, rows.nextBatch(batch));
+    assertEquals(1000, batch.size);
+
+    for(int i=1000; i < 2000; ++i) {
+      assertEquals(300 * i, ints.vector[i - 1000]);
+      assertEquals(Integer.toHexString(10*i), strs.toString(i - 1000));
+    }
+    Assert.assertEquals(false, rows.nextBatch(batch));
+    Assert.assertEquals(3500, rows.getRowNumber());
+
+    // look through the file with no rows selected
+    sarg = SearchArgumentFactory.newBuilder()
+        .startAnd()
+          .lessThan("int1", PredicateLeaf.Type.LONG, 0L)
+        .end()
+        .build();
+    rows = reader.rows(new Reader.Options()
+        .range(0L, Long.MAX_VALUE)
+        .include(new boolean[]{true, true, true})
+        .searchArgument(sarg, new String[]{null, "int1", "string1"}));
+    Assert.assertEquals(3500L, rows.getRowNumber());
+    assertTrue(!rows.nextBatch(batch));
+
+    // select first 100 and last 100 rows
+    sarg = SearchArgumentFactory.newBuilder()
+        .startOr()
+          .lessThan("int1", PredicateLeaf.Type.LONG, 300L * 100)
+          .startNot()
+            .lessThan("int1", PredicateLeaf.Type.LONG, 300L * 3400)
+          .end()
+        .end()
+        .build();
+    rows = reader.rows(new Reader.Options()
+        .range(0L, Long.MAX_VALUE)
+        .include(new boolean[]{true, true, true})
+        .searchArgument(sarg, new String[]{null, "int1", "string1"}));
+    Assert.assertEquals(0, rows.getRowNumber());
+    Assert.assertEquals(true, rows.nextBatch(batch));
+    assertEquals(1000, batch.size);
+    Assert.assertEquals(3000, rows.getRowNumber());
+    for(int i=0; i < 1000; ++i) {
+      assertEquals(300 * i, ints.vector[i]);
+      assertEquals(Integer.toHexString(10*i), strs.toString(i));
+    }
+
+    Assert.assertEquals(true, rows.nextBatch(batch));
+    assertEquals(500, batch.size);
+    Assert.assertEquals(3500, rows.getRowNumber());
+    for(int i=3000; i < 3500; ++i) {
+      assertEquals(300 * i, ints.vector[i - 3000]);
+      assertEquals(Integer.toHexString(10*i), strs.toString(i - 3000));
+    }
+    Assert.assertEquals(false, rows.nextBatch(batch));
+    Assert.assertEquals(3500, rows.getRowNumber());
+  }
+
+  /**
+   * Test all of the types that have distinct ORC writers using the vectorized
+   * writer with different combinations of repeating and null values.


<TRUNCATED>

[27/37] hive git commit: HIVE-17118. Move the hive-orc source files to make the package names unique.

Posted by om...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/impl/SerializationUtils.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/impl/SerializationUtils.java b/orc/src/java/org/apache/hive/orc/impl/SerializationUtils.java
new file mode 100644
index 0000000..dabc3ca
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/impl/SerializationUtils.java
@@ -0,0 +1,1311 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc.impl;
+
+import java.io.EOFException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.math.BigInteger;
+
+public final class SerializationUtils {
+
+  private final static int BUFFER_SIZE = 64;
+  private final byte[] readBuffer;
+  private final byte[] writeBuffer;
+
+  public SerializationUtils() {
+    this.readBuffer = new byte[BUFFER_SIZE];
+    this.writeBuffer = new byte[BUFFER_SIZE];
+  }
+
+  public void writeVulong(OutputStream output,
+                          long value) throws IOException {
+    while (true) {
+      if ((value & ~0x7f) == 0) {
+        output.write((byte) value);
+        return;
+      } else {
+        output.write((byte) (0x80 | (value & 0x7f)));
+        value >>>= 7;
+      }
+    }
+  }
+
+  public void writeVslong(OutputStream output,
+                          long value) throws IOException {
+    writeVulong(output, (value << 1) ^ (value >> 63));
+  }
+
+
+  public long readVulong(InputStream in) throws IOException {
+    long result = 0;
+    long b;
+    int offset = 0;
+    do {
+      b = in.read();
+      if (b == -1) {
+        throw new EOFException("Reading Vulong past EOF");
+      }
+      result |= (0x7f & b) << offset;
+      offset += 7;
+    } while (b >= 0x80);
+    return result;
+  }
+
+  public long readVslong(InputStream in) throws IOException {
+    long result = readVulong(in);
+    return (result >>> 1) ^ -(result & 1);
+  }
+
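+  // A small worked example of the variable-length encoding above, with
+  // illustrative values: writeVulong emits the low 7 bits per byte, LSB first,
+  // setting the high bit on every byte except the last, so writeVulong(out, 300)
+  // writes 0xAC 0x02, and writeVslong(out, -1) zigzags -1 to 1 and writes 0x01;
+  // readVulong/readVslong reverse the process.
+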
+  public float readFloat(InputStream in) throws IOException {
+    readFully(in, readBuffer, 0, 4);
+    int val = (((readBuffer[0] & 0xff) << 0)
+        + ((readBuffer[1] & 0xff) << 8)
+        + ((readBuffer[2] & 0xff) << 16)
+        + ((readBuffer[3] & 0xff) << 24));
+    return Float.intBitsToFloat(val);
+  }
+
+  public void writeFloat(OutputStream output,
+                         float value) throws IOException {
+    int ser = Float.floatToIntBits(value);
+    writeBuffer[0] = (byte) ((ser >> 0)  & 0xff);
+    writeBuffer[1] = (byte) ((ser >> 8)  & 0xff);
+    writeBuffer[2] = (byte) ((ser >> 16) & 0xff);
+    writeBuffer[3] = (byte) ((ser >> 24) & 0xff);
+    output.write(writeBuffer, 0, 4);
+  }
+
+  public double readDouble(InputStream in) throws IOException {
+    return Double.longBitsToDouble(readLongLE(in));
+  }
+
+  public long readLongLE(InputStream in) throws IOException {
+    readFully(in, readBuffer, 0, 8);
+    return (((readBuffer[0] & 0xff) << 0)
+        + ((readBuffer[1] & 0xff) << 8)
+        + ((readBuffer[2] & 0xff) << 16)
+        + ((long) (readBuffer[3] & 0xff) << 24)
+        + ((long) (readBuffer[4] & 0xff) << 32)
+        + ((long) (readBuffer[5] & 0xff) << 40)
+        + ((long) (readBuffer[6] & 0xff) << 48)
+        + ((long) (readBuffer[7] & 0xff) << 56));
+  }
+
+  private void readFully(final InputStream in, final byte[] buffer, final int off, final int len)
+      throws IOException {
+    int n = 0;
+    while (n < len) {
+      int count = in.read(buffer, off + n, len - n);
+      if (count < 0) {
+        throw new EOFException("Read past EOF for " + in);
+      }
+      n += count;
+    }
+  }
+
+  public void writeDouble(OutputStream output,
+                          double value) throws IOException {
+    writeLongLE(output, Double.doubleToLongBits(value));
+  }
+
+  private void writeLongLE(OutputStream output, long value) throws IOException {
+    writeBuffer[0] = (byte) ((value >> 0)  & 0xff);
+    writeBuffer[1] = (byte) ((value >> 8)  & 0xff);
+    writeBuffer[2] = (byte) ((value >> 16) & 0xff);
+    writeBuffer[3] = (byte) ((value >> 24) & 0xff);
+    writeBuffer[4] = (byte) ((value >> 32) & 0xff);
+    writeBuffer[5] = (byte) ((value >> 40) & 0xff);
+    writeBuffer[6] = (byte) ((value >> 48) & 0xff);
+    writeBuffer[7] = (byte) ((value >> 56) & 0xff);
+    output.write(writeBuffer, 0, 8);
+  }
+
+  /**
+   * Write the arbitrarily sized signed BigInteger in vint format.
+   *
+   * Signed integers are zigzag encoded, using the low bit as the sign bit.
+   *
+   * Each byte uses the low 7 bits for data and the high bit for stop/continue.
+   *
+   * Bytes are stored LSB first.
+   * @param output the stream to write to
+   * @param value the value to output
+   * @throws IOException
+   */
+  public static void writeBigInteger(OutputStream output,
+                                     BigInteger value) throws IOException {
+    // encode the signed number as a positive integer
+    value = value.shiftLeft(1);
+    int sign = value.signum();
+    if (sign < 0) {
+      value = value.negate();
+      value = value.subtract(BigInteger.ONE);
+    }
+    int length = value.bitLength();
+    while (true) {
+      long lowBits = value.longValue() & 0x7fffffffffffffffL;
+      length -= 63;
+      // write out the next 63 bits worth of data
+      for(int i=0; i < 9; ++i) {
+        // if this is the last byte, leave the high bit off
+        if (length <= 0 && (lowBits & ~0x7f) == 0) {
+          output.write((byte) lowBits);
+          return;
+        } else {
+          output.write((byte) (0x80 | (lowBits & 0x7f)));
+          lowBits >>>= 7;
+        }
+      }
+      value = value.shiftRight(63);
+    }
+  }
+
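+  // Worked example (illustrative values): small values reduce to the zigzag
+  // varint case, so writeBigInteger(out, BigInteger.valueOf(1)) writes 0x02,
+  // valueOf(-1) writes 0x01, and valueOf(200) zigzags to 400 and writes
+  // 0x90 0x03; larger values spill into further 63-bit chunks.
+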
+  /**
+   * Read the arbitrarily sized signed BigInteger in vint format.
+   * @param input the stream to read from
+   * @return the read BigInteger
+   * @throws IOException
+   */
+  public static BigInteger readBigInteger(InputStream input) throws IOException {
+    BigInteger result = BigInteger.ZERO;
+    long work = 0;
+    int offset = 0;
+    long b;
+    do {
+      b = input.read();
+      if (b == -1) {
+        throw new EOFException("Reading BigInteger past EOF from " + input);
+      }
+      work |= (0x7f & b) << (offset % 63);
+      offset += 7;
+      // if we've read 63 bits, roll them into the result
+      if (offset == 63) {
+        result = BigInteger.valueOf(work);
+        work = 0;
+      } else if (offset % 63 == 0) {
+        result = result.or(BigInteger.valueOf(work).shiftLeft(offset-63));
+        work = 0;
+      }
+    } while (b >= 0x80);
+    if (work != 0) {
+      result = result.or(BigInteger.valueOf(work).shiftLeft((offset/63)*63));
+    }
+    // convert back to a signed number
+    boolean isNegative = result.testBit(0);
+    if (isNegative) {
+      result = result.add(BigInteger.ONE);
+      result = result.negate();
+    }
+    result = result.shiftRight(1);
+    return result;
+  }
+
+  public enum FixedBitSizes {
+    ONE, TWO, THREE, FOUR, FIVE, SIX, SEVEN, EIGHT, NINE, TEN, ELEVEN, TWELVE,
+    THIRTEEN, FOURTEEN, FIFTEEN, SIXTEEN, SEVENTEEN, EIGHTEEN, NINETEEN,
+    TWENTY, TWENTYONE, TWENTYTWO, TWENTYTHREE, TWENTYFOUR, TWENTYSIX,
+    TWENTYEIGHT, THIRTY, THIRTYTWO, FORTY, FORTYEIGHT, FIFTYSIX, SIXTYFOUR;
+  }
+
+  /**
+   * Count the number of bits required to encode the given value, rounded up
+   * to the closest supported fixed bit width.
+   * @param value the value to measure
+   * @return closest fixed bit width that can store the value
+   */
+  public int findClosestNumBits(long value) {
+    int count = 0;
+    while (value != 0) {
+      count++;
+      value = value >>> 1;
+    }
+    return getClosestFixedBits(count);
+  }
+
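+  // For example: findClosestNumBits(10) returns 4 (1010 needs four bits) and
+  // findClosestNumBits(100) returns 7; values needing 25 bits round up to 26.
+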
+  /**
+   * zigzag encode the given value
+   * @param val
+   * @return zigzag encoded value
+   */
+  public long zigzagEncode(long val) {
+    return (val << 1) ^ (val >> 63);
+  }
+
+  /**
+   * zigzag decode the given value
+   * @param val
+   * @return zigzag decoded value
+   */
+  public long zigzagDecode(long val) {
+    return (val >>> 1) ^ -(val & 1);
+  }
+
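+  // For example (illustrative values): zigzagEncode maps 0, -1, 1, -2, 2 to
+  // 0, 1, 2, 3, 4, so small magnitudes of either sign stay small, and
+  // zigzagDecode(3) returns -2.
+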
+  /**
+   * Compute the bits required to represent pth percentile value
+   * @param data - array
+   * @param p - percentile value (> 0.0 and <= 1.0)
+   * @return pth percentile bits
+   */
+  public int percentileBits(long[] data, int offset, int length,
+                            double p) {
+    if ((p > 1.0) || (p <= 0.0)) {
+      return -1;
+    }
+
+    // histogram that stores the encoded bit requirement for each value.
+    // there are at most 32 distinct encoded bit widths (refer FixedBitSizes)
+    int[] hist = new int[32];
+
+    // compute the histogram
+    for(int i = offset; i < (offset + length); i++) {
+      int idx = encodeBitWidth(findClosestNumBits(data[i]));
+      hist[idx] += 1;
+    }
+
+    int perLen = (int) (length * (1.0 - p));
+
+    // return the bits required by pth percentile length
+    for(int i = hist.length - 1; i >= 0; i--) {
+      perLen -= hist[i];
+      if (perLen < 0) {
+        return decodeBitWidth(i);
+      }
+    }
+
+    return 0;
+  }
+
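+  // A small worked example with illustrative data: for ten values where seven
+  // are 10 (4 bits) and three are 40000 (16 bits), percentileBits(data, 0, 10, 0.5)
+  // returns 4, while percentileBits(data, 0, 10, 1.0) returns 16, the width
+  // required by the largest value.
+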
+  /**
+   * Read n bytes in big endian order and convert to long
+   * @return long value
+   */
+  public long bytesToLongBE(InStream input, int n) throws IOException {
+    long out = 0;
+    long val = 0;
+    while (n > 0) {
+      n--;
+      // store it in a long and then shift, else integer overflow will occur
+      val = input.read();
+      out |= (val << (n * 8));
+    }
+    return out;
+  }
+
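+  // For example (illustrative values): with n == 2, reading the bytes
+  // 0x01 0x02 yields 0x0102 == 258.
+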
+  /**
+   * Calculate the number of bytes required
+   * @param n - number of values
+   * @param numBits - bit width
+   * @return number of bytes required
+   */
+  int getTotalBytesRequired(int n, int numBits) {
+    return (n * numBits + 7) / 8;
+  }
+
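+  // For example: getTotalBytesRequired(10, 3) returns (10 * 3 + 7) / 8 = 4,
+  // i.e. 30 bits of packed data rounded up to whole bytes.
+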
+  /**
+   * For a given bit width, this function returns the closest available fixed
+   * bit width.
+   * @param n - bit width
+   * @return closest valid fixed bit width
+   */
+  public int getClosestFixedBits(int n) {
+    if (n == 0) {
+      return 1;
+    }
+
+    if (n >= 1 && n <= 24) {
+      return n;
+    } else if (n > 24 && n <= 26) {
+      return 26;
+    } else if (n > 26 && n <= 28) {
+      return 28;
+    } else if (n > 28 && n <= 30) {
+      return 30;
+    } else if (n > 30 && n <= 32) {
+      return 32;
+    } else if (n > 32 && n <= 40) {
+      return 40;
+    } else if (n > 40 && n <= 48) {
+      return 48;
+    } else if (n > 48 && n <= 56) {
+      return 56;
+    } else {
+      return 64;
+    }
+  }
+
+  public int getClosestAlignedFixedBits(int n) {
+    if (n == 0 ||  n == 1) {
+      return 1;
+    } else if (n > 1 && n <= 2) {
+      return 2;
+    } else if (n > 2 && n <= 4) {
+      return 4;
+    } else if (n > 4 && n <= 8) {
+      return 8;
+    } else if (n > 8 && n <= 16) {
+      return 16;
+    } else if (n > 16 && n <= 24) {
+      return 24;
+    } else if (n > 24 && n <= 32) {
+      return 32;
+    } else if (n > 32 && n <= 40) {
+      return 40;
+    } else if (n > 40 && n <= 48) {
+      return 48;
+    } else if (n > 48 && n <= 56) {
+      return 56;
+    } else {
+      return 64;
+    }
+  }
+
+  /**
+   * Finds the closest available fixed bit width match and returns its encoded
+   * value (ordinal)
+   * @param n - fixed bit width to encode
+   * @return encoded fixed bit width
+   */
+  public int encodeBitWidth(int n) {
+    n = getClosestFixedBits(n);
+
+    if (n >= 1 && n <= 24) {
+      return n - 1;
+    } else if (n > 24 && n <= 26) {
+      return FixedBitSizes.TWENTYSIX.ordinal();
+    } else if (n > 26 && n <= 28) {
+      return FixedBitSizes.TWENTYEIGHT.ordinal();
+    } else if (n > 28 && n <= 30) {
+      return FixedBitSizes.THIRTY.ordinal();
+    } else if (n > 30 && n <= 32) {
+      return FixedBitSizes.THIRTYTWO.ordinal();
+    } else if (n > 32 && n <= 40) {
+      return FixedBitSizes.FORTY.ordinal();
+    } else if (n > 40 && n <= 48) {
+      return FixedBitSizes.FORTYEIGHT.ordinal();
+    } else if (n > 48 && n <= 56) {
+      return FixedBitSizes.FIFTYSIX.ordinal();
+    } else {
+      return FixedBitSizes.SIXTYFOUR.ordinal();
+    }
+  }
+
+  /**
+   * Decodes the ordinal fixed bit value to actual fixed bit width value
+   * @param n - encoded fixed bit width
+   * @return decoded fixed bit width
+   */
+  public int decodeBitWidth(int n) {
+    if (n >= FixedBitSizes.ONE.ordinal()
+        && n <= FixedBitSizes.TWENTYFOUR.ordinal()) {
+      return n + 1;
+    } else if (n == FixedBitSizes.TWENTYSIX.ordinal()) {
+      return 26;
+    } else if (n == FixedBitSizes.TWENTYEIGHT.ordinal()) {
+      return 28;
+    } else if (n == FixedBitSizes.THIRTY.ordinal()) {
+      return 30;
+    } else if (n == FixedBitSizes.THIRTYTWO.ordinal()) {
+      return 32;
+    } else if (n == FixedBitSizes.FORTY.ordinal()) {
+      return 40;
+    } else if (n == FixedBitSizes.FORTYEIGHT.ordinal()) {
+      return 48;
+    } else if (n == FixedBitSizes.FIFTYSIX.ordinal()) {
+      return 56;
+    } else {
+      return 64;
+    }
+  }
+
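+  // For example (illustrative values): widths 1..24 map to ordinals 0..23, so
+  // encodeBitWidth(24) == 23, while encodeBitWidth(25) rounds up to 26 bits and
+  // returns FixedBitSizes.TWENTYSIX.ordinal() == 24; decodeBitWidth(24) == 26.
+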
+  /**
+   * Bitpack and write the input values to underlying output stream
+   * @param input - values to write
+   * @param offset - offset
+   * @param len - length
+   * @param bitSize - bit width
+   * @param output - output stream
+   * @throws IOException
+   */
+  public void writeInts(long[] input, int offset, int len, int bitSize,
+                        OutputStream output) throws IOException {
+    if (input == null || input.length < 1 || offset < 0 || len < 1
+        || bitSize < 1) {
+      return;
+    }
+
+    switch (bitSize) {
+    case 1:
+      unrolledBitPack1(input, offset, len, output);
+      return;
+    case 2:
+      unrolledBitPack2(input, offset, len, output);
+      return;
+    case 4:
+      unrolledBitPack4(input, offset, len, output);
+      return;
+    case 8:
+      unrolledBitPack8(input, offset, len, output);
+      return;
+    case 16:
+      unrolledBitPack16(input, offset, len, output);
+      return;
+    case 24:
+      unrolledBitPack24(input, offset, len, output);
+      return;
+    case 32:
+      unrolledBitPack32(input, offset, len, output);
+      return;
+    case 40:
+      unrolledBitPack40(input, offset, len, output);
+      return;
+    case 48:
+      unrolledBitPack48(input, offset, len, output);
+      return;
+    case 56:
+      unrolledBitPack56(input, offset, len, output);
+      return;
+    case 64:
+      unrolledBitPack64(input, offset, len, output);
+      return;
+    default:
+      break;
+    }
+
+    int bitsLeft = 8;
+    byte current = 0;
+    for(int i = offset; i < (offset + len); i++) {
+      long value = input[i];
+      int bitsToWrite = bitSize;
+      while (bitsToWrite > bitsLeft) {
+        // add the bits to the bottom of the current word
+        current |= value >>> (bitsToWrite - bitsLeft);
+        // subtract out the bits we just added
+        bitsToWrite -= bitsLeft;
+        // zero out the bits above bitsToWrite
+        value &= (1L << bitsToWrite) - 1;
+        output.write(current);
+        current = 0;
+        bitsLeft = 8;
+      }
+      bitsLeft -= bitsToWrite;
+      current |= value << bitsLeft;
+      if (bitsLeft == 0) {
+        output.write(current);
+        current = 0;
+        bitsLeft = 8;
+      }
+    }
+
+    // flush
+    if (bitsLeft != 8) {
+      output.write(current);
+      current = 0;
+      bitsLeft = 8;
+    }
+  }
+
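+  // A small worked example of the generic (non-unrolled) path above, with
+  // illustrative values: writeInts(new long[]{5, 1, 7}, 0, 3, 3, out) packs the
+  // three 3-bit values MSB-first as 101 001 111, padded with zeros, so the
+  // stream receives the two bytes 0xA7 0x80.
+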
+  private void unrolledBitPack1(long[] input, int offset, int len,
+      OutputStream output) throws IOException {
+    final int numHops = 8;
+    final int remainder = len % numHops;
+    final int endOffset = offset + len;
+    final int endUnroll = endOffset - remainder;
+    int val = 0;
+    for (int i = offset; i < endUnroll; i = i + numHops) {
+      val = (int) (val | ((input[i] & 1) << 7)
+          | ((input[i + 1] & 1) << 6)
+          | ((input[i + 2] & 1) << 5)
+          | ((input[i + 3] & 1) << 4)
+          | ((input[i + 4] & 1) << 3)
+          | ((input[i + 5] & 1) << 2)
+          | ((input[i + 6] & 1) << 1)
+          | (input[i + 7]) & 1);
+      output.write(val);
+      val = 0;
+    }
+
+    if (remainder > 0) {
+      int startShift = 7;
+      for (int i = endUnroll; i < endOffset; i++) {
+        val = (int) (val | (input[i] & 1) << startShift);
+        startShift -= 1;
+      }
+      output.write(val);
+    }
+  }
+
+  private void unrolledBitPack2(long[] input, int offset, int len,
+      OutputStream output) throws IOException {
+    final int numHops = 4;
+    final int remainder = len % numHops;
+    final int endOffset = offset + len;
+    final int endUnroll = endOffset - remainder;
+    int val = 0;
+    for (int i = offset; i < endUnroll; i = i + numHops) {
+      val = (int) (val | ((input[i] & 3) << 6)
+          | ((input[i + 1] & 3) << 4)
+          | ((input[i + 2] & 3) << 2)
+          | (input[i + 3]) & 3);
+      output.write(val);
+      val = 0;
+    }
+
+    if (remainder > 0) {
+      int startShift = 6;
+      for (int i = endUnroll; i < endOffset; i++) {
+        val = (int) (val | (input[i] & 3) << startShift);
+        startShift -= 2;
+      }
+      output.write(val);
+    }
+  }
+
+  private void unrolledBitPack4(long[] input, int offset, int len,
+      OutputStream output) throws IOException {
+    final int numHops = 2;
+    final int remainder = len % numHops;
+    final int endOffset = offset + len;
+    final int endUnroll = endOffset - remainder;
+    int val = 0;
+    for (int i = offset; i < endUnroll; i = i + numHops) {
+      val = (int) (val | ((input[i] & 15) << 4) | (input[i + 1]) & 15);
+      output.write(val);
+      val = 0;
+    }
+
+    if (remainder > 0) {
+      int startShift = 4;
+      for (int i = endUnroll; i < endOffset; i++) {
+        val = (int) (val | (input[i] & 15) << startShift);
+        startShift -= 4;
+      }
+      output.write(val);
+    }
+  }
+
+  private void unrolledBitPack8(long[] input, int offset, int len,
+      OutputStream output) throws IOException {
+    unrolledBitPackBytes(input, offset, len, output, 1);
+  }
+
+  private void unrolledBitPack16(long[] input, int offset, int len,
+      OutputStream output) throws IOException {
+    unrolledBitPackBytes(input, offset, len, output, 2);
+  }
+
+  private void unrolledBitPack24(long[] input, int offset, int len,
+      OutputStream output) throws IOException {
+    unrolledBitPackBytes(input, offset, len, output, 3);
+  }
+
+  private void unrolledBitPack32(long[] input, int offset, int len,
+      OutputStream output) throws IOException {
+    unrolledBitPackBytes(input, offset, len, output, 4);
+  }
+
+  private void unrolledBitPack40(long[] input, int offset, int len,
+      OutputStream output) throws IOException {
+    unrolledBitPackBytes(input, offset, len, output, 5);
+  }
+
+  private void unrolledBitPack48(long[] input, int offset, int len,
+      OutputStream output) throws IOException {
+    unrolledBitPackBytes(input, offset, len, output, 6);
+  }
+
+  private void unrolledBitPack56(long[] input, int offset, int len,
+      OutputStream output) throws IOException {
+    unrolledBitPackBytes(input, offset, len, output, 7);
+  }
+
+  private void unrolledBitPack64(long[] input, int offset, int len,
+      OutputStream output) throws IOException {
+    unrolledBitPackBytes(input, offset, len, output, 8);
+  }
+
+  private void unrolledBitPackBytes(long[] input, int offset, int len, OutputStream output, int numBytes) throws IOException {
+    final int numHops = 8;
+    final int remainder = len % numHops;
+    final int endOffset = offset + len;
+    final int endUnroll = endOffset - remainder;
+    int i = offset;
+    for (; i < endUnroll; i = i + numHops) {
+      writeLongBE(output, input, i, numHops, numBytes);
+    }
+
+    if (remainder > 0) {
+      writeRemainingLongs(output, i, input, remainder, numBytes);
+    }
+  }
+
+  private void writeRemainingLongs(OutputStream output, int offset, long[] input, int remainder,
+      int numBytes) throws IOException {
+    final int numHops = remainder;
+
+    int idx = 0;
+    switch (numBytes) {
+    case 1:
+      while (remainder > 0) {
+        writeBuffer[idx] = (byte) (input[offset + idx] & 255);
+        remainder--;
+        idx++;
+      }
+      break;
+    case 2:
+      while (remainder > 0) {
+        writeLongBE2(output, input[offset + idx], idx * 2);
+        remainder--;
+        idx++;
+      }
+      break;
+    case 3:
+      while (remainder > 0) {
+        writeLongBE3(output, input[offset + idx], idx * 3);
+        remainder--;
+        idx++;
+      }
+      break;
+    case 4:
+      while (remainder > 0) {
+        writeLongBE4(output, input[offset + idx], idx * 4);
+        remainder--;
+        idx++;
+      }
+      break;
+    case 5:
+      while (remainder > 0) {
+        writeLongBE5(output, input[offset + idx], idx * 5);
+        remainder--;
+        idx++;
+      }
+      break;
+    case 6:
+      while (remainder > 0) {
+        writeLongBE6(output, input[offset + idx], idx * 6);
+        remainder--;
+        idx++;
+      }
+      break;
+    case 7:
+      while (remainder > 0) {
+        writeLongBE7(output, input[offset + idx], idx * 7);
+        remainder--;
+        idx++;
+      }
+      break;
+    case 8:
+      while (remainder > 0) {
+        writeLongBE8(output, input[offset + idx], idx * 8);
+        remainder--;
+        idx++;
+      }
+      break;
+    default:
+      break;
+    }
+
+    final int toWrite = numHops * numBytes;
+    output.write(writeBuffer, 0, toWrite);
+  }
+
+  private void writeLongBE(OutputStream output, long[] input, int offset, int numHops, int numBytes) throws IOException {
+
+    switch (numBytes) {
+    case 1:
+      writeBuffer[0] = (byte) (input[offset + 0] & 255);
+      writeBuffer[1] = (byte) (input[offset + 1] & 255);
+      writeBuffer[2] = (byte) (input[offset + 2] & 255);
+      writeBuffer[3] = (byte) (input[offset + 3] & 255);
+      writeBuffer[4] = (byte) (input[offset + 4] & 255);
+      writeBuffer[5] = (byte) (input[offset + 5] & 255);
+      writeBuffer[6] = (byte) (input[offset + 6] & 255);
+      writeBuffer[7] = (byte) (input[offset + 7] & 255);
+      break;
+    case 2:
+      writeLongBE2(output, input[offset + 0], 0);
+      writeLongBE2(output, input[offset + 1], 2);
+      writeLongBE2(output, input[offset + 2], 4);
+      writeLongBE2(output, input[offset + 3], 6);
+      writeLongBE2(output, input[offset + 4], 8);
+      writeLongBE2(output, input[offset + 5], 10);
+      writeLongBE2(output, input[offset + 6], 12);
+      writeLongBE2(output, input[offset + 7], 14);
+      break;
+    case 3:
+      writeLongBE3(output, input[offset + 0], 0);
+      writeLongBE3(output, input[offset + 1], 3);
+      writeLongBE3(output, input[offset + 2], 6);
+      writeLongBE3(output, input[offset + 3], 9);
+      writeLongBE3(output, input[offset + 4], 12);
+      writeLongBE3(output, input[offset + 5], 15);
+      writeLongBE3(output, input[offset + 6], 18);
+      writeLongBE3(output, input[offset + 7], 21);
+      break;
+    case 4:
+      writeLongBE4(output, input[offset + 0], 0);
+      writeLongBE4(output, input[offset + 1], 4);
+      writeLongBE4(output, input[offset + 2], 8);
+      writeLongBE4(output, input[offset + 3], 12);
+      writeLongBE4(output, input[offset + 4], 16);
+      writeLongBE4(output, input[offset + 5], 20);
+      writeLongBE4(output, input[offset + 6], 24);
+      writeLongBE4(output, input[offset + 7], 28);
+      break;
+    case 5:
+      writeLongBE5(output, input[offset + 0], 0);
+      writeLongBE5(output, input[offset + 1], 5);
+      writeLongBE5(output, input[offset + 2], 10);
+      writeLongBE5(output, input[offset + 3], 15);
+      writeLongBE5(output, input[offset + 4], 20);
+      writeLongBE5(output, input[offset + 5], 25);
+      writeLongBE5(output, input[offset + 6], 30);
+      writeLongBE5(output, input[offset + 7], 35);
+      break;
+    case 6:
+      writeLongBE6(output, input[offset + 0], 0);
+      writeLongBE6(output, input[offset + 1], 6);
+      writeLongBE6(output, input[offset + 2], 12);
+      writeLongBE6(output, input[offset + 3], 18);
+      writeLongBE6(output, input[offset + 4], 24);
+      writeLongBE6(output, input[offset + 5], 30);
+      writeLongBE6(output, input[offset + 6], 36);
+      writeLongBE6(output, input[offset + 7], 42);
+      break;
+    case 7:
+      writeLongBE7(output, input[offset + 0], 0);
+      writeLongBE7(output, input[offset + 1], 7);
+      writeLongBE7(output, input[offset + 2], 14);
+      writeLongBE7(output, input[offset + 3], 21);
+      writeLongBE7(output, input[offset + 4], 28);
+      writeLongBE7(output, input[offset + 5], 35);
+      writeLongBE7(output, input[offset + 6], 42);
+      writeLongBE7(output, input[offset + 7], 49);
+      break;
+    case 8:
+      writeLongBE8(output, input[offset + 0], 0);
+      writeLongBE8(output, input[offset + 1], 8);
+      writeLongBE8(output, input[offset + 2], 16);
+      writeLongBE8(output, input[offset + 3], 24);
+      writeLongBE8(output, input[offset + 4], 32);
+      writeLongBE8(output, input[offset + 5], 40);
+      writeLongBE8(output, input[offset + 6], 48);
+      writeLongBE8(output, input[offset + 7], 56);
+      break;
+    default:
+      break;
+    }
+
+    final int toWrite = numHops * numBytes;
+    output.write(writeBuffer, 0, toWrite);
+  }
+
+  private void writeLongBE2(OutputStream output, long val, int wbOffset) {
+    writeBuffer[wbOffset + 0] =  (byte) (val >>> 8);
+    writeBuffer[wbOffset + 1] =  (byte) (val >>> 0);
+  }
+
+  private void writeLongBE3(OutputStream output, long val, int wbOffset) {
+    writeBuffer[wbOffset + 0] =  (byte) (val >>> 16);
+    writeBuffer[wbOffset + 1] =  (byte) (val >>> 8);
+    writeBuffer[wbOffset + 2] =  (byte) (val >>> 0);
+  }
+
+  private void writeLongBE4(OutputStream output, long val, int wbOffset) {
+    writeBuffer[wbOffset + 0] =  (byte) (val >>> 24);
+    writeBuffer[wbOffset + 1] =  (byte) (val >>> 16);
+    writeBuffer[wbOffset + 2] =  (byte) (val >>> 8);
+    writeBuffer[wbOffset + 3] =  (byte) (val >>> 0);
+  }
+
+  private void writeLongBE5(OutputStream output, long val, int wbOffset) {
+    writeBuffer[wbOffset + 0] =  (byte) (val >>> 32);
+    writeBuffer[wbOffset + 1] =  (byte) (val >>> 24);
+    writeBuffer[wbOffset + 2] =  (byte) (val >>> 16);
+    writeBuffer[wbOffset + 3] =  (byte) (val >>> 8);
+    writeBuffer[wbOffset + 4] =  (byte) (val >>> 0);
+  }
+
+  private void writeLongBE6(OutputStream output, long val, int wbOffset) {
+    writeBuffer[wbOffset + 0] =  (byte) (val >>> 40);
+    writeBuffer[wbOffset + 1] =  (byte) (val >>> 32);
+    writeBuffer[wbOffset + 2] =  (byte) (val >>> 24);
+    writeBuffer[wbOffset + 3] =  (byte) (val >>> 16);
+    writeBuffer[wbOffset + 4] =  (byte) (val >>> 8);
+    writeBuffer[wbOffset + 5] =  (byte) (val >>> 0);
+  }
+
+  private void writeLongBE7(OutputStream output, long val, int wbOffset) {
+    writeBuffer[wbOffset + 0] =  (byte) (val >>> 48);
+    writeBuffer[wbOffset + 1] =  (byte) (val >>> 40);
+    writeBuffer[wbOffset + 2] =  (byte) (val >>> 32);
+    writeBuffer[wbOffset + 3] =  (byte) (val >>> 24);
+    writeBuffer[wbOffset + 4] =  (byte) (val >>> 16);
+    writeBuffer[wbOffset + 5] =  (byte) (val >>> 8);
+    writeBuffer[wbOffset + 6] =  (byte) (val >>> 0);
+  }
+
+  private void writeLongBE8(OutputStream output, long val, int wbOffset) {
+    writeBuffer[wbOffset + 0] =  (byte) (val >>> 56);
+    writeBuffer[wbOffset + 1] =  (byte) (val >>> 48);
+    writeBuffer[wbOffset + 2] =  (byte) (val >>> 40);
+    writeBuffer[wbOffset + 3] =  (byte) (val >>> 32);
+    writeBuffer[wbOffset + 4] =  (byte) (val >>> 24);
+    writeBuffer[wbOffset + 5] =  (byte) (val >>> 16);
+    writeBuffer[wbOffset + 6] =  (byte) (val >>> 8);
+    writeBuffer[wbOffset + 7] =  (byte) (val >>> 0);
+  }
+
+  /**
+   * Read bitpacked integers from the input stream.
+   * @param buffer - buffer that receives the unpacked values
+   * @param offset - offset into the buffer at which to start writing
+   * @param len - number of values to read
+   * @param bitSize - bit width of each packed value
+   * @param input - input stream to read from
+   * @throws IOException
+   */
+  public void readInts(long[] buffer, int offset, int len, int bitSize,
+                       InStream input) throws IOException {
+    int bitsLeft = 0;
+    int current = 0;
+
+    switch (bitSize) {
+    case 1:
+      unrolledUnPack1(buffer, offset, len, input);
+      return;
+    case 2:
+      unrolledUnPack2(buffer, offset, len, input);
+      return;
+    case 4:
+      unrolledUnPack4(buffer, offset, len, input);
+      return;
+    case 8:
+      unrolledUnPack8(buffer, offset, len, input);
+      return;
+    case 16:
+      unrolledUnPack16(buffer, offset, len, input);
+      return;
+    case 24:
+      unrolledUnPack24(buffer, offset, len, input);
+      return;
+    case 32:
+      unrolledUnPack32(buffer, offset, len, input);
+      return;
+    case 40:
+      unrolledUnPack40(buffer, offset, len, input);
+      return;
+    case 48:
+      unrolledUnPack48(buffer, offset, len, input);
+      return;
+    case 56:
+      unrolledUnPack56(buffer, offset, len, input);
+      return;
+    case 64:
+      unrolledUnPack64(buffer, offset, len, input);
+      return;
+    default:
+      break;
+    }
+
+    for(int i = offset; i < (offset + len); i++) {
+      long result = 0;
+      int bitsLeftToRead = bitSize;
+      while (bitsLeftToRead > bitsLeft) {
+        result <<= bitsLeft;
+        result |= current & ((1 << bitsLeft) - 1);
+        bitsLeftToRead -= bitsLeft;
+        current = input.read();
+        bitsLeft = 8;
+      }
+
+      // handle the left over bits
+      if (bitsLeftToRead > 0) {
+        result <<= bitsLeftToRead;
+        bitsLeft -= bitsLeftToRead;
+        result |= (current >> bitsLeft) & ((1 << bitsLeftToRead) - 1);
+      }
+      buffer[i] = result;
+    }
+  }
+
+
+  private void unrolledUnPack1(long[] buffer, int offset, int len,
+      InStream input) throws IOException {
+    final int numHops = 8;
+    final int remainder = len % numHops;
+    final int endOffset = offset + len;
+    final int endUnroll = endOffset - remainder;
+    int val = 0;
+    for (int i = offset; i < endUnroll; i = i + numHops) {
+      val = input.read();
+      buffer[i] = (val >>> 7) & 1;
+      buffer[i + 1] = (val >>> 6) & 1;
+      buffer[i + 2] = (val >>> 5) & 1;
+      buffer[i + 3] = (val >>> 4) & 1;
+      buffer[i + 4] = (val >>> 3) & 1;
+      buffer[i + 5] = (val >>> 2) & 1;
+      buffer[i + 6] = (val >>> 1) & 1;
+      buffer[i + 7] = val & 1;
+    }
+
+    if (remainder > 0) {
+      int startShift = 7;
+      val = input.read();
+      for (int i = endUnroll; i < endOffset; i++) {
+        buffer[i] = (val >>> startShift) & 1;
+        startShift -= 1;
+      }
+    }
+  }
+
+  private void unrolledUnPack2(long[] buffer, int offset, int len,
+      InStream input) throws IOException {
+    final int numHops = 4;
+    final int remainder = len % numHops;
+    final int endOffset = offset + len;
+    final int endUnroll = endOffset - remainder;
+    int val = 0;
+    for (int i = offset; i < endUnroll; i = i + numHops) {
+      val = input.read();
+      buffer[i] = (val >>> 6) & 3;
+      buffer[i + 1] = (val >>> 4) & 3;
+      buffer[i + 2] = (val >>> 2) & 3;
+      buffer[i + 3] = val & 3;
+    }
+
+    if (remainder > 0) {
+      int startShift = 6;
+      val = input.read();
+      for (int i = endUnroll; i < endOffset; i++) {
+        buffer[i] = (val >>> startShift) & 3;
+        startShift -= 2;
+      }
+    }
+  }
+
+  private void unrolledUnPack4(long[] buffer, int offset, int len,
+      InStream input) throws IOException {
+    final int numHops = 2;
+    final int remainder = len % numHops;
+    final int endOffset = offset + len;
+    final int endUnroll = endOffset - remainder;
+    int val = 0;
+    for (int i = offset; i < endUnroll; i = i + numHops) {
+      val = input.read();
+      buffer[i] = (val >>> 4) & 15;
+      buffer[i + 1] = val & 15;
+    }
+
+    if (remainder > 0) {
+      int startShift = 4;
+      val = input.read();
+      for (int i = endUnroll; i < endOffset; i++) {
+        buffer[i] = (val >>> startShift) & 15;
+        startShift -= 4;
+      }
+    }
+  }
+
+  private void unrolledUnPack8(long[] buffer, int offset, int len,
+      InStream input) throws IOException {
+    unrolledUnPackBytes(buffer, offset, len, input, 1);
+  }
+
+  private void unrolledUnPack16(long[] buffer, int offset, int len,
+      InStream input) throws IOException {
+    unrolledUnPackBytes(buffer, offset, len, input, 2);
+  }
+
+  private void unrolledUnPack24(long[] buffer, int offset, int len,
+      InStream input) throws IOException {
+    unrolledUnPackBytes(buffer, offset, len, input, 3);
+  }
+
+  private void unrolledUnPack32(long[] buffer, int offset, int len,
+      InStream input) throws IOException {
+    unrolledUnPackBytes(buffer, offset, len, input, 4);
+  }
+
+  private void unrolledUnPack40(long[] buffer, int offset, int len,
+      InStream input) throws IOException {
+    unrolledUnPackBytes(buffer, offset, len, input, 5);
+  }
+
+  private void unrolledUnPack48(long[] buffer, int offset, int len,
+      InStream input) throws IOException {
+    unrolledUnPackBytes(buffer, offset, len, input, 6);
+  }
+
+  private void unrolledUnPack56(long[] buffer, int offset, int len,
+      InStream input) throws IOException {
+    unrolledUnPackBytes(buffer, offset, len, input, 7);
+  }
+
+  private void unrolledUnPack64(long[] buffer, int offset, int len,
+      InStream input) throws IOException {
+    unrolledUnPackBytes(buffer, offset, len, input, 8);
+  }
+
+  private void unrolledUnPackBytes(long[] buffer, int offset, int len, InStream input, int numBytes)
+      throws IOException {
+    final int numHops = 8;
+    final int remainder = len % numHops;
+    final int endOffset = offset + len;
+    final int endUnroll = endOffset - remainder;
+    int i = offset;
+    for (; i < endUnroll; i = i + numHops) {
+      readLongBE(input, buffer, i, numHops, numBytes);
+    }
+
+    if (remainder > 0) {
+      readRemainingLongs(buffer, i, input, remainder, numBytes);
+    }
+  }
+
+  private void readRemainingLongs(long[] buffer, int offset, InStream input, int remainder,
+      int numBytes) throws IOException {
+    final int toRead = remainder * numBytes;
+    // bulk read to buffer
+    int bytesRead = input.read(readBuffer, 0, toRead);
+    while (bytesRead != toRead) {
+      bytesRead += input.read(readBuffer, bytesRead, toRead - bytesRead);
+    }
+
+    int idx = 0;
+    switch (numBytes) {
+    case 1:
+      while (remainder > 0) {
+        buffer[offset++] = readBuffer[idx] & 255;
+        remainder--;
+        idx++;
+      }
+      break;
+    case 2:
+      while (remainder > 0) {
+        buffer[offset++] = readLongBE2(input, idx * 2);
+        remainder--;
+        idx++;
+      }
+      break;
+    case 3:
+      while (remainder > 0) {
+        buffer[offset++] = readLongBE3(input, idx * 3);
+        remainder--;
+        idx++;
+      }
+      break;
+    case 4:
+      while (remainder > 0) {
+        buffer[offset++] = readLongBE4(input, idx * 4);
+        remainder--;
+        idx++;
+      }
+      break;
+    case 5:
+      while (remainder > 0) {
+        buffer[offset++] = readLongBE5(input, idx * 5);
+        remainder--;
+        idx++;
+      }
+      break;
+    case 6:
+      while (remainder > 0) {
+        buffer[offset++] = readLongBE6(input, idx * 6);
+        remainder--;
+        idx++;
+      }
+      break;
+    case 7:
+      while (remainder > 0) {
+        buffer[offset++] = readLongBE7(input, idx * 7);
+        remainder--;
+        idx++;
+      }
+      break;
+    case 8:
+      while (remainder > 0) {
+        buffer[offset++] = readLongBE8(input, idx * 8);
+        remainder--;
+        idx++;
+      }
+      break;
+    default:
+      break;
+    }
+  }
+
+  private void readLongBE(InStream in, long[] buffer, int start, int numHops, int numBytes)
+      throws IOException {
+    final int toRead = numHops * numBytes;
+    // bulk read to buffer
+    int bytesRead = in.read(readBuffer, 0, toRead);
+    while (bytesRead != toRead) {
+      bytesRead += in.read(readBuffer, bytesRead, toRead - bytesRead);
+    }
+
+    switch (numBytes) {
+    case 1:
+      buffer[start + 0] = readBuffer[0] & 255;
+      buffer[start + 1] = readBuffer[1] & 255;
+      buffer[start + 2] = readBuffer[2] & 255;
+      buffer[start + 3] = readBuffer[3] & 255;
+      buffer[start + 4] = readBuffer[4] & 255;
+      buffer[start + 5] = readBuffer[5] & 255;
+      buffer[start + 6] = readBuffer[6] & 255;
+      buffer[start + 7] = readBuffer[7] & 255;
+      break;
+    case 2:
+      buffer[start + 0] = readLongBE2(in, 0);
+      buffer[start + 1] = readLongBE2(in, 2);
+      buffer[start + 2] = readLongBE2(in, 4);
+      buffer[start + 3] = readLongBE2(in, 6);
+      buffer[start + 4] = readLongBE2(in, 8);
+      buffer[start + 5] = readLongBE2(in, 10);
+      buffer[start + 6] = readLongBE2(in, 12);
+      buffer[start + 7] = readLongBE2(in, 14);
+      break;
+    case 3:
+      buffer[start + 0] = readLongBE3(in, 0);
+      buffer[start + 1] = readLongBE3(in, 3);
+      buffer[start + 2] = readLongBE3(in, 6);
+      buffer[start + 3] = readLongBE3(in, 9);
+      buffer[start + 4] = readLongBE3(in, 12);
+      buffer[start + 5] = readLongBE3(in, 15);
+      buffer[start + 6] = readLongBE3(in, 18);
+      buffer[start + 7] = readLongBE3(in, 21);
+      break;
+    case 4:
+      buffer[start + 0] = readLongBE4(in, 0);
+      buffer[start + 1] = readLongBE4(in, 4);
+      buffer[start + 2] = readLongBE4(in, 8);
+      buffer[start + 3] = readLongBE4(in, 12);
+      buffer[start + 4] = readLongBE4(in, 16);
+      buffer[start + 5] = readLongBE4(in, 20);
+      buffer[start + 6] = readLongBE4(in, 24);
+      buffer[start + 7] = readLongBE4(in, 28);
+      break;
+    case 5:
+      buffer[start + 0] = readLongBE5(in, 0);
+      buffer[start + 1] = readLongBE5(in, 5);
+      buffer[start + 2] = readLongBE5(in, 10);
+      buffer[start + 3] = readLongBE5(in, 15);
+      buffer[start + 4] = readLongBE5(in, 20);
+      buffer[start + 5] = readLongBE5(in, 25);
+      buffer[start + 6] = readLongBE5(in, 30);
+      buffer[start + 7] = readLongBE5(in, 35);
+      break;
+    case 6:
+      buffer[start + 0] = readLongBE6(in, 0);
+      buffer[start + 1] = readLongBE6(in, 6);
+      buffer[start + 2] = readLongBE6(in, 12);
+      buffer[start + 3] = readLongBE6(in, 18);
+      buffer[start + 4] = readLongBE6(in, 24);
+      buffer[start + 5] = readLongBE6(in, 30);
+      buffer[start + 6] = readLongBE6(in, 36);
+      buffer[start + 7] = readLongBE6(in, 42);
+      break;
+    case 7:
+      buffer[start + 0] = readLongBE7(in, 0);
+      buffer[start + 1] = readLongBE7(in, 7);
+      buffer[start + 2] = readLongBE7(in, 14);
+      buffer[start + 3] = readLongBE7(in, 21);
+      buffer[start + 4] = readLongBE7(in, 28);
+      buffer[start + 5] = readLongBE7(in, 35);
+      buffer[start + 6] = readLongBE7(in, 42);
+      buffer[start + 7] = readLongBE7(in, 49);
+      break;
+    case 8:
+      buffer[start + 0] = readLongBE8(in, 0);
+      buffer[start + 1] = readLongBE8(in, 8);
+      buffer[start + 2] = readLongBE8(in, 16);
+      buffer[start + 3] = readLongBE8(in, 24);
+      buffer[start + 4] = readLongBE8(in, 32);
+      buffer[start + 5] = readLongBE8(in, 40);
+      buffer[start + 6] = readLongBE8(in, 48);
+      buffer[start + 7] = readLongBE8(in, 56);
+      break;
+    default:
+      break;
+    }
+  }
+
+  private long readLongBE2(InStream in, int rbOffset) {
+    return (((readBuffer[rbOffset] & 255) << 8)
+        + ((readBuffer[rbOffset + 1] & 255) << 0));
+  }
+
+  private long readLongBE3(InStream in, int rbOffset) {
+    return (((readBuffer[rbOffset] & 255) << 16)
+        + ((readBuffer[rbOffset + 1] & 255) << 8)
+        + ((readBuffer[rbOffset + 2] & 255) << 0));
+  }
+
+  private long readLongBE4(InStream in, int rbOffset) {
+    return (((long) (readBuffer[rbOffset] & 255) << 24)
+        + ((readBuffer[rbOffset + 1] & 255) << 16)
+        + ((readBuffer[rbOffset + 2] & 255) << 8)
+        + ((readBuffer[rbOffset + 3] & 255) << 0));
+  }
+
+  private long readLongBE5(InStream in, int rbOffset) {
+    return (((long) (readBuffer[rbOffset] & 255) << 32)
+        + ((long) (readBuffer[rbOffset + 1] & 255) << 24)
+        + ((readBuffer[rbOffset + 2] & 255) << 16)
+        + ((readBuffer[rbOffset + 3] & 255) << 8)
+        + ((readBuffer[rbOffset + 4] & 255) << 0));
+  }
+
+  private long readLongBE6(InStream in, int rbOffset) {
+    return (((long) (readBuffer[rbOffset] & 255) << 40)
+        + ((long) (readBuffer[rbOffset + 1] & 255) << 32)
+        + ((long) (readBuffer[rbOffset + 2] & 255) << 24)
+        + ((readBuffer[rbOffset + 3] & 255) << 16)
+        + ((readBuffer[rbOffset + 4] & 255) << 8)
+        + ((readBuffer[rbOffset + 5] & 255) << 0));
+  }
+
+  private long readLongBE7(InStream in, int rbOffset) {
+    return (((long) (readBuffer[rbOffset] & 255) << 48)
+        + ((long) (readBuffer[rbOffset + 1] & 255) << 40)
+        + ((long) (readBuffer[rbOffset + 2] & 255) << 32)
+        + ((long) (readBuffer[rbOffset + 3] & 255) << 24)
+        + ((readBuffer[rbOffset + 4] & 255) << 16)
+        + ((readBuffer[rbOffset + 5] & 255) << 8)
+        + ((readBuffer[rbOffset + 6] & 255) << 0));
+  }
+
+  private long readLongBE8(InStream in, int rbOffset) {
+    return (((long) (readBuffer[rbOffset] & 255) << 56)
+        + ((long) (readBuffer[rbOffset + 1] & 255) << 48)
+        + ((long) (readBuffer[rbOffset + 2] & 255) << 40)
+        + ((long) (readBuffer[rbOffset + 3] & 255) << 32)
+        + ((long) (readBuffer[rbOffset + 4] & 255) << 24)
+        + ((readBuffer[rbOffset + 5] & 255) << 16)
+        + ((readBuffer[rbOffset + 6] & 255) << 8)
+        + ((readBuffer[rbOffset + 7] & 255) << 0));
+  }
+
+  // Do not want to use Guava LongMath.checkedSubtract() here as it will throw
+  // ArithmeticException in case of overflow
+  public boolean isSafeSubtract(long left, long right) {
+    return (left ^ right) >= 0 | (left ^ (left - right)) >= 0;
+  }
+}
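
The generic loops in writeInts/readInts above pack each value into bitSize bits, most
significant bits first, spilling across byte boundaries as needed; the unrolled variants
special-case widths of 1, 2, 4 and multiples of 8 bits. The following standalone sketch
shows the same round trip using plain java.io streams in place of ORC's OutStream/InStream;
the class and method names are illustrative and not part of this patch.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

public class BitPackSketch {
  // Pack each value into bitSize bits, most significant bits first,
  // mirroring the generic loop in SerializationUtils.writeInts.
  static void pack(long[] values, int bitSize, OutputStream out) throws IOException {
    int bitsLeft = 8;
    int current = 0;
    for (long value : values) {
      int bitsToWrite = bitSize;
      while (bitsToWrite > bitsLeft) {
        // the top bits of the value fill the rest of the current byte
        current |= (int) (value >>> (bitsToWrite - bitsLeft));
        bitsToWrite -= bitsLeft;
        value &= (1L << bitsToWrite) - 1;
        out.write(current);
        current = 0;
        bitsLeft = 8;
      }
      bitsLeft -= bitsToWrite;
      current |= (int) (value << bitsLeft);
      if (bitsLeft == 0) {
        out.write(current);
        current = 0;
        bitsLeft = 8;
      }
    }
    if (bitsLeft != 8) {
      out.write(current);  // flush the partial final byte
    }
  }

  // Inverse of pack(), mirroring the generic loop in readInts.
  static long[] unpack(InputStream in, int count, int bitSize) throws IOException {
    long[] result = new long[count];
    int bitsLeft = 0;
    int current = 0;
    for (int i = 0; i < count; i++) {
      long value = 0;
      int bitsToRead = bitSize;
      while (bitsToRead > bitsLeft) {
        value <<= bitsLeft;
        value |= current & ((1 << bitsLeft) - 1);
        bitsToRead -= bitsLeft;
        current = in.read();
        bitsLeft = 8;
      }
      if (bitsToRead > 0) {
        value <<= bitsToRead;
        bitsLeft -= bitsToRead;
        value |= (current >> bitsLeft) & ((1 << bitsToRead) - 1);
      }
      result[i] = value;
    }
    return result;
  }

  public static void main(String[] args) throws IOException {
    long[] input = {1, 5, 7, 3, 2, 6};
    ByteArrayOutputStream packed = new ByteArrayOutputStream();
    pack(input, 3, packed);  // 6 values * 3 bits = 18 bits, so 3 bytes
    long[] output = unpack(new ByteArrayInputStream(packed.toByteArray()),
        input.length, 3);
    System.out.println(java.util.Arrays.toString(output));  // [1, 5, 7, 3, 2, 6]
  }
}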

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/impl/SettableUncompressedStream.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/impl/SettableUncompressedStream.java b/orc/src/java/org/apache/hive/orc/impl/SettableUncompressedStream.java
new file mode 100644
index 0000000..5ce0b24
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/impl/SettableUncompressedStream.java
@@ -0,0 +1,43 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc.impl;
+
+import java.util.List;
+
+import org.apache.hadoop.hive.common.DiskRangeInfo;
+import org.apache.hadoop.hive.common.io.DiskRange;
+
+/**
+ * An uncompressed stream whose underlying byte buffer can be set.
+ */
+public class SettableUncompressedStream extends InStream.UncompressedStream {
+
+  public SettableUncompressedStream(String name, List<DiskRange> input, long length) {
+    super(name, input, length);
+    setOffset(input);
+  }
+
+  public void setBuffers(DiskRangeInfo diskRangeInfo) {
+    reset(diskRangeInfo.getDiskRanges(), diskRangeInfo.getTotalLength());
+    setOffset(diskRangeInfo.getDiskRanges());
+  }
+
+  private void setOffset(List<DiskRange> list) {
+    currentOffset = list.isEmpty() ? 0 : list.get(0).getOffset();
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/impl/SnappyCodec.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/impl/SnappyCodec.java b/orc/src/java/org/apache/hive/orc/impl/SnappyCodec.java
new file mode 100644
index 0000000..c6c0358
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/impl/SnappyCodec.java
@@ -0,0 +1,108 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc.impl;
+
+import org.apache.hive.orc.CompressionCodec;
+import org.iq80.snappy.Snappy;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.EnumSet;
+
+public class SnappyCodec implements CompressionCodec, DirectDecompressionCodec {
+  private static final HadoopShims SHIMS = HadoopShims.Factory.get();
+
+  Boolean direct = null;
+
+  @Override
+  public boolean compress(ByteBuffer in, ByteBuffer out,
+                          ByteBuffer overflow) throws IOException {
+    int inBytes = in.remaining();
+    // I should work on a patch for Snappy to support an overflow buffer
+    // to prevent the extra buffer copy.
+    byte[] compressed = new byte[Snappy.maxCompressedLength(inBytes)];
+    int outBytes =
+        Snappy.compress(in.array(), in.arrayOffset() + in.position(), inBytes,
+            compressed, 0);
+    if (outBytes < inBytes) {
+      int remaining = out.remaining();
+      if (remaining >= outBytes) {
+        System.arraycopy(compressed, 0, out.array(), out.arrayOffset() +
+            out.position(), outBytes);
+        out.position(out.position() + outBytes);
+      } else {
+        System.arraycopy(compressed, 0, out.array(), out.arrayOffset() +
+            out.position(), remaining);
+        out.position(out.limit());
+        System.arraycopy(compressed, remaining, overflow.array(),
+            overflow.arrayOffset(), outBytes - remaining);
+        overflow.position(outBytes - remaining);
+      }
+      return true;
+    } else {
+      return false;
+    }
+  }
+
+  @Override
+  public void decompress(ByteBuffer in, ByteBuffer out) throws IOException {
+    if(in.isDirect() && out.isDirect()) {
+      directDecompress(in, out);
+      return;
+    }
+    int inOffset = in.position();
+    int uncompressLen =
+        Snappy.uncompress(in.array(), in.arrayOffset() + inOffset,
+        in.limit() - inOffset, out.array(), out.arrayOffset() + out.position());
+    out.position(uncompressLen + out.position());
+    out.flip();
+  }
+
+  @Override
+  public boolean isAvailable() {
+    if (direct == null) {
+      try {
+        if (SHIMS.getDirectDecompressor(
+            HadoopShims.DirectCompressionType.SNAPPY) != null) {
+          direct = Boolean.valueOf(true);
+        } else {
+          direct = Boolean.valueOf(false);
+        }
+      } catch (UnsatisfiedLinkError ule) {
+        direct = Boolean.valueOf(false);
+      }
+    }
+    return direct.booleanValue();
+  }
+
+  @Override
+  public void directDecompress(ByteBuffer in, ByteBuffer out)
+      throws IOException {
+    HadoopShims.DirectDecompressor decompressShim =
+        SHIMS.getDirectDecompressor(HadoopShims.DirectCompressionType.SNAPPY);
+    decompressShim.decompress(in, out);
+    out.flip(); // flip for read
+  }
+
+  @Override
+  public CompressionCodec modify(EnumSet<Modifier> modifiers) {
+    // snappy allows no modifications
+    return this;
+  }
+}
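
As the compress() implementation above shows, the codec reports whether compression
actually paid off: it returns false when the Snappy output is not smaller than the input,
and it spills into the overflow buffer only when the primary output buffer runs out of
room. A small round-trip sketch, assuming the relocated org.apache.hive.orc packages from
this patch plus the iq80 Snappy jar are on the classpath; the class name and sample data
are illustrative.

import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import org.apache.hive.orc.impl.SnappyCodec;

public class SnappyCodecRoundTrip {
  public static void main(String[] args) throws Exception {
    byte[] original = "orc orc orc orc orc orc orc orc".getBytes(StandardCharsets.UTF_8);
    ByteBuffer in = ByteBuffer.wrap(original);
    ByteBuffer out = ByteBuffer.allocate(original.length);
    ByteBuffer overflow = ByteBuffer.allocate(original.length);

    SnappyCodec codec = new SnappyCodec();
    // compress() returns false when the compressed form is not smaller,
    // in which case the caller stores the data uncompressed instead.
    boolean smaller = codec.compress(in, out, overflow);
    System.out.println("compressed? " + smaller + ", bytes=" + out.position());

    if (smaller) {
      out.flip();  // switch the compressed buffer to read mode
      ByteBuffer restored = ByteBuffer.allocate(original.length);
      codec.decompress(out, restored);  // decompress() flips its output for reading
      System.out.println(new String(restored.array(), 0, restored.limit(),
          StandardCharsets.UTF_8));
    }
  }
}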

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/impl/StreamName.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/impl/StreamName.java b/orc/src/java/org/apache/hive/orc/impl/StreamName.java
new file mode 100644
index 0000000..dd99665
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/impl/StreamName.java
@@ -0,0 +1,97 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc.impl;
+
+import org.apache.hive.orc.OrcProto;
+
+/**
+ * The name of a stream within a stripe.
+ */
+public class StreamName implements Comparable<StreamName> {
+  private final int column;
+  private final OrcProto.Stream.Kind kind;
+
+  public static enum Area {
+    DATA, INDEX
+  }
+
+  public StreamName(int column, OrcProto.Stream.Kind kind) {
+    this.column = column;
+    this.kind = kind;
+  }
+
+  public boolean equals(Object obj) {
+    if (obj != null && obj instanceof StreamName) {
+      StreamName other = (StreamName) obj;
+      return other.column == column && other.kind == kind;
+    } else {
+      return false;
+    }
+  }
+
+  @Override
+  public int compareTo(StreamName streamName) {
+    if (streamName == null) {
+      return -1;
+    }
+    Area area = getArea(kind);
+    Area otherArea = streamName.getArea(streamName.kind);
+    if (area != otherArea) {
+      return -area.compareTo(otherArea);
+    }
+    if (column != streamName.column) {
+      return column < streamName.column ? -1 : 1;
+    }
+    return kind.compareTo(streamName.kind);
+  }
+
+  public int getColumn() {
+    return column;
+  }
+
+  public OrcProto.Stream.Kind getKind() {
+    return kind;
+  }
+
+  public Area getArea() {
+    return getArea(kind);
+  }
+
+  public static Area getArea(OrcProto.Stream.Kind kind) {
+    switch (kind) {
+      case ROW_INDEX:
+      case DICTIONARY_COUNT:
+      case BLOOM_FILTER:
+        return Area.INDEX;
+      default:
+        return Area.DATA;
+    }
+  }
+
+  @Override
+  public String toString() {
+    return "Stream for column " + column + " kind " + kind;
+  }
+
+  @Override
+  public int hashCode() {
+    return column * 101 + kind.getNumber();
+  }
+}
+
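
Note that compareTo() above negates the Area comparison, so index-area streams
(ROW_INDEX, DICTIONARY_COUNT, BLOOM_FILTER) sort ahead of data-area streams, then by
column id and finally by stream kind. A short sketch of that ordering, assuming the
relocated packages from this patch; the sample stream layout is illustrative.

import java.util.TreeMap;
import org.apache.hive.orc.OrcProto;
import org.apache.hive.orc.impl.StreamName;

public class StreamNameOrdering {
  public static void main(String[] args) {
    // A TreeMap keyed by StreamName lays out index streams before data
    // streams, and within an area orders by column id, then stream kind.
    TreeMap<StreamName, String> streams = new TreeMap<>();
    streams.put(new StreamName(1, OrcProto.Stream.Kind.DATA), "col1 data");
    streams.put(new StreamName(1, OrcProto.Stream.Kind.ROW_INDEX), "col1 index");
    streams.put(new StreamName(0, OrcProto.Stream.Kind.ROW_INDEX), "col0 index");
    streams.put(new StreamName(2, OrcProto.Stream.Kind.LENGTH), "col2 lengths");
    streams.keySet().forEach(System.out::println);
    // Sorted order: col0 index, col1 index, col1 data, col2 lengths.
  }
}

Writers rely on this ordering so that every index stream is laid out before the data
streams within a stripe.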

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/impl/StringRedBlackTree.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/impl/StringRedBlackTree.java b/orc/src/java/org/apache/hive/orc/impl/StringRedBlackTree.java
new file mode 100644
index 0000000..f9113d0
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/impl/StringRedBlackTree.java
@@ -0,0 +1,207 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc.impl;
+
+import java.io.IOException;
+import java.io.OutputStream;
+
+import org.apache.hadoop.io.Text;
+
+/**
+ * A red-black tree that stores strings. The strings are stored as UTF-8 bytes
+ * and an offset for each entry.
+ */
+public class StringRedBlackTree extends RedBlackTree {
+  private final DynamicByteArray byteArray = new DynamicByteArray();
+  private final DynamicIntArray keyOffsets;
+  private final Text newKey = new Text();
+
+  public StringRedBlackTree(int initialCapacity) {
+    super(initialCapacity);
+    keyOffsets = new DynamicIntArray(initialCapacity);
+  }
+
+  public int add(String value) {
+    newKey.set(value);
+    return addNewKey();
+  }
+
+  private int addNewKey() {
+    // if the newKey is actually new, add it to our byteArray and store the offset & length
+    if (add()) {
+      int len = newKey.getLength();
+      keyOffsets.add(byteArray.add(newKey.getBytes(), 0, len));
+    }
+    return lastAdd;
+  }
+
+  public int add(Text value) {
+    newKey.set(value);
+    return addNewKey();
+  }
+
+  public int add(byte[] bytes, int offset, int length) {
+    newKey.set(bytes, offset, length);
+    return addNewKey();
+  }
+
+  @Override
+  protected int compareValue(int position) {
+    int start = keyOffsets.get(position);
+    int end;
+    if (position + 1 == keyOffsets.size()) {
+      end = byteArray.size();
+    } else {
+      end = keyOffsets.get(position+1);
+    }
+    return byteArray.compare(newKey.getBytes(), 0, newKey.getLength(),
+                             start, end - start);
+  }
+
+  /**
+   * The information about each node.
+   */
+  public interface VisitorContext {
+    /**
+     * Get the position where the key was originally added.
+     * @return the number returned by add.
+     */
+    int getOriginalPosition();
+
+    /**
+     * Write the bytes for the string to the given output stream.
+     * @param out the stream to write to.
+     * @throws IOException
+     */
+    void writeBytes(OutputStream out) throws IOException;
+
+    /**
+     * Get the original string.
+     * @return the string
+     */
+    Text getText();
+
+    /**
+     * Get the number of bytes.
+     * @return the string's length in bytes
+     */
+    int getLength();
+  }
+
+  /**
+   * The interface for visitors.
+   */
+  public interface Visitor {
+    /**
+     * Called once for each node of the tree in sort order.
+     * @param context the information about each node
+     * @throws IOException
+     */
+    void visit(VisitorContext context) throws IOException;
+  }
+
+  private class VisitorContextImpl implements VisitorContext {
+    private int originalPosition;
+    private int start;
+    private int end;
+    private final Text text = new Text();
+
+    public int getOriginalPosition() {
+      return originalPosition;
+    }
+
+    public Text getText() {
+      byteArray.setText(text, start, end - start);
+      return text;
+    }
+
+    public void writeBytes(OutputStream out) throws IOException {
+      byteArray.write(out, start, end - start);
+    }
+
+    public int getLength() {
+      return end - start;
+    }
+
+    void setPosition(int position) {
+      originalPosition = position;
+      start = keyOffsets.get(originalPosition);
+      if (position + 1 == keyOffsets.size()) {
+        end = byteArray.size();
+      } else {
+        end = keyOffsets.get(originalPosition + 1);
+      }
+    }
+  }
+
+  private void recurse(int node, Visitor visitor, VisitorContextImpl context
+                      ) throws IOException {
+    if (node != NULL) {
+      recurse(getLeft(node), visitor, context);
+      context.setPosition(node);
+      visitor.visit(context);
+      recurse(getRight(node), visitor, context);
+    }
+  }
+
+  /**
+   * Visit all of the nodes in the tree in sorted order.
+   * @param visitor the action to be applied to each node
+   * @throws IOException
+   */
+  public void visit(Visitor visitor) throws IOException {
+    recurse(root, visitor, new VisitorContextImpl());
+  }
+
+  /**
+   * Reset the table to empty.
+   */
+  public void clear() {
+    super.clear();
+    byteArray.clear();
+    keyOffsets.clear();
+  }
+
+  public void getText(Text result, int originalPosition) {
+    int offset = keyOffsets.get(originalPosition);
+    int length;
+    if (originalPosition + 1 == keyOffsets.size()) {
+      length = byteArray.size() - offset;
+    } else {
+      length = keyOffsets.get(originalPosition + 1) - offset;
+    }
+    byteArray.setText(result, offset, length);
+  }
+
+  /**
+   * Get the size of the character data in the table.
+   * @return the bytes used by the table
+   */
+  public int getCharacterSize() {
+    return byteArray.size();
+  }
+
+  /**
+   * Calculate the approximate size in memory.
+   * @return the number of bytes used in storing the tree.
+   */
+  public long getSizeInBytes() {
+    return byteArray.getSizeInBytes() + keyOffsets.getSizeInBytes() +
+      super.getSizeInBytes();
+  }
+}
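
StringRedBlackTree backs ORC's string dictionaries: add() returns a stable position for
each distinct string (a duplicate returns the position of the existing entry), and
visit() walks the entries in sorted order while still exposing those original positions.
A brief usage sketch, assuming the relocated packages from this patch; the class name and
sample strings are illustrative.

import java.io.IOException;
import org.apache.hive.orc.impl.StringRedBlackTree;

public class DictionarySketch {
  public static void main(String[] args) throws IOException {
    StringRedBlackTree dictionary = new StringRedBlackTree(16);
    // add() hands back the insertion position; adding the same string
    // twice returns the first position rather than creating a new entry.
    System.out.println(dictionary.add("banana"));  // 0
    System.out.println(dictionary.add("apple"));   // 1
    System.out.println(dictionary.add("banana"));  // 0 again (duplicate)
    System.out.println(dictionary.add("cherry"));  // 2

    // visit() traverses in sorted order and reports each key's original position.
    dictionary.visit(context ->
        System.out.println(context.getText() + " -> " + context.getOriginalPosition()));
    // Prints: apple -> 1, banana -> 0, cherry -> 2
  }
}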


[22/37] hive git commit: HIVE-17118. Move the hive-orc source files to make the package names unique.

Posted by om...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/TypeDescription.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/TypeDescription.java b/orc/src/java/org/apache/orc/TypeDescription.java
deleted file mode 100644
index 2e9328b..0000000
--- a/orc/src/java/org/apache/orc/TypeDescription.java
+++ /dev/null
@@ -1,870 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc;
-
-import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.UnionColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-
-import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-
-/**
- * This is the description of the types in an ORC file.
- */
-public class TypeDescription
-    implements Comparable<TypeDescription>, Serializable {
-  private static final int MAX_PRECISION = 38;
-  private static final int MAX_SCALE = 38;
-  private static final int DEFAULT_PRECISION = 38;
-  private static final int DEFAULT_SCALE = 10;
-  private static final int DEFAULT_LENGTH = 256;
-
-  @Override
-  public int compareTo(TypeDescription other) {
-    if (this == other) {
-      return 0;
-    } else if (other == null) {
-      return -1;
-    } else {
-      int result = category.compareTo(other.category);
-      if (result == 0) {
-        switch (category) {
-          case CHAR:
-          case VARCHAR:
-            return maxLength - other.maxLength;
-          case DECIMAL:
-            if (precision != other.precision) {
-              return precision - other.precision;
-            }
-            return scale - other.scale;
-          case UNION:
-          case LIST:
-          case MAP:
-            if (children.size() != other.children.size()) {
-              return children.size() - other.children.size();
-            }
-            for(int c=0; result == 0 && c < children.size(); ++c) {
-              result = children.get(c).compareTo(other.children.get(c));
-            }
-            break;
-          case STRUCT:
-            if (children.size() != other.children.size()) {
-              return children.size() - other.children.size();
-            }
-            for(int c=0; result == 0 && c < children.size(); ++c) {
-              result = fieldNames.get(c).compareTo(other.fieldNames.get(c));
-              if (result == 0) {
-                result = children.get(c).compareTo(other.children.get(c));
-              }
-            }
-            break;
-          default:
-            // PASS
-        }
-      }
-      return result;
-    }
-  }
-
-  public enum Category {
-    BOOLEAN("boolean", true),
-    BYTE("tinyint", true),
-    SHORT("smallint", true),
-    INT("int", true),
-    LONG("bigint", true),
-    FLOAT("float", true),
-    DOUBLE("double", true),
-    STRING("string", true),
-    DATE("date", true),
-    TIMESTAMP("timestamp", true),
-    BINARY("binary", true),
-    DECIMAL("decimal", true),
-    VARCHAR("varchar", true),
-    CHAR("char", true),
-    LIST("array", false),
-    MAP("map", false),
-    STRUCT("struct", false),
-    UNION("uniontype", false);
-
-    Category(String name, boolean isPrimitive) {
-      this.name = name;
-      this.isPrimitive = isPrimitive;
-    }
-
-    final boolean isPrimitive;
-    final String name;
-
-    public boolean isPrimitive() {
-      return isPrimitive;
-    }
-
-    public String getName() {
-      return name;
-    }
-  }
-
-  public static TypeDescription createBoolean() {
-    return new TypeDescription(Category.BOOLEAN);
-  }
-
-  public static TypeDescription createByte() {
-    return new TypeDescription(Category.BYTE);
-  }
-
-  public static TypeDescription createShort() {
-    return new TypeDescription(Category.SHORT);
-  }
-
-  public static TypeDescription createInt() {
-    return new TypeDescription(Category.INT);
-  }
-
-  public static TypeDescription createLong() {
-    return new TypeDescription(Category.LONG);
-  }
-
-  public static TypeDescription createFloat() {
-    return new TypeDescription(Category.FLOAT);
-  }
-
-  public static TypeDescription createDouble() {
-    return new TypeDescription(Category.DOUBLE);
-  }
-
-  public static TypeDescription createString() {
-    return new TypeDescription(Category.STRING);
-  }
-
-  public static TypeDescription createDate() {
-    return new TypeDescription(Category.DATE);
-  }
-
-  public static TypeDescription createTimestamp() {
-    return new TypeDescription(Category.TIMESTAMP);
-  }
-
-  public static TypeDescription createBinary() {
-    return new TypeDescription(Category.BINARY);
-  }
-
-  public static TypeDescription createDecimal() {
-    return new TypeDescription(Category.DECIMAL);
-  }
-
-  static class StringPosition {
-    final String value;
-    int position;
-    final int length;
-
-    StringPosition(String value) {
-      this.value = value;
-      position = 0;
-      length = value.length();
-    }
-
-    @Override
-    public String toString() {
-      StringBuilder buffer = new StringBuilder();
-      buffer.append('\'');
-      buffer.append(value.substring(0, position));
-      buffer.append('^');
-      buffer.append(value.substring(position));
-      buffer.append('\'');
-      return buffer.toString();
-    }
-  }
-
-  static Category parseCategory(StringPosition source) {
-    int start = source.position;
-    while (source.position < source.length) {
-      char ch = source.value.charAt(source.position);
-      if (!Character.isLetter(ch)) {
-        break;
-      }
-      source.position += 1;
-    }
-    if (source.position != start) {
-      String word = source.value.substring(start, source.position).toLowerCase();
-      for (Category cat : Category.values()) {
-        if (cat.getName().equals(word)) {
-          return cat;
-        }
-      }
-    }
-    throw new IllegalArgumentException("Can't parse category at " + source);
-  }
-
-  static int parseInt(StringPosition source) {
-    int start = source.position;
-    int result = 0;
-    while (source.position < source.length) {
-      char ch = source.value.charAt(source.position);
-      if (!Character.isDigit(ch)) {
-        break;
-      }
-      result = result * 10 + (ch - '0');
-      source.position += 1;
-    }
-    if (source.position == start) {
-      throw new IllegalArgumentException("Missing integer at " + source);
-    }
-    return result;
-  }
-
-  static String parseName(StringPosition source) {
-    int start = source.position;
-    while (source.position < source.length) {
-      char ch = source.value.charAt(source.position);
-      if (!Character.isLetterOrDigit(ch) && ch != '.' && ch != '_') {
-        break;
-      }
-      source.position += 1;
-    }
-    if (source.position == start) {
-      throw new IllegalArgumentException("Missing name at " + source);
-    }
-    return source.value.substring(start, source.position);
-  }
-
-  static void requireChar(StringPosition source, char required) {
-    if (source.position >= source.length ||
-        source.value.charAt(source.position) != required) {
-      throw new IllegalArgumentException("Missing required char '" +
-          required + "' at " + source);
-    }
-    source.position += 1;
-  }
-
-  static boolean consumeChar(StringPosition source, char ch) {
-    boolean result = source.position < source.length &&
-        source.value.charAt(source.position) == ch;
-    if (result) {
-      source.position += 1;
-    }
-    return result;
-  }
-
-  static void parseUnion(TypeDescription type, StringPosition source) {
-    requireChar(source, '<');
-    do {
-      type.addUnionChild(parseType(source));
-    } while (consumeChar(source, ','));
-    requireChar(source, '>');
-  }
-
-  static void parseStruct(TypeDescription type, StringPosition source) {
-    requireChar(source, '<');
-    do {
-      String fieldName = parseName(source);
-      requireChar(source, ':');
-      type.addField(fieldName, parseType(source));
-    } while (consumeChar(source, ','));
-    requireChar(source, '>');
-  }
-
-  static TypeDescription parseType(StringPosition source) {
-    TypeDescription result = new TypeDescription(parseCategory(source));
-    switch (result.getCategory()) {
-      case BINARY:
-      case BOOLEAN:
-      case BYTE:
-      case DATE:
-      case DOUBLE:
-      case FLOAT:
-      case INT:
-      case LONG:
-      case SHORT:
-      case STRING:
-      case TIMESTAMP:
-        break;
-      case CHAR:
-      case VARCHAR:
-        requireChar(source, '(');
-        result.withMaxLength(parseInt(source));
-        requireChar(source, ')');
-        break;
-      case DECIMAL: {
-        requireChar(source, '(');
-        int precision = parseInt(source);
-        requireChar(source, ',');
-        result.withScale(parseInt(source));
-        result.withPrecision(precision);
-        requireChar(source, ')');
-        break;
-      }
-      case LIST:
-        requireChar(source, '<');
-        result.children.add(parseType(source));
-        requireChar(source, '>');
-        break;
-      case MAP:
-        requireChar(source, '<');
-        result.children.add(parseType(source));
-        requireChar(source, ',');
-        result.children.add(parseType(source));
-        requireChar(source, '>');
-        break;
-      case UNION:
-        parseUnion(result, source);
-        break;
-      case STRUCT:
-        parseStruct(result, source);
-        break;
-      default:
-        throw new IllegalArgumentException("Unknown type " +
-            result.getCategory() + " at " + source);
-    }
-    return result;
-  }
-
-  /**
-   * Parse TypeDescription from the Hive type names. This is the inverse
-   * of TypeDescription.toString()
-   * @param typeName the name of the type
-   * @return a new TypeDescription or null if typeName was null
-   * @throws IllegalArgumentException if the string is badly formed
-   */
-  public static TypeDescription fromString(String typeName) {
-    if (typeName == null) {
-      return null;
-    }
-    StringPosition source = new StringPosition(typeName);
-    TypeDescription result = parseType(source);
-    if (source.position != source.length) {
-      throw new IllegalArgumentException("Extra characters at " + source);
-    }
-    return result;
-  }
-
-  /**
-   * For decimal types, set the precision.
-   * @param precision the new precision
-   * @return this
-   */
-  public TypeDescription withPrecision(int precision) {
-    if (category != Category.DECIMAL) {
-      throw new IllegalArgumentException("precision is only allowed on decimal"+
-         " and not " + category.name);
-    } else if (precision < 1 || precision > MAX_PRECISION || scale > precision){
-      throw new IllegalArgumentException("precision " + precision +
-          " is out of range 1 .. " + scale);
-    }
-    this.precision = precision;
-    return this;
-  }
-
-  /**
-   * For decimal types, set the scale.
-   * @param scale the new scale
-   * @return this
-   */
-  public TypeDescription withScale(int scale) {
-    if (category != Category.DECIMAL) {
-      throw new IllegalArgumentException("scale is only allowed on decimal"+
-          " and not " + category.name);
-    } else if (scale < 0 || scale > MAX_SCALE || scale > precision) {
-      throw new IllegalArgumentException("scale is out of range at " + scale);
-    }
-    this.scale = scale;
-    return this;
-  }
-
-  public static TypeDescription createVarchar() {
-    return new TypeDescription(Category.VARCHAR);
-  }
-
-  public static TypeDescription createChar() {
-    return new TypeDescription(Category.CHAR);
-  }
-
-  /**
-   * Set the maximum length for char and varchar types.
-   * @param maxLength the maximum value
-   * @return this
-   */
-  public TypeDescription withMaxLength(int maxLength) {
-    if (category != Category.VARCHAR && category != Category.CHAR) {
-      throw new IllegalArgumentException("maxLength is only allowed on char" +
-                   " and varchar and not " + category.name);
-    }
-    this.maxLength = maxLength;
-    return this;
-  }
-
-  public static TypeDescription createList(TypeDescription childType) {
-    TypeDescription result = new TypeDescription(Category.LIST);
-    result.children.add(childType);
-    childType.parent = result;
-    return result;
-  }
-
-  public static TypeDescription createMap(TypeDescription keyType,
-                                          TypeDescription valueType) {
-    TypeDescription result = new TypeDescription(Category.MAP);
-    result.children.add(keyType);
-    result.children.add(valueType);
-    keyType.parent = result;
-    valueType.parent = result;
-    return result;
-  }
-
-  public static TypeDescription createUnion() {
-    return new TypeDescription(Category.UNION);
-  }
-
-  public static TypeDescription createStruct() {
-    return new TypeDescription(Category.STRUCT);
-  }
-
-  /**
-   * Add a child to a union type.
-   * @param child a new child type to add
-   * @return the union type.
-   */
-  public TypeDescription addUnionChild(TypeDescription child) {
-    if (category != Category.UNION) {
-      throw new IllegalArgumentException("Can only add types to union type" +
-          " and not " + category);
-    }
-    children.add(child);
-    child.parent = this;
-    return this;
-  }
-
-  /**
-   * Add a field to a struct type as it is built.
-   * @param field the field name
-   * @param fieldType the type of the field
-   * @return the struct type
-   */
-  public TypeDescription addField(String field, TypeDescription fieldType) {
-    if (category != Category.STRUCT) {
-      throw new IllegalArgumentException("Can only add fields to struct type" +
-          " and not " + category);
-    }
-    fieldNames.add(field);
-    children.add(fieldType);
-    fieldType.parent = this;
-    return this;
-  }
-
-  /**
-   * Get the id for this type.
-   * The first call will cause all of the ids in the tree to be assigned, so
-   * it should not be called before the type is completely built.
-   * @return the sequential id
-   */
-  public int getId() {
-    // if the id hasn't been assigned, assign all of the ids from the root
-    if (id == -1) {
-      TypeDescription root = this;
-      while (root.parent != null) {
-        root = root.parent;
-      }
-      root.assignIds(0);
-    }
-    return id;
-  }
-
-  public TypeDescription clone() {
-    TypeDescription result = new TypeDescription(category);
-    result.maxLength = maxLength;
-    result.precision = precision;
-    result.scale = scale;
-    if (fieldNames != null) {
-      result.fieldNames.addAll(fieldNames);
-    }
-    if (children != null) {
-      for(TypeDescription child: children) {
-        TypeDescription clone = child.clone();
-        clone.parent = result;
-        result.children.add(clone);
-      }
-    }
-    return result;
-  }
-
-  @Override
-  public int hashCode() {
-    long result = category.ordinal() * 4241 + maxLength + precision * 13 + scale;
-    if (children != null) {
-      for(TypeDescription child: children) {
-        result = result * 6959 + child.hashCode();
-      }
-    }
-    return (int) result;
-  }
-
-  @Override
-  public boolean equals(Object other) {
-    if (other == null || !(other instanceof TypeDescription)) {
-      return false;
-    }
-    if (other == this) {
-      return true;
-    }
-    TypeDescription castOther = (TypeDescription) other;
-    if (category != castOther.category ||
-        maxLength != castOther.maxLength ||
-        scale != castOther.scale ||
-        precision != castOther.precision) {
-      return false;
-    }
-    if (children != null) {
-      if (children.size() != castOther.children.size()) {
-        return false;
-      }
-      for (int i = 0; i < children.size(); ++i) {
-        if (!children.get(i).equals(castOther.children.get(i))) {
-          return false;
-        }
-      }
-    }
-    if (category == Category.STRUCT) {
-      for(int i=0; i < fieldNames.size(); ++i) {
-        if (!fieldNames.get(i).equals(castOther.fieldNames.get(i))) {
-          return false;
-        }
-      }
-    }
-    return true;
-  }
-
-  /**
-   * Get the maximum id assigned to this type or its children.
-   * The first call will cause all of the ids in the tree to be assigned, so
-   * it should not be called before the type is completely built.
-   * @return the maximum id assigned under this type
-   */
-  public int getMaximumId() {
-    // if the id hasn't been assigned, assign all of the ids from the root
-    if (maxId == -1) {
-      TypeDescription root = this;
-      while (root.parent != null) {
-        root = root.parent;
-      }
-      root.assignIds(0);
-    }
-    return maxId;
-  }
-
-  private ColumnVector createColumn(int maxSize) {
-    switch (category) {
-      case BOOLEAN:
-      case BYTE:
-      case SHORT:
-      case INT:
-      case LONG:
-      case DATE:
-        return new LongColumnVector(maxSize);
-      case TIMESTAMP:
-        return new TimestampColumnVector(maxSize);
-      case FLOAT:
-      case DOUBLE:
-        return new DoubleColumnVector(maxSize);
-      case DECIMAL:
-        return new DecimalColumnVector(maxSize, precision, scale);
-      case STRING:
-      case BINARY:
-      case CHAR:
-      case VARCHAR:
-        return new BytesColumnVector(maxSize);
-      case STRUCT: {
-        ColumnVector[] fieldVector = new ColumnVector[children.size()];
-        for(int i=0; i < fieldVector.length; ++i) {
-          fieldVector[i] = children.get(i).createColumn(maxSize);
-        }
-        return new StructColumnVector(maxSize,
-                fieldVector);
-      }
-      case UNION: {
-        ColumnVector[] fieldVector = new ColumnVector[children.size()];
-        for(int i=0; i < fieldVector.length; ++i) {
-          fieldVector[i] = children.get(i).createColumn(maxSize);
-        }
-        return new UnionColumnVector(maxSize,
-            fieldVector);
-      }
-      case LIST:
-        return new ListColumnVector(maxSize,
-            children.get(0).createColumn(maxSize));
-      case MAP:
-        return new MapColumnVector(maxSize,
-            children.get(0).createColumn(maxSize),
-            children.get(1).createColumn(maxSize));
-      default:
-        throw new IllegalArgumentException("Unknown type " + category);
-    }
-  }
-
-  public VectorizedRowBatch createRowBatch(int maxSize) {
-    VectorizedRowBatch result;
-    if (category == Category.STRUCT) {
-      result = new VectorizedRowBatch(children.size(), maxSize);
-      for(int i=0; i < result.cols.length; ++i) {
-        result.cols[i] = children.get(i).createColumn(maxSize);
-      }
-    } else {
-      result = new VectorizedRowBatch(1, maxSize);
-      result.cols[0] = createColumn(maxSize);
-    }
-    result.reset();
-    return result;
-  }
-
-  public VectorizedRowBatch createRowBatch() {
-    return createRowBatch(VectorizedRowBatch.DEFAULT_SIZE);
-  }
-
-  /**
-   * Get the kind of this type.
-   * @return get the category for this type.
-   */
-  public Category getCategory() {
-    return category;
-  }
-
-  /**
-   * Get the maximum length of the type. Only used for char and varchar types.
-   * @return the maximum length of the string type
-   */
-  public int getMaxLength() {
-    return maxLength;
-  }
-
-  /**
-   * Get the precision of the decimal type.
-   * @return the number of digits for the precision.
-   */
-  public int getPrecision() {
-    return precision;
-  }
-
-  /**
-   * Get the scale of the decimal type.
-   * @return the number of digits for the scale.
-   */
-  public int getScale() {
-    return scale;
-  }
-
-  /**
-   * For struct types, get the list of field names.
-   * @return the list of field names.
-   */
-  public List<String> getFieldNames() {
-    return Collections.unmodifiableList(fieldNames);
-  }
-
-  /**
-   * Get the subtypes of this type.
-   * @return the list of children types
-   */
-  public List<TypeDescription> getChildren() {
-    return children == null ? null : Collections.unmodifiableList(children);
-  }
-
-  /**
-   * Assign ids to all of the nodes under this one.
-   * @param startId the lowest id to assign
-   * @return the next available id
-   */
-  private int assignIds(int startId) {
-    id = startId++;
-    if (children != null) {
-      for (TypeDescription child : children) {
-        startId = child.assignIds(startId);
-      }
-    }
-    maxId = startId - 1;
-    return startId;
-  }
-
-  private TypeDescription(Category category) {
-    this.category = category;
-    if (category.isPrimitive) {
-      children = null;
-    } else {
-      children = new ArrayList<>();
-    }
-    if (category == Category.STRUCT) {
-      fieldNames = new ArrayList<>();
-    } else {
-      fieldNames = null;
-    }
-  }
-
-  private int id = -1;
-  private int maxId = -1;
-  private TypeDescription parent;
-  private final Category category;
-  private final List<TypeDescription> children;
-  private final List<String> fieldNames;
-  private int maxLength = DEFAULT_LENGTH;
-  private int precision = DEFAULT_PRECISION;
-  private int scale = DEFAULT_SCALE;
-
-  public void printToBuffer(StringBuilder buffer) {
-    buffer.append(category.name);
-    switch (category) {
-      case DECIMAL:
-        buffer.append('(');
-        buffer.append(precision);
-        buffer.append(',');
-        buffer.append(scale);
-        buffer.append(')');
-        break;
-      case CHAR:
-      case VARCHAR:
-        buffer.append('(');
-        buffer.append(maxLength);
-        buffer.append(')');
-        break;
-      case LIST:
-      case MAP:
-      case UNION:
-        buffer.append('<');
-        for(int i=0; i < children.size(); ++i) {
-          if (i != 0) {
-            buffer.append(',');
-          }
-          children.get(i).printToBuffer(buffer);
-        }
-        buffer.append('>');
-        break;
-      case STRUCT:
-        buffer.append('<');
-        for(int i=0; i < children.size(); ++i) {
-          if (i != 0) {
-            buffer.append(',');
-          }
-          buffer.append(fieldNames.get(i));
-          buffer.append(':');
-          children.get(i).printToBuffer(buffer);
-        }
-        buffer.append('>');
-        break;
-      default:
-        break;
-    }
-  }
-
-  public String toString() {
-    StringBuilder buffer = new StringBuilder();
-    printToBuffer(buffer);
-    return buffer.toString();
-  }
-
-  private void printJsonToBuffer(String prefix, StringBuilder buffer,
-                                 int indent) {
-    for(int i=0; i < indent; ++i) {
-      buffer.append(' ');
-    }
-    buffer.append(prefix);
-    buffer.append("{\"category\": \"");
-    buffer.append(category.name);
-    buffer.append("\", \"id\": ");
-    buffer.append(getId());
-    buffer.append(", \"max\": ");
-    buffer.append(maxId);
-    switch (category) {
-      case DECIMAL:
-        buffer.append(", \"precision\": ");
-        buffer.append(precision);
-        buffer.append(", \"scale\": ");
-        buffer.append(scale);
-        break;
-      case CHAR:
-      case VARCHAR:
-        buffer.append(", \"length\": ");
-        buffer.append(maxLength);
-        break;
-      case LIST:
-      case MAP:
-      case UNION:
-        buffer.append(", \"children\": [");
-        for(int i=0; i < children.size(); ++i) {
-          buffer.append('\n');
-          children.get(i).printJsonToBuffer("", buffer, indent + 2);
-          if (i != children.size() - 1) {
-            buffer.append(',');
-          }
-        }
-        buffer.append("]");
-        break;
-      case STRUCT:
-        buffer.append(", \"fields\": [");
-        for(int i=0; i < children.size(); ++i) {
-          buffer.append('\n');
-          children.get(i).printJsonToBuffer("\"" + fieldNames.get(i) + "\": ",
-              buffer, indent + 2);
-          if (i != children.size() - 1) {
-            buffer.append(',');
-          }
-        }
-        buffer.append(']');
-        break;
-      default:
-        break;
-    }
-    buffer.append('}');
-  }
-
-  public String toJson() {
-    StringBuilder buffer = new StringBuilder();
-    printJsonToBuffer("", buffer, 0);
-    return buffer.toString();
-  }
-
-  /**
-   * Locate a subtype by its id.
-   * @param goal the column id to look for
-   * @return the subtype
-   */
-  public TypeDescription findSubtype(int goal) {
-    // call getId method to make sure the ids are assigned
-    int id = getId();
-    if (goal < id || goal > maxId) {
-      throw new IllegalArgumentException("Unknown type id " + id + " in " +
-          toJson());
-    }
-    if (goal == id) {
-      return this;
-    } else {
-      TypeDescription prev = null;
-      for(TypeDescription next: children) {
-        if (next.id > goal) {
-          return prev.findSubtype(goal);
-        }
-        prev = next;
-      }
-      return prev.findSubtype(goal);
-    }
-  }
-}

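For context on the file removed above: a minimal sketch of how the TypeDescription builder API is typically used. The class name TypeDescriptionExample and the sample schema are illustrative only and not part of this commit.

    import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
    import org.apache.orc.TypeDescription;

    public class TypeDescriptionExample {
      public static void main(String[] args) {
        // Build struct<name:varchar(64),tags:list<char(3)>> with the builder methods above.
        TypeDescription schema = TypeDescription.createStruct()
            .addField("name", TypeDescription.createVarchar().withMaxLength(64))
            .addField("tags", TypeDescription.createList(
                TypeDescription.createChar().withMaxLength(3)));

        // Ids are assigned lazily on the first call; the root struct is id 0.
        System.out.println(schema.getId());         // 0
        System.out.println(schema.getMaximumId());  // 3
        System.out.println(schema);                 // struct<name:varchar(64),tags:list<char(3)>>

        // createRowBatch allocates one ColumnVector per top-level field.
        VectorizedRowBatch batch = schema.createRowBatch(1024);
        System.out.println(batch.cols.length);                           // 2
        System.out.println(batch.cols[0] instanceof BytesColumnVector);  // true
      }
    }
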
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/Writer.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/Writer.java b/orc/src/java/org/apache/orc/Writer.java
deleted file mode 100644
index 4492062..0000000
--- a/orc/src/java/org/apache/orc/Writer.java
+++ /dev/null
@@ -1,114 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc;
-
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.List;
-
-import org.apache.orc.OrcProto;
-import org.apache.orc.StripeInformation;
-import org.apache.orc.TypeDescription;
-
-/**
- * The interface for writing ORC files.
- */
-public interface Writer {
-
-  /**
-   * Get the schema for this writer
-   * @return the file schema
-   */
-  TypeDescription getSchema();
-
-  /**
-   * Add arbitrary meta-data to the ORC file. This may be called at any point
-   * until the Writer is closed. If the same key is passed a second time, the
-   * second value will replace the first.
-   * @param key a key to label the data with.
-   * @param value the contents of the metadata.
-   */
-  void addUserMetadata(String key, ByteBuffer value);
-
-  /**
-   * Add a row batch to the ORC file.
-   * @param batch the rows to add
-   */
-  void addRowBatch(VectorizedRowBatch batch) throws IOException;
-
-  /**
-   * Flush all of the buffers and close the file. No methods on this writer
-   * should be called afterwards.
-   * @throws IOException
-   */
-  void close() throws IOException;
-
-  /**
-   * Return the deserialized data size. The raw data size is computed when
-   * writing the file footer, so it is available only after closing the
-   * writer.
-   *
-   * @return raw data size
-   */
-  long getRawDataSize();
-
-  /**
-   * Return the number of rows in the file. The row count is updated when
-   * flushing the stripes, so to get an accurate count this method should be
-   * called after closing the writer.
-   *
-   * @return row count
-   */
-  long getNumberOfRows();
-
-  /**
-   * Write an intermediate footer on the file such that if the file is
-   * truncated to the returned offset, it would be a valid ORC file.
-   * @return the offset that would be a valid end location for an ORC file
-   */
-  long writeIntermediateFooter() throws IOException;
-
-  /**
-   * Fast stripe append to an ORC file. This interface is used for fast merges
-   * of ORC files. When merging, the file to be merged should pass the stripe
-   * in binary form along with its stripe information and stripe statistics.
-   * After appending the last stripe of a file, use appendUserMetadata() to
-   * append any user metadata.
-   * @param stripe - stripe as byte array
-   * @param offset - offset within byte array
-   * @param length - length of stripe within byte array
-   * @param stripeInfo - stripe information
-   * @param stripeStatistics - stripe statistics (Protobuf objects can be
-   *                         merged directly)
-   * @throws IOException
-   */
-  public void appendStripe(byte[] stripe, int offset, int length,
-      StripeInformation stripeInfo,
-      OrcProto.StripeStatistics stripeStatistics) throws IOException;
-
-  /**
-   * When fast stripe append is used for merging ORC stripes, after appending
-   * the last stripe from a file, this interface must be used to merge any
-   * user metadata.
-   * @param userMetadata - user metadata
-   */
-  public void appendUserMetadata(List<OrcProto.UserMetadataItem> userMetadata);
-}

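A minimal sketch of driving the Writer interface deleted above. It assumes the OrcFile factory (OrcFile.writerOptions / OrcFile.createWriter) and TypeDescription.createLong() from elsewhere in this module; the output path and class name are illustrative only.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
    import org.apache.orc.OrcFile;
    import org.apache.orc.TypeDescription;
    import org.apache.orc.Writer;

    public class WriterExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        TypeDescription schema = TypeDescription.createStruct()
            .addField("x", TypeDescription.createLong());

        // OrcFile.createWriter is the factory behind the Writer interface (assumed API).
        Writer writer = OrcFile.createWriter(new Path("/tmp/example.orc"),
            OrcFile.writerOptions(conf).setSchema(schema));

        VectorizedRowBatch batch = schema.createRowBatch(1024);
        LongColumnVector x = (LongColumnVector) batch.cols[0];
        for (int r = 0; r < 10000; ++r) {
          int row = batch.size++;
          x.vector[row] = r;
          if (batch.size == 1024) {       // flush each full batch
            writer.addRowBatch(batch);
            batch.reset();
          }
        }
        if (batch.size != 0) {            // flush the final partial batch
          writer.addRowBatch(batch);
        }
        writer.close();                   // row count is accurate only after close
        System.out.println(writer.getNumberOfRows());   // 10000
      }
    }
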
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/impl/AcidStats.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/AcidStats.java b/orc/src/java/org/apache/orc/impl/AcidStats.java
deleted file mode 100644
index 6657fe9..0000000
--- a/orc/src/java/org/apache/orc/impl/AcidStats.java
+++ /dev/null
@@ -1,60 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * <p/>
- * http://www.apache.org/licenses/LICENSE-2.0
- * <p/>
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc.impl;
-
-/**
- * Statistics about the ACID operations in an ORC file
- */
-public class AcidStats {
-  public long inserts;
-  public long updates;
-  public long deletes;
-
-  public AcidStats() {
-    inserts = 0;
-    updates = 0;
-    deletes = 0;
-  }
-
-  public AcidStats(String serialized) {
-    String[] parts = serialized.split(",");
-    inserts = Long.parseLong(parts[0]);
-    updates = Long.parseLong(parts[1]);
-    deletes = Long.parseLong(parts[2]);
-  }
-
-  public String serialize() {
-    StringBuilder builder = new StringBuilder();
-    builder.append(inserts);
-    builder.append(",");
-    builder.append(updates);
-    builder.append(",");
-    builder.append(deletes);
-    return builder.toString();
-  }
-
-  @Override
-  public String toString() {
-    StringBuilder builder = new StringBuilder();
-    builder.append(" inserts: ").append(inserts);
-    builder.append(" updates: ").append(updates);
-    builder.append(" deletes: ").append(deletes);
-    return builder.toString();
-  }
-}

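The serialize() format above is simply the three counters joined by commas, which the String constructor parses back. A tiny round-trip sketch (the class name is illustrative):

    import org.apache.orc.impl.AcidStats;

    public class AcidStatsRoundTrip {
      public static void main(String[] args) {
        AcidStats stats = new AcidStats();
        stats.inserts = 100;
        stats.updates = 5;
        stats.deletes = 2;

        String text = stats.serialize();     // "100,5,2"
        AcidStats copy = new AcidStats(text);
        System.out.println(copy);            // " inserts: 100 updates: 5 deletes: 2"
      }
    }
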
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/impl/BitFieldReader.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/BitFieldReader.java b/orc/src/java/org/apache/orc/impl/BitFieldReader.java
deleted file mode 100644
index dda7355..0000000
--- a/orc/src/java/org/apache/orc/impl/BitFieldReader.java
+++ /dev/null
@@ -1,217 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc.impl;
-
-import java.io.EOFException;
-import java.io.IOException;
-
-import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
-import org.apache.orc.impl.InStream;
-import org.apache.orc.impl.PositionProvider;
-import org.apache.orc.impl.RunLengthByteReader;
-
-public class BitFieldReader {
-  private final RunLengthByteReader input;
-  /** The number of bits in one item. Non-test code always uses 1. */
-  private final int bitSize;
-  private int current;
-  private int bitsLeft;
-  private final int mask;
-
-  public BitFieldReader(InStream input,
-      int bitSize) throws IOException {
-    this.input = new RunLengthByteReader(input);
-    this.bitSize = bitSize;
-    mask = (1 << bitSize) - 1;
-  }
-
-  public void setInStream(InStream inStream) {
-    this.input.setInStream(inStream);
-  }
-
-  private void readByte() throws IOException {
-    if (input.hasNext()) {
-      current = 0xff & input.next();
-      bitsLeft = 8;
-    } else {
-      throw new EOFException("Read past end of bit field from " + this);
-    }
-  }
-
-  public int next() throws IOException {
-    int result = 0;
-    int bitsLeftToRead = bitSize;
-    while (bitsLeftToRead > bitsLeft) {
-      result <<= bitsLeft;
-      result |= current & ((1 << bitsLeft) - 1);
-      bitsLeftToRead -= bitsLeft;
-      readByte();
-    }
-    if (bitsLeftToRead > 0) {
-      result <<= bitsLeftToRead;
-      bitsLeft -= bitsLeftToRead;
-      result |= (current >>> bitsLeft) & ((1 << bitsLeftToRead) - 1);
-    }
-    return result & mask;
-  }
-
-  /**
-   * Unlike integer readers, where runs are encoded explicitly, in this one we have to read ahead
-   * to figure out whether we have a run. Given that runs in booleans are likely, it's worth it.
-   * However, it means we'd need to keep track of how many bytes we read, and next/nextVector won't
-   * work anymore once this is called. This is trivial to fix, but these calls are never interspersed.
-   */
-  private boolean lastRunValue;
-  private int lastRunLength = -1;
-  private void readNextRun(int maxRunLength) throws IOException {
-    assert bitSize == 1;
-    if (lastRunLength > 0) return; // last run is not exhausted yet
-    if (bitsLeft == 0) {
-      readByte();
-    }
-    // First take care of the partial bits.
-    boolean hasVal = false;
-    int runLength = 0;
-    if (bitsLeft != 8) {
-      int partialBitsMask = (1 << bitsLeft) - 1;
-      int partialBits = current & partialBitsMask;
-      if (partialBits == partialBitsMask || partialBits == 0) {
-        lastRunValue = (partialBits == partialBitsMask);
-        if (maxRunLength <= bitsLeft) {
-          lastRunLength = maxRunLength;
-          return;
-        }
-        maxRunLength -= bitsLeft;
-        hasVal = true;
-        runLength = bitsLeft;
-        bitsLeft = 0;
-      } else {
-        // There's no run in partial bits. Return whatever we have.
-        int prefixBitsCount = 32 - bitsLeft;
-        runLength = Integer.numberOfLeadingZeros(partialBits) - prefixBitsCount;
-        lastRunValue = (runLength > 0);
-        lastRunLength = Math.min(maxRunLength, lastRunValue ? runLength :
-          (Integer.numberOfLeadingZeros(~(partialBits | ~partialBitsMask)) - prefixBitsCount));
-        return;
-      }
-      assert bitsLeft == 0;
-      readByte();
-    }
-    if (!hasVal) {
-      lastRunValue = ((current >> 7) == 1);
-      hasVal = true;
-    }
-    // Read full bytes until the run ends.
-    assert bitsLeft == 8;
-    while (maxRunLength >= 8
-        && ((lastRunValue && (current == 0xff)) || (!lastRunValue && (current == 0)))) {
-      runLength += 8;
-      maxRunLength -= 8;
-      readByte();
-    }
-    if (maxRunLength > 0) {
-      int extraBits = Integer.numberOfLeadingZeros(
-          lastRunValue ? (~(current | ~255)) : current) - 24;
-      bitsLeft -= extraBits;
-      runLength += extraBits;
-    }
-    lastRunLength = runLength;
-  }
-
-  public void nextVector(LongColumnVector previous,
-                         long previousLen) throws IOException {
-    previous.isRepeating = true;
-    for (int i = 0; i < previousLen; i++) {
-      if (previous.noNulls || !previous.isNull[i]) {
-        previous.vector[i] = next();
-      } else {
-        // The default value of null for int types in vectorized
-        // processing is 1, so set that if the value is null
-        previous.vector[i] = 1;
-      }
-
-      // The default value for nulls in Vectorization for int types is 1
-      // and given that non null value can also be 1, we need to check for isNull also
-      // when determining the isRepeating flag.
-      if (previous.isRepeating
-          && i > 0
-          && ((previous.vector[0] != previous.vector[i]) ||
-          (previous.isNull[0] != previous.isNull[i]))) {
-        previous.isRepeating = false;
-      }
-    }
-  }
-
-  public void seek(PositionProvider index) throws IOException {
-    input.seek(index);
-    int consumed = (int) index.getNext();
-    if (consumed > 8) {
-      throw new IllegalArgumentException("Seek past end of byte at " +
-          consumed + " in " + input);
-    } else if (consumed != 0) {
-      readByte();
-      bitsLeft = 8 - consumed;
-    } else {
-      bitsLeft = 0;
-    }
-  }
-
-  public void skip(long items) throws IOException {
-    long totalBits = bitSize * items;
-    if (bitsLeft >= totalBits) {
-      bitsLeft -= totalBits;
-    } else {
-      totalBits -= bitsLeft;
-      input.skip(totalBits / 8);
-      current = input.next();
-      bitsLeft = (int) (8 - (totalBits % 8));
-    }
-  }
-
-  @Override
-  public String toString() {
-    return "bit reader current: " + current + " bits left: " + bitsLeft +
-        " bit size: " + bitSize + " from " + input;
-  }
-
-  boolean hasFullByte() {
-    return bitsLeft == 8 || bitsLeft == 0;
-  }
-
-  int peekOneBit() throws IOException {
-    assert bitSize == 1;
-    if (bitsLeft == 0) {
-      readByte();
-    }
-    return (current >>> (bitsLeft - 1)) & 1;
-  }
-
-  int peekFullByte() throws IOException {
-    assert bitSize == 1;
-    assert bitsLeft == 8 || bitsLeft == 0;
-    if (bitsLeft == 0) {
-      readByte();
-    }
-    return current;
-  }
-
-  void skipInCurrentByte(int bits) throws IOException {
-    assert bitsLeft >= bits;
-    bitsLeft -= bits;
-  }
-}

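The next() method above is a shift-and-mask over a one-byte buffer, consuming bits from the most significant end. A standalone sketch of that arithmetic for bitSize == 1 (it does not use the class itself, which needs an InStream; the class name is illustrative):

    public class BitUnpackSketch {
      public static void main(String[] args) {
        // One byte holding eight 1-bit values, most significant bit first.
        int current = 0b10110010;
        int bitsLeft = 8;
        int bitSize = 1;
        int mask = (1 << bitSize) - 1;

        StringBuilder out = new StringBuilder();
        while (bitsLeft >= bitSize) {
          bitsLeft -= bitSize;
          out.append((current >>> bitsLeft) & mask);   // same shift-and-mask as next()
        }
        System.out.println(out);   // 10110010
      }
    }
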
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/impl/BitFieldWriter.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/BitFieldWriter.java b/orc/src/java/org/apache/orc/impl/BitFieldWriter.java
deleted file mode 100644
index aa5f886..0000000
--- a/orc/src/java/org/apache/orc/impl/BitFieldWriter.java
+++ /dev/null
@@ -1,73 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc.impl;
-
-import org.apache.orc.impl.PositionRecorder;
-import org.apache.orc.impl.PositionedOutputStream;
-import org.apache.orc.impl.RunLengthByteWriter;
-
-import java.io.IOException;
-
-public class BitFieldWriter {
-  private RunLengthByteWriter output;
-  private final int bitSize;
-  private byte current = 0;
-  private int bitsLeft = 8;
-
-  public BitFieldWriter(PositionedOutputStream output,
-                 int bitSize) throws IOException {
-    this.output = new RunLengthByteWriter(output);
-    this.bitSize = bitSize;
-  }
-
-  private void writeByte() throws IOException {
-    output.write(current);
-    current = 0;
-    bitsLeft = 8;
-  }
-
-  public void flush() throws IOException {
-    if (bitsLeft != 8) {
-      writeByte();
-    }
-    output.flush();
-  }
-
-  public void write(int value) throws IOException {
-    int bitsToWrite = bitSize;
-    while (bitsToWrite > bitsLeft) {
-      // add the bits to the bottom of the current word
-      current |= value >>> (bitsToWrite - bitsLeft);
-      // subtract out the bits we just added
-      bitsToWrite -= bitsLeft;
-      // zero out the bits above bitsToWrite
-      value &= (1 << bitsToWrite) - 1;
-      writeByte();
-    }
-    bitsLeft -= bitsToWrite;
-    current |= value << bitsLeft;
-    if (bitsLeft == 0) {
-      writeByte();
-    }
-  }
-
-  public void getPosition(PositionRecorder recorder) throws IOException {
-    output.getPosition(recorder);
-    recorder.addPosition(8 - bitsLeft);
-  }
-}

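The complementary packing side: write() above fills the current byte from the high bit down, and flush() emits any partial byte with zero padding in the low bits. A standalone sketch for bitSize == 1 (again not using the class itself, which needs a PositionedOutputStream):

    public class BitPackSketch {
      public static void main(String[] args) {
        int[] values = {1, 0, 1, 1, 0};
        int current = 0;
        int bitsLeft = 8;
        for (int v : values) {
          bitsLeft -= 1;                  // same accounting as BitFieldWriter.write()
          current |= (v & 1) << bitsLeft;
        }
        // flush(): bitsLeft != 8, so the partial byte goes out as-is.
        System.out.printf("0x%02x%n", current);   // 0xb0 == 1011 0000
      }
    }
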
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/impl/BufferChunk.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/BufferChunk.java b/orc/src/java/org/apache/orc/impl/BufferChunk.java
deleted file mode 100644
index da43b96..0000000
--- a/orc/src/java/org/apache/orc/impl/BufferChunk.java
+++ /dev/null
@@ -1,85 +0,0 @@
-package org.apache.orc.impl;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.hadoop.hive.common.io.DiskRange;
-import org.apache.hadoop.hive.common.io.DiskRangeList;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.nio.ByteBuffer;
-
-/**
- * The sections of stripe that we have read.
- * This might not match diskRange: one disk range can be multiple buffer chunks,
- * depending on DFS block boundaries.
- */
-public class BufferChunk extends DiskRangeList {
-
-  private static final Logger LOG =
-      LoggerFactory.getLogger(BufferChunk.class);
-  final ByteBuffer chunk;
-
-  public BufferChunk(ByteBuffer chunk, long offset) {
-    super(offset, offset + chunk.remaining());
-    this.chunk = chunk;
-  }
-
-  public ByteBuffer getChunk() {
-    return chunk;
-  }
-
-  @Override
-  public boolean hasData() {
-    return chunk != null;
-  }
-
-  @Override
-  public final String toString() {
-    boolean makesSense = chunk.remaining() == (end - offset);
-    return "data range [" + offset + ", " + end + "), size: " + chunk.remaining()
-        + (makesSense ? "" : "(!)") + " type: " +
-        (chunk.isDirect() ? "direct" : "array-backed");
-  }
-
-  @Override
-  public DiskRange sliceAndShift(long offset, long end, long shiftBy) {
-    assert offset <= end && offset >= this.offset && end <= this.end;
-    assert offset + shiftBy >= 0;
-    ByteBuffer sliceBuf = chunk.slice();
-    int newPos = (int) (offset - this.offset);
-    int newLimit = newPos + (int) (end - offset);
-    try {
-      sliceBuf.position(newPos);
-      sliceBuf.limit(newLimit);
-    } catch (Throwable t) {
-      LOG.error("Failed to slice buffer chunk with range" + " [" + this.offset + ", " + this.end
-          + "), position: " + chunk.position() + " limit: " + chunk.limit() + ", "
-          + (chunk.isDirect() ? "direct" : "array") + "; to [" + offset + ", " + end + ") "
-          + t.getClass());
-      throw new RuntimeException(t);
-    }
-    return new BufferChunk(sliceBuf, offset + shiftBy);
-  }
-
-  @Override
-  public ByteBuffer getData() {
-    return chunk;
-  }
-}

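sliceAndShift() above narrows the backing ByteBuffer via position/limit and re-bases the file offsets. A small sketch of that arithmetic (the data and offsets are made up; the class name is illustrative):

    import java.nio.ByteBuffer;
    import java.nio.charset.StandardCharsets;
    import org.apache.hadoop.hive.common.io.DiskRange;
    import org.apache.orc.impl.BufferChunk;

    public class BufferChunkSketch {
      public static void main(String[] args) {
        // A chunk covering file offsets [100, 112), backed by 12 bytes.
        ByteBuffer data = ByteBuffer.wrap("hello, world".getBytes(StandardCharsets.UTF_8));
        BufferChunk chunk = new BufferChunk(data, 100);
        System.out.println(chunk);   // data range [100, 112), size: 12 ...

        // Take file offsets [107, 112) and shift down by 100 -> new range [7, 12).
        DiskRange slice = chunk.sliceAndShift(107, 112, -100);
        System.out.println(slice);   // data range [7, 12), size: 5 ...
      }
    }
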
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java b/orc/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
deleted file mode 100644
index 76a466d..0000000
--- a/orc/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
+++ /dev/null
@@ -1,1101 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc.impl;
-
-import java.sql.Date;
-import java.sql.Timestamp;
-
-import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
-import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.hadoop.hive.serde2.io.DateWritable;
-import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.WritableComparator;
-import org.apache.orc.BinaryColumnStatistics;
-import org.apache.orc.BooleanColumnStatistics;
-import org.apache.orc.ColumnStatistics;
-import org.apache.orc.DateColumnStatistics;
-import org.apache.orc.DecimalColumnStatistics;
-import org.apache.orc.DoubleColumnStatistics;
-import org.apache.orc.IntegerColumnStatistics;
-import org.apache.orc.OrcProto;
-import org.apache.orc.StringColumnStatistics;
-import org.apache.orc.TimestampColumnStatistics;
-import org.apache.orc.TypeDescription;
-
-public class ColumnStatisticsImpl implements ColumnStatistics {
-
-  private static final class BooleanStatisticsImpl extends ColumnStatisticsImpl
-      implements BooleanColumnStatistics {
-    private long trueCount = 0;
-
-    BooleanStatisticsImpl(OrcProto.ColumnStatistics stats) {
-      super(stats);
-      OrcProto.BucketStatistics bkt = stats.getBucketStatistics();
-      trueCount = bkt.getCount(0);
-    }
-
-    BooleanStatisticsImpl() {
-    }
-
-    @Override
-    public void reset() {
-      super.reset();
-      trueCount = 0;
-    }
-
-    @Override
-    public void updateBoolean(boolean value, int repetitions) {
-      if (value) {
-        trueCount += repetitions;
-      }
-    }
-
-    @Override
-    public void merge(ColumnStatisticsImpl other) {
-      if (other instanceof BooleanStatisticsImpl) {
-        BooleanStatisticsImpl bkt = (BooleanStatisticsImpl) other;
-        trueCount += bkt.trueCount;
-      } else {
-        if (isStatsExists() && trueCount != 0) {
-          throw new IllegalArgumentException("Incompatible merging of boolean column statistics");
-        }
-      }
-      super.merge(other);
-    }
-
-    @Override
-    public OrcProto.ColumnStatistics.Builder serialize() {
-      OrcProto.ColumnStatistics.Builder builder = super.serialize();
-      OrcProto.BucketStatistics.Builder bucket =
-        OrcProto.BucketStatistics.newBuilder();
-      bucket.addCount(trueCount);
-      builder.setBucketStatistics(bucket);
-      return builder;
-    }
-
-    @Override
-    public long getFalseCount() {
-      return getNumberOfValues() - trueCount;
-    }
-
-    @Override
-    public long getTrueCount() {
-      return trueCount;
-    }
-
-    @Override
-    public String toString() {
-      return super.toString() + " true: " + trueCount;
-    }
-  }
-
-  private static final class IntegerStatisticsImpl extends ColumnStatisticsImpl
-      implements IntegerColumnStatistics {
-
-    private long minimum = Long.MAX_VALUE;
-    private long maximum = Long.MIN_VALUE;
-    private long sum = 0;
-    private boolean hasMinimum = false;
-    private boolean overflow = false;
-
-    IntegerStatisticsImpl() {
-    }
-
-    IntegerStatisticsImpl(OrcProto.ColumnStatistics stats) {
-      super(stats);
-      OrcProto.IntegerStatistics intStat = stats.getIntStatistics();
-      if (intStat.hasMinimum()) {
-        hasMinimum = true;
-        minimum = intStat.getMinimum();
-      }
-      if (intStat.hasMaximum()) {
-        maximum = intStat.getMaximum();
-      }
-      if (intStat.hasSum()) {
-        sum = intStat.getSum();
-      } else {
-        overflow = true;
-      }
-    }
-
-    @Override
-    public void reset() {
-      super.reset();
-      hasMinimum = false;
-      minimum = Long.MAX_VALUE;
-      maximum = Long.MIN_VALUE;
-      sum = 0;
-      overflow = false;
-    }
-
-    @Override
-    public void updateInteger(long value, int repetitions) {
-      if (!hasMinimum) {
-        hasMinimum = true;
-        minimum = value;
-        maximum = value;
-      } else if (value < minimum) {
-        minimum = value;
-      } else if (value > maximum) {
-        maximum = value;
-      }
-      if (!overflow) {
-        boolean wasPositive = sum >= 0;
-        sum += value * repetitions;
-        if ((value >= 0) == wasPositive) {
-          overflow = (sum >= 0) != wasPositive;
-        }
-      }
-    }
-
-    @Override
-    public void merge(ColumnStatisticsImpl other) {
-      if (other instanceof IntegerStatisticsImpl) {
-        IntegerStatisticsImpl otherInt = (IntegerStatisticsImpl) other;
-        if (!hasMinimum) {
-          hasMinimum = otherInt.hasMinimum;
-          minimum = otherInt.minimum;
-          maximum = otherInt.maximum;
-        } else if (otherInt.hasMinimum) {
-          if (otherInt.minimum < minimum) {
-            minimum = otherInt.minimum;
-          }
-          if (otherInt.maximum > maximum) {
-            maximum = otherInt.maximum;
-          }
-        }
-
-        overflow |= otherInt.overflow;
-        if (!overflow) {
-          boolean wasPositive = sum >= 0;
-          sum += otherInt.sum;
-          if ((otherInt.sum >= 0) == wasPositive) {
-            overflow = (sum >= 0) != wasPositive;
-          }
-        }
-      } else {
-        if (isStatsExists() && hasMinimum) {
-          throw new IllegalArgumentException("Incompatible merging of integer column statistics");
-        }
-      }
-      super.merge(other);
-    }
-
-    @Override
-    public OrcProto.ColumnStatistics.Builder serialize() {
-      OrcProto.ColumnStatistics.Builder builder = super.serialize();
-      OrcProto.IntegerStatistics.Builder intb =
-        OrcProto.IntegerStatistics.newBuilder();
-      if (hasMinimum) {
-        intb.setMinimum(minimum);
-        intb.setMaximum(maximum);
-      }
-      if (!overflow) {
-        intb.setSum(sum);
-      }
-      builder.setIntStatistics(intb);
-      return builder;
-    }
-
-    @Override
-    public long getMinimum() {
-      return minimum;
-    }
-
-    @Override
-    public long getMaximum() {
-      return maximum;
-    }
-
-    @Override
-    public boolean isSumDefined() {
-      return !overflow;
-    }
-
-    @Override
-    public long getSum() {
-      return sum;
-    }
-
-    @Override
-    public String toString() {
-      StringBuilder buf = new StringBuilder(super.toString());
-      if (hasMinimum) {
-        buf.append(" min: ");
-        buf.append(minimum);
-        buf.append(" max: ");
-        buf.append(maximum);
-      }
-      if (!overflow) {
-        buf.append(" sum: ");
-        buf.append(sum);
-      }
-      return buf.toString();
-    }
-  }
-
-  private static final class DoubleStatisticsImpl extends ColumnStatisticsImpl
-       implements DoubleColumnStatistics {
-    private boolean hasMinimum = false;
-    private double minimum = Double.MAX_VALUE;
-    private double maximum = Double.MIN_VALUE;
-    private double sum = 0;
-
-    DoubleStatisticsImpl() {
-    }
-
-    DoubleStatisticsImpl(OrcProto.ColumnStatistics stats) {
-      super(stats);
-      OrcProto.DoubleStatistics dbl = stats.getDoubleStatistics();
-      if (dbl.hasMinimum()) {
-        hasMinimum = true;
-        minimum = dbl.getMinimum();
-      }
-      if (dbl.hasMaximum()) {
-        maximum = dbl.getMaximum();
-      }
-      if (dbl.hasSum()) {
-        sum = dbl.getSum();
-      }
-    }
-
-    @Override
-    public void reset() {
-      super.reset();
-      hasMinimum = false;
-      minimum = Double.MAX_VALUE;
-      maximum = Double.MIN_VALUE;
-      sum = 0;
-    }
-
-    @Override
-    public void updateDouble(double value) {
-      if (!hasMinimum) {
-        hasMinimum = true;
-        minimum = value;
-        maximum = value;
-      } else if (value < minimum) {
-        minimum = value;
-      } else if (value > maximum) {
-        maximum = value;
-      }
-      sum += value;
-    }
-
-    @Override
-    public void merge(ColumnStatisticsImpl other) {
-      if (other instanceof DoubleStatisticsImpl) {
-        DoubleStatisticsImpl dbl = (DoubleStatisticsImpl) other;
-        if (!hasMinimum) {
-          hasMinimum = dbl.hasMinimum;
-          minimum = dbl.minimum;
-          maximum = dbl.maximum;
-        } else if (dbl.hasMinimum) {
-          if (dbl.minimum < minimum) {
-            minimum = dbl.minimum;
-          }
-          if (dbl.maximum > maximum) {
-            maximum = dbl.maximum;
-          }
-        }
-        sum += dbl.sum;
-      } else {
-        if (isStatsExists() && hasMinimum) {
-          throw new IllegalArgumentException("Incompatible merging of double column statistics");
-        }
-      }
-      super.merge(other);
-    }
-
-    @Override
-    public OrcProto.ColumnStatistics.Builder serialize() {
-      OrcProto.ColumnStatistics.Builder builder = super.serialize();
-      OrcProto.DoubleStatistics.Builder dbl =
-        OrcProto.DoubleStatistics.newBuilder();
-      if (hasMinimum) {
-        dbl.setMinimum(minimum);
-        dbl.setMaximum(maximum);
-      }
-      dbl.setSum(sum);
-      builder.setDoubleStatistics(dbl);
-      return builder;
-    }
-
-    @Override
-    public double getMinimum() {
-      return minimum;
-    }
-
-    @Override
-    public double getMaximum() {
-      return maximum;
-    }
-
-    @Override
-    public double getSum() {
-      return sum;
-    }
-
-    @Override
-    public String toString() {
-      StringBuilder buf = new StringBuilder(super.toString());
-      if (hasMinimum) {
-        buf.append(" min: ");
-        buf.append(minimum);
-        buf.append(" max: ");
-        buf.append(maximum);
-      }
-      buf.append(" sum: ");
-      buf.append(sum);
-      return buf.toString();
-    }
-  }
-
-  protected static final class StringStatisticsImpl extends ColumnStatisticsImpl
-      implements StringColumnStatistics {
-    private Text minimum = null;
-    private Text maximum = null;
-    private long sum = 0;
-
-    StringStatisticsImpl() {
-    }
-
-    StringStatisticsImpl(OrcProto.ColumnStatistics stats) {
-      super(stats);
-      OrcProto.StringStatistics str = stats.getStringStatistics();
-      if (str.hasMaximum()) {
-        maximum = new Text(str.getMaximum());
-      }
-      if (str.hasMinimum()) {
-        minimum = new Text(str.getMinimum());
-      }
-      if(str.hasSum()) {
-        sum = str.getSum();
-      }
-    }
-
-    @Override
-    public void reset() {
-      super.reset();
-      minimum = null;
-      maximum = null;
-      sum = 0;
-    }
-
-    @Override
-    public void updateString(Text value) {
-      if (minimum == null) {
-        maximum = minimum = new Text(value);
-      } else if (minimum.compareTo(value) > 0) {
-        minimum = new Text(value);
-      } else if (maximum.compareTo(value) < 0) {
-        maximum = new Text(value);
-      }
-      sum += value.getLength();
-    }
-
-    @Override
-    public void updateString(byte[] bytes, int offset, int length,
-                             int repetitions) {
-      if (minimum == null) {
-        maximum = minimum = new Text();
-        maximum.set(bytes, offset, length);
-      } else if (WritableComparator.compareBytes(minimum.getBytes(), 0,
-          minimum.getLength(), bytes, offset, length) > 0) {
-        minimum = new Text();
-        minimum.set(bytes, offset, length);
-      } else if (WritableComparator.compareBytes(maximum.getBytes(), 0,
-          maximum.getLength(), bytes, offset, length) < 0) {
-        maximum = new Text();
-        maximum.set(bytes, offset, length);
-      }
-      sum += length * repetitions;
-    }
-
-    @Override
-    public void merge(ColumnStatisticsImpl other) {
-      if (other instanceof StringStatisticsImpl) {
-        StringStatisticsImpl str = (StringStatisticsImpl) other;
-        if (minimum == null) {
-          if (str.minimum != null) {
-            maximum = new Text(str.getMaximum());
-            minimum = new Text(str.getMinimum());
-          } else {
-          /* both are empty */
-            maximum = minimum = null;
-          }
-        } else if (str.minimum != null) {
-          if (minimum.compareTo(str.minimum) > 0) {
-            minimum = new Text(str.getMinimum());
-          }
-          if (maximum.compareTo(str.maximum) < 0) {
-            maximum = new Text(str.getMaximum());
-          }
-        }
-        sum += str.sum;
-      } else {
-        if (isStatsExists() && minimum != null) {
-          throw new IllegalArgumentException("Incompatible merging of string column statistics");
-        }
-      }
-      super.merge(other);
-    }
-
-    @Override
-    public OrcProto.ColumnStatistics.Builder serialize() {
-      OrcProto.ColumnStatistics.Builder result = super.serialize();
-      OrcProto.StringStatistics.Builder str =
-        OrcProto.StringStatistics.newBuilder();
-      if (getNumberOfValues() != 0) {
-        str.setMinimum(getMinimum());
-        str.setMaximum(getMaximum());
-        str.setSum(sum);
-      }
-      result.setStringStatistics(str);
-      return result;
-    }
-
-    @Override
-    public String getMinimum() {
-      return minimum == null ? null : minimum.toString();
-    }
-
-    @Override
-    public String getMaximum() {
-      return maximum == null ? null : maximum.toString();
-    }
-
-    @Override
-    public long getSum() {
-      return sum;
-    }
-
-    @Override
-    public String toString() {
-      StringBuilder buf = new StringBuilder(super.toString());
-      if (getNumberOfValues() != 0) {
-        buf.append(" min: ");
-        buf.append(getMinimum());
-        buf.append(" max: ");
-        buf.append(getMaximum());
-        buf.append(" sum: ");
-        buf.append(sum);
-      }
-      return buf.toString();
-    }
-  }
-
-  protected static final class BinaryStatisticsImpl extends ColumnStatisticsImpl implements
-      BinaryColumnStatistics {
-
-    private long sum = 0;
-
-    BinaryStatisticsImpl() {
-    }
-
-    BinaryStatisticsImpl(OrcProto.ColumnStatistics stats) {
-      super(stats);
-      OrcProto.BinaryStatistics binStats = stats.getBinaryStatistics();
-      if (binStats.hasSum()) {
-        sum = binStats.getSum();
-      }
-    }
-
-    @Override
-    public void reset() {
-      super.reset();
-      sum = 0;
-    }
-
-    @Override
-    public void updateBinary(BytesWritable value) {
-      sum += value.getLength();
-    }
-
-    @Override
-    public void updateBinary(byte[] bytes, int offset, int length,
-                             int repetitions) {
-      sum += length * repetitions;
-    }
-
-    @Override
-    public void merge(ColumnStatisticsImpl other) {
-      if (other instanceof BinaryColumnStatistics) {
-        BinaryStatisticsImpl bin = (BinaryStatisticsImpl) other;
-        sum += bin.sum;
-      } else {
-        if (isStatsExists() && sum != 0) {
-          throw new IllegalArgumentException("Incompatible merging of binary column statistics");
-        }
-      }
-      super.merge(other);
-    }
-
-    @Override
-    public long getSum() {
-      return sum;
-    }
-
-    @Override
-    public OrcProto.ColumnStatistics.Builder serialize() {
-      OrcProto.ColumnStatistics.Builder result = super.serialize();
-      OrcProto.BinaryStatistics.Builder bin = OrcProto.BinaryStatistics.newBuilder();
-      bin.setSum(sum);
-      result.setBinaryStatistics(bin);
-      return result;
-    }
-
-    @Override
-    public String toString() {
-      StringBuilder buf = new StringBuilder(super.toString());
-      if (getNumberOfValues() != 0) {
-        buf.append(" sum: ");
-        buf.append(sum);
-      }
-      return buf.toString();
-    }
-  }
-
-  private static final class DecimalStatisticsImpl extends ColumnStatisticsImpl
-      implements DecimalColumnStatistics {
-
-    // These objects are mutable for better performance.
-    private HiveDecimalWritable minimum = null;
-    private HiveDecimalWritable maximum = null;
-    private HiveDecimalWritable sum = new HiveDecimalWritable(0);
-
-    DecimalStatisticsImpl() {
-    }
-
-    DecimalStatisticsImpl(OrcProto.ColumnStatistics stats) {
-      super(stats);
-      OrcProto.DecimalStatistics dec = stats.getDecimalStatistics();
-      if (dec.hasMaximum()) {
-        maximum = new HiveDecimalWritable(dec.getMaximum());
-      }
-      if (dec.hasMinimum()) {
-        minimum = new HiveDecimalWritable(dec.getMinimum());
-      }
-      if (dec.hasSum()) {
-        sum = new HiveDecimalWritable(dec.getSum());
-      } else {
-        sum = null;
-      }
-    }
-
-    @Override
-    public void reset() {
-      super.reset();
-      minimum = null;
-      maximum = null;
-      sum = new HiveDecimalWritable(0);
-    }
-
-    @Override
-    public void updateDecimal(HiveDecimalWritable value) {
-      if (minimum == null) {
-        minimum = new HiveDecimalWritable(value);
-        maximum = new HiveDecimalWritable(value);
-      } else if (minimum.compareTo(value) > 0) {
-        minimum.set(value);
-      } else if (maximum.compareTo(value) < 0) {
-        maximum.set(value);
-      }
-      if (sum != null) {
-        sum.mutateAdd(value);
-      }
-    }
-
-    @Override
-    public void merge(ColumnStatisticsImpl other) {
-      if (other instanceof DecimalStatisticsImpl) {
-        DecimalStatisticsImpl dec = (DecimalStatisticsImpl) other;
-        if (minimum == null) {
-          minimum = (dec.minimum != null ? new HiveDecimalWritable(dec.minimum) : null);
-          maximum = (dec.maximum != null ? new HiveDecimalWritable(dec.maximum) : null);
-          sum = dec.sum;
-        } else if (dec.minimum != null) {
-          if (minimum.compareTo(dec.minimum) > 0) {
-            minimum.set(dec.minimum);
-          }
-          if (maximum.compareTo(dec.maximum) < 0) {
-            maximum.set(dec.maximum);
-          }
-          if (sum == null || dec.sum == null) {
-            sum = null;
-          } else {
-            sum.mutateAdd(dec.sum);
-          }
-        }
-      } else {
-        if (isStatsExists() && minimum != null) {
-          throw new IllegalArgumentException("Incompatible merging of decimal column statistics");
-        }
-      }
-      super.merge(other);
-    }
-
-    @Override
-    public OrcProto.ColumnStatistics.Builder serialize() {
-      OrcProto.ColumnStatistics.Builder result = super.serialize();
-      OrcProto.DecimalStatistics.Builder dec =
-          OrcProto.DecimalStatistics.newBuilder();
-      if (getNumberOfValues() != 0 && minimum != null) {
-        dec.setMinimum(minimum.toString());
-        dec.setMaximum(maximum.toString());
-      }
-      // Check isSet for overflow.
-      if (sum != null && sum.isSet()) {
-        dec.setSum(sum.toString());
-      }
-      result.setDecimalStatistics(dec);
-      return result;
-    }
-
-    @Override
-    public HiveDecimal getMinimum() {
-      return minimum == null ? null : minimum.getHiveDecimal();
-    }
-
-    @Override
-    public HiveDecimal getMaximum() {
-      return maximum == null ? null : maximum.getHiveDecimal();
-    }
-
-    @Override
-    public HiveDecimal getSum() {
-      return sum == null ? null : sum.getHiveDecimal();
-    }
-
-    @Override
-    public String toString() {
-      StringBuilder buf = new StringBuilder(super.toString());
-      if (getNumberOfValues() != 0) {
-        buf.append(" min: ");
-        buf.append(minimum);
-        buf.append(" max: ");
-        buf.append(maximum);
-        if (sum != null) {
-          buf.append(" sum: ");
-          buf.append(sum);
-        }
-      }
-      return buf.toString();
-    }
-  }
-
-  private static final class DateStatisticsImpl extends ColumnStatisticsImpl
-      implements DateColumnStatistics {
-    private Integer minimum = null;
-    private Integer maximum = null;
-
-    DateStatisticsImpl() {
-    }
-
-    DateStatisticsImpl(OrcProto.ColumnStatistics stats) {
-      super(stats);
-      OrcProto.DateStatistics dateStats = stats.getDateStatistics();
-      // min,max values serialized/deserialized as int (days since epoch)
-      if (dateStats.hasMaximum()) {
-        maximum = dateStats.getMaximum();
-      }
-      if (dateStats.hasMinimum()) {
-        minimum = dateStats.getMinimum();
-      }
-    }
-
-    @Override
-    public void reset() {
-      super.reset();
-      minimum = null;
-      maximum = null;
-    }
-
-    @Override
-    public void updateDate(DateWritable value) {
-      if (minimum == null) {
-        minimum = value.getDays();
-        maximum = value.getDays();
-      } else if (minimum > value.getDays()) {
-        minimum = value.getDays();
-      } else if (maximum < value.getDays()) {
-        maximum = value.getDays();
-      }
-    }
-
-    @Override
-    public void updateDate(int value) {
-      if (minimum == null) {
-        minimum = value;
-        maximum = value;
-      } else if (minimum > value) {
-        minimum = value;
-      } else if (maximum < value) {
-        maximum = value;
-      }
-    }
-
-    @Override
-    public void merge(ColumnStatisticsImpl other) {
-      if (other instanceof DateStatisticsImpl) {
-        DateStatisticsImpl dateStats = (DateStatisticsImpl) other;
-        if (minimum == null) {
-          minimum = dateStats.minimum;
-          maximum = dateStats.maximum;
-        } else if (dateStats.minimum != null) {
-          if (minimum > dateStats.minimum) {
-            minimum = dateStats.minimum;
-          }
-          if (maximum < dateStats.maximum) {
-            maximum = dateStats.maximum;
-          }
-        }
-      } else {
-        if (isStatsExists() && minimum != null) {
-          throw new IllegalArgumentException("Incompatible merging of date column statistics");
-        }
-      }
-      super.merge(other);
-    }
-
-    @Override
-    public OrcProto.ColumnStatistics.Builder serialize() {
-      OrcProto.ColumnStatistics.Builder result = super.serialize();
-      OrcProto.DateStatistics.Builder dateStats =
-          OrcProto.DateStatistics.newBuilder();
-      if (getNumberOfValues() != 0 && minimum != null) {
-        dateStats.setMinimum(minimum);
-        dateStats.setMaximum(maximum);
-      }
-      result.setDateStatistics(dateStats);
-      return result;
-    }
-
-    private transient final DateWritable minDate = new DateWritable();
-    private transient final DateWritable maxDate = new DateWritable();
-
-    @Override
-    public Date getMinimum() {
-      if (minimum == null) {
-        return null;
-      }
-      minDate.set(minimum);
-      return minDate.get();
-    }
-
-    @Override
-    public Date getMaximum() {
-      if (maximum == null) {
-        return null;
-      }
-      maxDate.set(maximum);
-      return maxDate.get();
-    }
-
-    @Override
-    public String toString() {
-      StringBuilder buf = new StringBuilder(super.toString());
-      if (getNumberOfValues() != 0) {
-        buf.append(" min: ");
-        buf.append(getMinimum());
-        buf.append(" max: ");
-        buf.append(getMaximum());
-      }
-      return buf.toString();
-    }
-  }
-
-  private static final class TimestampStatisticsImpl extends ColumnStatisticsImpl
-      implements TimestampColumnStatistics {
-    private Long minimum = null;
-    private Long maximum = null;
-
-    TimestampStatisticsImpl() {
-    }
-
-    TimestampStatisticsImpl(OrcProto.ColumnStatistics stats) {
-      super(stats);
-      OrcProto.TimestampStatistics timestampStats = stats.getTimestampStatistics();
-      // min,max values serialized/deserialized as long (milliseconds since epoch)
-      if (timestampStats.hasMaximum()) {
-        maximum = timestampStats.getMaximum();
-      }
-      if (timestampStats.hasMinimum()) {
-        minimum = timestampStats.getMinimum();
-      }
-    }
-
-    @Override
-    public void reset() {
-      super.reset();
-      minimum = null;
-      maximum = null;
-    }
-
-    @Override
-    public void updateTimestamp(Timestamp value) {
-      if (minimum == null) {
-        minimum = value.getTime();
-        maximum = value.getTime();
-      } else if (minimum > value.getTime()) {
-        minimum = value.getTime();
-      } else if (maximum < value.getTime()) {
-        maximum = value.getTime();
-      }
-    }
-
-    @Override
-    public void updateTimestamp(long value) {
-      if (minimum == null) {
-        minimum = value;
-        maximum = value;
-      } else if (minimum > value) {
-        minimum = value;
-      } else if (maximum < value) {
-        maximum = value;
-      }
-    }
-
-    @Override
-    public void merge(ColumnStatisticsImpl other) {
-      if (other instanceof TimestampStatisticsImpl) {
-        TimestampStatisticsImpl timestampStats = (TimestampStatisticsImpl) other;
-        if (minimum == null) {
-          minimum = timestampStats.minimum;
-          maximum = timestampStats.maximum;
-        } else if (timestampStats.minimum != null) {
-          if (minimum > timestampStats.minimum) {
-            minimum = timestampStats.minimum;
-          }
-          if (maximum < timestampStats.maximum) {
-            maximum = timestampStats.maximum;
-          }
-        }
-      } else {
-        if (isStatsExists() && minimum != null) {
-          throw new IllegalArgumentException("Incompatible merging of timestamp column statistics");
-        }
-      }
-      super.merge(other);
-    }
-
-    @Override
-    public OrcProto.ColumnStatistics.Builder serialize() {
-      OrcProto.ColumnStatistics.Builder result = super.serialize();
-      OrcProto.TimestampStatistics.Builder timestampStats = OrcProto.TimestampStatistics
-          .newBuilder();
-      if (getNumberOfValues() != 0 && minimum != null) {
-        timestampStats.setMinimum(minimum);
-        timestampStats.setMaximum(maximum);
-      }
-      result.setTimestampStatistics(timestampStats);
-      return result;
-    }
-
-    @Override
-    public Timestamp getMinimum() {
-      return minimum == null ? null : new Timestamp(minimum);
-    }
-
-    @Override
-    public Timestamp getMaximum() {
-      return maximum == null ? null : new Timestamp(maximum);
-    }
-
-    @Override
-    public String toString() {
-      StringBuilder buf = new StringBuilder(super.toString());
-      if (getNumberOfValues() != 0) {
-        buf.append(" min: ");
-        buf.append(getMinimum());
-        buf.append(" max: ");
-        buf.append(getMaximum());
-      }
-      return buf.toString();
-    }
-  }
-
-  private long count = 0;
-  private boolean hasNull = false;
-
-  ColumnStatisticsImpl(OrcProto.ColumnStatistics stats) {
-    if (stats.hasNumberOfValues()) {
-      count = stats.getNumberOfValues();
-    }
-
-    if (stats.hasHasNull()) {
-      hasNull = stats.getHasNull();
-    } else {
-      hasNull = true;
-    }
-  }
-
-  ColumnStatisticsImpl() {
-  }
-
-  public void increment() {
-    count += 1;
-  }
-
-  public void increment(int count) {
-    this.count += count;
-  }
-
-  public void setNull() {
-    hasNull = true;
-  }
-
-  public void updateBoolean(boolean value, int repetitions) {
-    throw new UnsupportedOperationException("Can't update boolean");
-  }
-
-  public void updateInteger(long value, int repetitions) {
-    throw new UnsupportedOperationException("Can't update integer");
-  }
-
-  public void updateDouble(double value) {
-    throw new UnsupportedOperationException("Can't update double");
-  }
-
-  public void updateString(Text value) {
-    throw new UnsupportedOperationException("Can't update string");
-  }
-
-  public void updateString(byte[] bytes, int offset, int length,
-                           int repetitions) {
-    throw new UnsupportedOperationException("Can't update string");
-  }
-
-  public void updateBinary(BytesWritable value) {
-    throw new UnsupportedOperationException("Can't update binary");
-  }
-
-  public void updateBinary(byte[] bytes, int offset, int length,
-                           int repetitions) {
-    throw new UnsupportedOperationException("Can't update string");
-  }
-
-  public void updateDecimal(HiveDecimalWritable value) {
-    throw new UnsupportedOperationException("Can't update decimal");
-  }
-
-  public void updateDate(DateWritable value) {
-    throw new UnsupportedOperationException("Can't update date");
-  }
-
-  public void updateDate(int value) {
-    throw new UnsupportedOperationException("Can't update date");
-  }
-
-  public void updateTimestamp(Timestamp value) {
-    throw new UnsupportedOperationException("Can't update timestamp");
-  }
-
-  public void updateTimestamp(long value) {
-    throw new UnsupportedOperationException("Can't update timestamp");
-  }
-
-  public boolean isStatsExists() {
-    return (count > 0 || hasNull == true);
-  }
-
-  public void merge(ColumnStatisticsImpl stats) {
-    count += stats.count;
-    hasNull |= stats.hasNull;
-  }
-
-  public void reset() {
-    count = 0;
-    hasNull = false;
-  }
-
-  @Override
-  public long getNumberOfValues() {
-    return count;
-  }
-
-  @Override
-  public boolean hasNull() {
-    return hasNull;
-  }
-
-  @Override
-  public String toString() {
-    return "count: " + count + " hasNull: " + hasNull;
-  }
-
-  public OrcProto.ColumnStatistics.Builder serialize() {
-    OrcProto.ColumnStatistics.Builder builder =
-      OrcProto.ColumnStatistics.newBuilder();
-    builder.setNumberOfValues(count);
-    builder.setHasNull(hasNull);
-    return builder;
-  }
-
-  public static ColumnStatisticsImpl create(TypeDescription schema) {
-    switch (schema.getCategory()) {
-      case BOOLEAN:
-        return new BooleanStatisticsImpl();
-      case BYTE:
-      case SHORT:
-      case INT:
-      case LONG:
-        return new IntegerStatisticsImpl();
-      case FLOAT:
-      case DOUBLE:
-        return new DoubleStatisticsImpl();
-      case STRING:
-      case CHAR:
-      case VARCHAR:
-        return new StringStatisticsImpl();
-      case DECIMAL:
-        return new DecimalStatisticsImpl();
-      case DATE:
-        return new DateStatisticsImpl();
-      case TIMESTAMP:
-        return new TimestampStatisticsImpl();
-      case BINARY:
-        return new BinaryStatisticsImpl();
-      default:
-        return new ColumnStatisticsImpl();
-    }
-  }
-
-  public static ColumnStatisticsImpl deserialize(OrcProto.ColumnStatistics stats) {
-    if (stats.hasBucketStatistics()) {
-      return new BooleanStatisticsImpl(stats);
-    } else if (stats.hasIntStatistics()) {
-      return new IntegerStatisticsImpl(stats);
-    } else if (stats.hasDoubleStatistics()) {
-      return new DoubleStatisticsImpl(stats);
-    } else if (stats.hasStringStatistics()) {
-      return new StringStatisticsImpl(stats);
-    } else if (stats.hasDecimalStatistics()) {
-      return new DecimalStatisticsImpl(stats);
-    } else if (stats.hasDateStatistics()) {
-      return new DateStatisticsImpl(stats);
-    } else if (stats.hasTimestampStatistics()) {
-      return new TimestampStatisticsImpl(stats);
-    } else if(stats.hasBinaryStatistics()) {
-      return new BinaryStatisticsImpl(stats);
-    } else {
-      return new ColumnStatisticsImpl(stats);
-    }
-  }
-}


[09/37] hive git commit: HIVE-17118. Move the hive-orc source files to make the package names unique.

Posted by om...@apache.org.
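
[Editorial note on the rename this patch performs.] The test files below are added under the new org.apache.hive.orc package; as the diff shows, the visible change is in the package and import statements. A minimal sketch of what that looks like for code referencing these classes (class names are taken from the diff itself; nothing beyond the prefix change is implied):

    // before HIVE-17118: the ORC classes lived under org.apache.orc
    // import org.apache.orc.CompressionCodec;
    // import org.apache.orc.impl.OutStream;

    // after HIVE-17118: the same classes live under org.apache.hive.orc
    import org.apache.hive.orc.CompressionCodec;
    import org.apache.hive.orc.impl.OutStream;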
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/impl/TestBitFieldReader.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/impl/TestBitFieldReader.java b/orc/src/test/org/apache/hive/orc/impl/TestBitFieldReader.java
new file mode 100644
index 0000000..dbdded5
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/impl/TestBitFieldReader.java
@@ -0,0 +1,145 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc.impl;
+
+import static junit.framework.Assert.assertEquals;
+
+import java.nio.ByteBuffer;
+
+import org.apache.hive.orc.CompressionCodec;
+import org.junit.Test;
+
+public class TestBitFieldReader {
+
+  public void runSeekTest(CompressionCodec codec) throws Exception {
+    TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
+    final int COUNT = 16384;
+    BitFieldWriter out = new BitFieldWriter(
+        new OutStream("test", 500, codec, collect), 1);
+    TestInStream.PositionCollector[] positions =
+        new TestInStream.PositionCollector[COUNT];
+    for(int i=0; i < COUNT; ++i) {
+      positions[i] = new TestInStream.PositionCollector();
+      out.getPosition(positions[i]);
+      // test runs, non-runs
+      if (i < COUNT / 2) {
+        out.write(i & 1);
+      } else {
+        out.write((i/3) & 1);
+      }
+    }
+    out.flush();
+    ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
+    collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
+    inBuf.flip();
+    BitFieldReader in = new BitFieldReader(InStream.create("test",
+        new ByteBuffer[]{inBuf}, new long[]{0}, inBuf.remaining(),
+        codec, 500), 1);
+    for(int i=0; i < COUNT; ++i) {
+      int x = in.next();
+      if (i < COUNT / 2) {
+        assertEquals(i & 1, x);
+      } else {
+        assertEquals((i/3) & 1, x);
+      }
+    }
+    for(int i=COUNT-1; i >= 0; --i) {
+      in.seek(positions[i]);
+      int x = in.next();
+      if (i < COUNT / 2) {
+        assertEquals(i & 1, x);
+      } else {
+        assertEquals((i/3) & 1, x);
+      }
+    }
+  }
+
+  @Test
+  public void testUncompressedSeek() throws Exception {
+    runSeekTest(null);
+  }
+
+  @Test
+  public void testCompressedSeek() throws Exception {
+    runSeekTest(new ZlibCodec());
+  }
+
+  @Test
+  public void testBiggerItems() throws Exception {
+    TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
+    final int COUNT = 16384;
+    BitFieldWriter out = new BitFieldWriter(
+        new OutStream("test", 500, null, collect), 3);
+    for(int i=0; i < COUNT; ++i) {
+      // test runs, non-runs
+      if (i < COUNT / 2) {
+        out.write(i & 7);
+      } else {
+        out.write((i/3) & 7);
+      }
+    }
+    out.flush();
+    ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
+    collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
+    inBuf.flip();
+    BitFieldReader in = new BitFieldReader(InStream.create("test",
+        new ByteBuffer[]{inBuf}, new long[]{0}, inBuf.remaining(),
+        null, 500), 3);
+    for(int i=0; i < COUNT; ++i) {
+      int x = in.next();
+      if (i < COUNT / 2) {
+        assertEquals(i & 7, x);
+      } else {
+        assertEquals((i/3) & 7, x);
+      }
+    }
+  }
+
+  @Test
+  public void testSkips() throws Exception {
+    TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
+    BitFieldWriter out = new BitFieldWriter(
+        new OutStream("test", 100, null, collect), 1);
+    final int COUNT = 16384;
+    for(int i=0; i < COUNT; ++i) {
+      if (i < COUNT/2) {
+        out.write(i & 1);
+      } else {
+        out.write((i/3) & 1);
+      }
+    }
+    out.flush();
+    ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
+    collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
+    inBuf.flip();
+    BitFieldReader in = new BitFieldReader(InStream.create("test", new ByteBuffer[]{inBuf},
+        new long[]{0}, inBuf.remaining(), null, 100), 1);
+    for(int i=0; i < COUNT; i += 5) {
+      int x = (int) in.next();
+      if (i < COUNT/2) {
+        assertEquals(i & 1, x);
+      } else {
+        assertEquals((i/3) & 1, x);
+      }
+      if (i < COUNT - 5) {
+        in.skip(4);
+      }
+      in.skip(0);
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/impl/TestBitPack.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/impl/TestBitPack.java b/orc/src/test/org/apache/hive/orc/impl/TestBitPack.java
new file mode 100644
index 0000000..1790c35
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/impl/TestBitPack.java
@@ -0,0 +1,279 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc.impl;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Collections;
+import java.util.Random;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestName;
+
+import com.google.common.primitives.Longs;
+
+public class TestBitPack {
+
+  private static final int SIZE = 100;
+  private static Random rand = new Random(100);
+  Path workDir = new Path(System.getProperty("test.tmp.dir", "target" + File.separator + "test"
+      + File.separator + "tmp"));
+
+  Configuration conf;
+  FileSystem fs;
+  Path testFilePath;
+
+  @Rule
+  public TestName testCaseName = new TestName();
+
+  @Before
+  public void openFileSystem() throws Exception {
+    conf = new Configuration();
+    fs = FileSystem.getLocal(conf);
+    testFilePath = new Path(workDir, "TestOrcFile." + testCaseName.getMethodName() + ".orc");
+    fs.delete(testFilePath, false);
+  }
+
+  private long[] deltaEncode(long[] inp) {
+    long[] output = new long[inp.length];
+    SerializationUtils utils = new SerializationUtils();
+    for (int i = 0; i < inp.length; i++) {
+      output[i] = utils.zigzagEncode(inp[i]);
+    }
+    return output;
+  }
+
+  private long nextLong(Random rng, long n) {
+    long bits, val;
+    do {
+      bits = (rng.nextLong() << 1) >>> 1;
+      val = bits % n;
+    } while (bits - val + (n - 1) < 0L);
+    return val;
+  }
+
+  private void runTest(int numBits) throws IOException {
+    long[] inp = new long[SIZE];
+    for (int i = 0; i < SIZE; i++) {
+      long val = 0;
+      if (numBits <= 32) {
+        if (numBits == 1) {
+          val = -1 * rand.nextInt(2);
+        } else {
+          val = rand.nextInt((int) Math.pow(2, numBits - 1));
+        }
+      } else {
+        val = nextLong(rand, (long) Math.pow(2, numBits - 2));
+      }
+      if (val % 2 == 0) {
+        val = -val;
+      }
+      inp[i] = val;
+    }
+    long[] deltaEncoded = deltaEncode(inp);
+    long minInput = Collections.min(Longs.asList(deltaEncoded));
+    long maxInput = Collections.max(Longs.asList(deltaEncoded));
+    long rangeInput = maxInput - minInput;
+    SerializationUtils utils = new SerializationUtils();
+    int fixedWidth = utils.findClosestNumBits(rangeInput);
+    TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
+    OutStream output = new OutStream("test", SIZE, null, collect);
+    utils.writeInts(deltaEncoded, 0, deltaEncoded.length, fixedWidth, output);
+    output.flush();
+    ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
+    collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
+    inBuf.flip();
+    long[] buff = new long[SIZE];
+    utils.readInts(buff, 0, SIZE, fixedWidth, InStream.create("test", new ByteBuffer[] { inBuf },
+        new long[] { 0 }, inBuf.remaining(), null, SIZE));
+    for (int i = 0; i < SIZE; i++) {
+      buff[i] = utils.zigzagDecode(buff[i]);
+    }
+    assertEquals(numBits, fixedWidth);
+    assertArrayEquals(inp, buff);
+  }
+
+  @Test
+  public void test01BitPacking1Bit() throws IOException {
+    runTest(1);
+  }
+
+  @Test
+  public void test02BitPacking2Bit() throws IOException {
+    runTest(2);
+  }
+
+  @Test
+  public void test03BitPacking3Bit() throws IOException {
+    runTest(3);
+  }
+
+  @Test
+  public void test04BitPacking4Bit() throws IOException {
+    runTest(4);
+  }
+
+  @Test
+  public void test05BitPacking5Bit() throws IOException {
+    runTest(5);
+  }
+
+  @Test
+  public void test06BitPacking6Bit() throws IOException {
+    runTest(6);
+  }
+
+  @Test
+  public void test07BitPacking7Bit() throws IOException {
+    runTest(7);
+  }
+
+  @Test
+  public void test08BitPacking8Bit() throws IOException {
+    runTest(8);
+  }
+
+  @Test
+  public void test09BitPacking9Bit() throws IOException {
+    runTest(9);
+  }
+
+  @Test
+  public void test10BitPacking10Bit() throws IOException {
+    runTest(10);
+  }
+
+  @Test
+  public void test11BitPacking11Bit() throws IOException {
+    runTest(11);
+  }
+
+  @Test
+  public void test12BitPacking12Bit() throws IOException {
+    runTest(12);
+  }
+
+  @Test
+  public void test13BitPacking13Bit() throws IOException {
+    runTest(13);
+  }
+
+  @Test
+  public void test14BitPacking14Bit() throws IOException {
+    runTest(14);
+  }
+
+  @Test
+  public void test15BitPacking15Bit() throws IOException {
+    runTest(15);
+  }
+
+  @Test
+  public void test16BitPacking16Bit() throws IOException {
+    runTest(16);
+  }
+
+  @Test
+  public void test17BitPacking17Bit() throws IOException {
+    runTest(17);
+  }
+
+  @Test
+  public void test18BitPacking18Bit() throws IOException {
+    runTest(18);
+  }
+
+  @Test
+  public void test19BitPacking19Bit() throws IOException {
+    runTest(19);
+  }
+
+  @Test
+  public void test20BitPacking20Bit() throws IOException {
+    runTest(20);
+  }
+
+  @Test
+  public void test21BitPacking21Bit() throws IOException {
+    runTest(21);
+  }
+
+  @Test
+  public void test22BitPacking22Bit() throws IOException {
+    runTest(22);
+  }
+
+  @Test
+  public void test23BitPacking23Bit() throws IOException {
+    runTest(23);
+  }
+
+  @Test
+  public void test24BitPacking24Bit() throws IOException {
+    runTest(24);
+  }
+
+  @Test
+  public void test26BitPacking26Bit() throws IOException {
+    runTest(26);
+  }
+
+  @Test
+  public void test28BitPacking28Bit() throws IOException {
+    runTest(28);
+  }
+
+  @Test
+  public void test30BitPacking30Bit() throws IOException {
+    runTest(30);
+  }
+
+  @Test
+  public void test32BitPacking32Bit() throws IOException {
+    runTest(32);
+  }
+
+  @Test
+  public void test40BitPacking40Bit() throws IOException {
+    runTest(40);
+  }
+
+  @Test
+  public void test48BitPacking48Bit() throws IOException {
+    runTest(48);
+  }
+
+  @Test
+  public void test56BitPacking56Bit() throws IOException {
+    runTest(56);
+  }
+
+  @Test
+  public void test64BitPacking64Bit() throws IOException {
+    runTest(64);
+  }
+}
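
TestBitPack runs every value through SerializationUtils.zigzagEncode before bit-packing and zigzagDecode after reading, so that small negative numbers also fit in a small fixed width. A standalone sketch of that transform, assuming SerializationUtils follows the standard zigzag scheme; the class and method names here are illustrative, not the ORC API:

    public class ZigZagSketch {
      // Interleaves signed values onto the non-negative range: 0->0, -1->1, 1->2, -2->3, ...
      // so magnitudes near zero need few bits regardless of sign.
      static long zigzagEncode(long value) {
        return (value << 1) ^ (value >> 63);   // arithmetic shift copies the sign bit
      }

      static long zigzagDecode(long encoded) {
        return (encoded >>> 1) ^ -(encoded & 1);
      }

      public static void main(String[] args) {
        for (long v : new long[]{0, -1, 1, -2, 123, -12345678901L}) {
          long e = zigzagEncode(v);
          System.out.println(v + " -> " + e + " -> " + zigzagDecode(e));
        }
      }
    }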

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/impl/TestColumnStatisticsImpl.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/impl/TestColumnStatisticsImpl.java b/orc/src/test/org/apache/hive/orc/impl/TestColumnStatisticsImpl.java
new file mode 100644
index 0000000..4708043
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/impl/TestColumnStatisticsImpl.java
@@ -0,0 +1,64 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc.impl;
+
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hive.orc.OrcProto;
+import org.apache.hive.orc.TypeDescription;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+public class TestColumnStatisticsImpl {
+
+  @Test
+  public void testUpdateDate() throws Exception {
+    ColumnStatisticsImpl stat = ColumnStatisticsImpl.create(TypeDescription.createDate());
+    DateWritable date = new DateWritable(16400);
+    stat.increment();
+    stat.updateDate(date);
+    assertDateStatistics(stat, 1, 16400, 16400);
+
+    date.set(16410);
+    stat.increment();
+    stat.updateDate(date);
+    assertDateStatistics(stat, 2, 16400, 16410);
+
+    date.set(16420);
+    stat.increment();
+    stat.updateDate(date);
+    assertDateStatistics(stat, 3, 16400, 16420);
+  }
+
+  private void assertDateStatistics(ColumnStatisticsImpl stat, int count, int minimum, int maximum) {
+    OrcProto.ColumnStatistics.Builder builder = stat.serialize();
+
+    assertEquals(count, builder.getNumberOfValues());
+    assertTrue(builder.hasDateStatistics());
+    assertFalse(builder.hasStringStatistics());
+
+    OrcProto.DateStatistics protoStat = builder.getDateStatistics();
+    assertTrue(protoStat.hasMinimum());
+    assertEquals(minimum, protoStat.getMinimum());
+    assertTrue(protoStat.hasMaximum());
+    assertEquals(maximum, protoStat.getMaximum());
+  }
+}
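
The constants in testUpdateDate are day counts: DateWritable stores a date as days since the Unix epoch, and the assertions check that the serialized DateStatistics carry that raw count (16400 and 16420) rather than a formatted date. A tiny sketch just to make those numbers concrete:

    import java.time.LocalDate;

    public class EpochDaySketch {
      public static void main(String[] args) {
        // The same day counts the test feeds into DateWritable.
        System.out.println(LocalDate.ofEpochDay(16400));  // 2014-11-26
        System.out.println(LocalDate.ofEpochDay(16420));  // 2014-12-16
      }
    }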

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/impl/TestDataReaderProperties.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/impl/TestDataReaderProperties.java b/orc/src/test/org/apache/hive/orc/impl/TestDataReaderProperties.java
new file mode 100644
index 0000000..1e53f55
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/impl/TestDataReaderProperties.java
@@ -0,0 +1,85 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc.impl;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hive.orc.CompressionKind;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
+import static org.mockito.Mockito.mock;
+
+public class TestDataReaderProperties {
+
+  private FileSystem mockedFileSystem = mock(FileSystem.class);
+  private Path mockedPath = mock(Path.class);
+  private boolean mockedZeroCopy = false;
+
+  @Test
+  public void testCompleteBuild() {
+    DataReaderProperties properties = DataReaderProperties.builder()
+      .withFileSystem(mockedFileSystem)
+      .withPath(mockedPath)
+      .withCompression(CompressionKind.ZLIB)
+      .withZeroCopy(mockedZeroCopy)
+      .build();
+    assertEquals(mockedFileSystem, properties.getFileSystem());
+    assertEquals(mockedPath, properties.getPath());
+    assertEquals(CompressionKind.ZLIB, properties.getCompression());
+    assertEquals(mockedZeroCopy, properties.getZeroCopy());
+  }
+
+  @Test
+  public void testMissingNonRequiredArgs() {
+    DataReaderProperties properties = DataReaderProperties.builder()
+      .withFileSystem(mockedFileSystem)
+      .withPath(mockedPath)
+      .build();
+    assertEquals(mockedFileSystem, properties.getFileSystem());
+    assertEquals(mockedPath, properties.getPath());
+    assertNull(properties.getCompression());
+    assertFalse(properties.getZeroCopy());
+  }
+
+  @Test(expected = java.lang.NullPointerException.class)
+  public void testEmptyBuild() {
+    DataReaderProperties.builder().build();
+  }
+
+  @Test(expected = java.lang.NullPointerException.class)
+  public void testMissingPath() {
+    DataReaderProperties.builder()
+      .withFileSystem(mockedFileSystem)
+      .withCompression(CompressionKind.NONE)
+      .withZeroCopy(mockedZeroCopy)
+      .build();
+  }
+
+  @Test(expected = java.lang.NullPointerException.class)
+  public void testMissingFileSystem() {
+    DataReaderProperties.builder()
+      .withPath(mockedPath)
+      .withCompression(CompressionKind.NONE)
+      .withZeroCopy(mockedZeroCopy)
+      .build();
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/impl/TestDynamicArray.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/impl/TestDynamicArray.java b/orc/src/test/org/apache/hive/orc/impl/TestDynamicArray.java
new file mode 100644
index 0000000..408be12
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/impl/TestDynamicArray.java
@@ -0,0 +1,88 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc.impl;
+
+import java.util.Random;
+
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+public class TestDynamicArray {
+
+  @Test
+  public void testByteArray() throws Exception {
+    DynamicByteArray dba = new DynamicByteArray(3, 10);
+    dba.add((byte) 0);
+    dba.add((byte) 1);
+    dba.set(3, (byte) 3);
+    dba.set(2, (byte) 2);
+    dba.add((byte) 4);
+    assertEquals("{0,1,2,3,4}", dba.toString());
+    assertEquals(5, dba.size());
+    byte[] val;
+    val = new byte[0];
+    assertEquals(0, dba.compare(val, 0, 0, 2, 0));
+    assertEquals(-1, dba.compare(val, 0, 0, 2, 1));
+    val = new byte[]{3,42};
+    assertEquals(1, dba.compare(val, 0, 1, 2, 0));
+    assertEquals(1, dba.compare(val, 0, 1, 2, 1));
+    assertEquals(0, dba.compare(val, 0, 1, 3, 1));
+    assertEquals(-1, dba.compare(val, 0, 1, 3, 2));
+    assertEquals(1, dba.compare(val, 0, 2, 3, 1));
+    val = new byte[256];
+    for(int b=-128; b < 128; ++b) {
+      dba.add((byte) b);
+      val[b+128] = (byte) b;
+    }
+    assertEquals(0, dba.compare(val, 0, 256, 5, 256));
+    assertEquals(1, dba.compare(val, 0, 1, 0, 1));
+    assertEquals(1, dba.compare(val, 254, 1, 0, 1));
+    assertEquals(1, dba.compare(val, 120, 1, 64, 1));
+    val = new byte[1024];
+    Random rand = new Random(1701);
+    for(int i = 0; i < val.length; ++i) {
+      rand.nextBytes(val);
+    }
+    dba.add(val, 0, 1024);
+    assertEquals(1285, dba.size());
+    assertEquals(0, dba.compare(val, 0, 1024, 261, 1024));
+  }
+
+  @Test
+  public void testIntArray() throws Exception {
+    DynamicIntArray dia = new DynamicIntArray(10);
+    for(int i=0; i < 10000; ++i) {
+      dia.add(2*i);
+    }
+    assertEquals(10000, dia.size());
+    for(int i=0; i < 10000; ++i) {
+      assertEquals(2*i, dia.get(i));
+    }
+    dia.clear();
+    assertEquals(0, dia.size());
+    dia.add(3);
+    dia.add(12);
+    dia.add(65);
+    assertEquals("{3,12,65}", dia.toString());
+    for(int i=0; i < 5; ++i) {
+      dia.increment(i, 3);
+    }
+    assertEquals("{6,15,68,3,3}", dia.toString());
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/impl/TestInStream.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/impl/TestInStream.java b/orc/src/test/org/apache/hive/orc/impl/TestInStream.java
new file mode 100644
index 0000000..ffaef54
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/impl/TestInStream.java
@@ -0,0 +1,314 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc.impl;
+
+import static junit.framework.Assert.assertEquals;
+import static junit.framework.Assert.fail;
+
+import java.io.DataInputStream;
+import java.io.DataOutput;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hive.orc.CompressionCodec;
+import org.junit.Test;
+
+public class TestInStream {
+
+  static class OutputCollector implements OutStream.OutputReceiver {
+    DynamicByteArray buffer = new DynamicByteArray();
+
+    @Override
+    public void output(ByteBuffer buffer) throws IOException {
+      this.buffer.add(buffer.array(), buffer.arrayOffset() + buffer.position(),
+          buffer.remaining());
+    }
+  }
+
+  static class PositionCollector
+      implements PositionProvider, PositionRecorder {
+    private List<Long> positions = new ArrayList<Long>();
+    private int index = 0;
+
+    @Override
+    public long getNext() {
+      return positions.get(index++);
+    }
+
+    @Override
+    public void addPosition(long offset) {
+      positions.add(offset);
+    }
+
+    public void reset() {
+      index = 0;
+    }
+
+    @Override
+    public String toString() {
+      StringBuilder builder = new StringBuilder("position: ");
+      for(int i=0; i < positions.size(); ++i) {
+        if (i != 0) {
+          builder.append(", ");
+        }
+        builder.append(positions.get(i));
+      }
+      return builder.toString();
+    }
+  }
+
+  @Test
+  public void testUncompressed() throws Exception {
+    OutputCollector collect = new OutputCollector();
+    OutStream out = new OutStream("test", 100, null, collect);
+    PositionCollector[] positions = new PositionCollector[1024];
+    for(int i=0; i < 1024; ++i) {
+      positions[i] = new PositionCollector();
+      out.getPosition(positions[i]);
+      out.write(i);
+    }
+    out.flush();
+    assertEquals(1024, collect.buffer.size());
+    for(int i=0; i < 1024; ++i) {
+      assertEquals((byte) i, collect.buffer.get(i));
+    }
+    ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
+    collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
+    inBuf.flip();
+    InStream in = InStream.create("test", new ByteBuffer[]{inBuf},
+        new long[]{0}, inBuf.remaining(), null, 100);
+    assertEquals("uncompressed stream test position: 0 length: 1024" +
+                 " range: 0 offset: 0 limit: 0",
+                 in.toString());
+    for(int i=0; i < 1024; ++i) {
+      int x = in.read();
+      assertEquals(i & 0xff, x);
+    }
+    for(int i=1023; i >= 0; --i) {
+      in.seek(positions[i]);
+      assertEquals(i & 0xff, in.read());
+    }
+  }
+
+  @Test
+  public void testCompressed() throws Exception {
+    OutputCollector collect = new OutputCollector();
+    CompressionCodec codec = new ZlibCodec();
+    OutStream out = new OutStream("test", 300, codec, collect);
+    PositionCollector[] positions = new PositionCollector[1024];
+    for(int i=0; i < 1024; ++i) {
+      positions[i] = new PositionCollector();
+      out.getPosition(positions[i]);
+      out.write(i);
+    }
+    out.flush();
+    assertEquals("test", out.toString());
+    assertEquals(961, collect.buffer.size());
+    ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
+    collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
+    inBuf.flip();
+    InStream in = InStream.create("test", new ByteBuffer[]{inBuf},
+        new long[]{0}, inBuf.remaining(), codec, 300);
+    assertEquals("compressed stream test position: 0 length: 961 range: 0" +
+                 " offset: 0 limit: 0 range 0 = 0 to 961",
+                 in.toString());
+    for(int i=0; i < 1024; ++i) {
+      int x = in.read();
+      assertEquals(i & 0xff, x);
+    }
+    assertEquals(0, in.available());
+    for(int i=1023; i >= 0; --i) {
+      in.seek(positions[i]);
+      assertEquals(i & 0xff, in.read());
+    }
+  }
+
+  @Test
+  public void testCorruptStream() throws Exception {
+    OutputCollector collect = new OutputCollector();
+    CompressionCodec codec = new ZlibCodec();
+    OutStream out = new OutStream("test", 500, codec, collect);
+    PositionCollector[] positions = new PositionCollector[1024];
+    for(int i=0; i < 1024; ++i) {
+      positions[i] = new PositionCollector();
+      out.getPosition(positions[i]);
+      out.write(i);
+    }
+    out.flush();
+
+    // now try to read the stream with a buffer that is too small
+    ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
+    collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
+    inBuf.flip();
+    InStream in = InStream.create("test", new ByteBuffer[]{inBuf},
+        new long[]{0}, inBuf.remaining(), codec, 100);
+    byte[] contents = new byte[1024];
+    try {
+      in.read(contents);
+      fail();
+    } catch(IllegalArgumentException iae) {
+      // EXPECTED
+    }
+
+    // make a corrupted header
+    inBuf.clear();
+    inBuf.put((byte) 32);
+    inBuf.put((byte) 0);
+    inBuf.flip();
+    in = InStream.create("test2", new ByteBuffer[]{inBuf}, new long[]{0},
+        inBuf.remaining(), codec, 300);
+    try {
+      in.read();
+      fail();
+    } catch (IllegalStateException ise) {
+      // EXPECTED
+    }
+  }
+
+  @Test
+  public void testDisjointBuffers() throws Exception {
+    OutputCollector collect = new OutputCollector();
+    CompressionCodec codec = new ZlibCodec();
+    OutStream out = new OutStream("test", 400, codec, collect);
+    PositionCollector[] positions = new PositionCollector[1024];
+    DataOutput stream = new DataOutputStream(out);
+    for(int i=0; i < 1024; ++i) {
+      positions[i] = new PositionCollector();
+      out.getPosition(positions[i]);
+      stream.writeInt(i);
+    }
+    out.flush();
+    assertEquals("test", out.toString());
+    assertEquals(1674, collect.buffer.size());
+    ByteBuffer[] inBuf = new ByteBuffer[3];
+    inBuf[0] = ByteBuffer.allocate(500);
+    inBuf[1] = ByteBuffer.allocate(1200);
+    inBuf[2] = ByteBuffer.allocate(500);
+    collect.buffer.setByteBuffer(inBuf[0], 0, 483);
+    collect.buffer.setByteBuffer(inBuf[1], 483, 1625 - 483);
+    collect.buffer.setByteBuffer(inBuf[2], 1625, 1674 - 1625);
+
+    for(int i=0; i < inBuf.length; ++i) {
+      inBuf[i].flip();
+    }
+    InStream in = InStream.create("test", inBuf,
+        new long[]{0,483, 1625}, 1674, codec, 400);
+    assertEquals("compressed stream test position: 0 length: 1674 range: 0" +
+                 " offset: 0 limit: 0 range 0 = 0 to 483;" +
+                 "  range 1 = 483 to 1142;  range 2 = 1625 to 49",
+                 in.toString());
+    DataInputStream inStream = new DataInputStream(in);
+    for(int i=0; i < 1024; ++i) {
+      int x = inStream.readInt();
+      assertEquals(i, x);
+    }
+    assertEquals(0, in.available());
+    for(int i=1023; i >= 0; --i) {
+      in.seek(positions[i]);
+      assertEquals(i, inStream.readInt());
+    }
+
+    in = InStream.create("test", new ByteBuffer[]{inBuf[1], inBuf[2]},
+        new long[]{483, 1625}, 1674, codec, 400);
+    inStream = new DataInputStream(in);
+    positions[303].reset();
+    in.seek(positions[303]);
+    for(int i=303; i < 1024; ++i) {
+      assertEquals(i, inStream.readInt());
+    }
+
+    in = InStream.create("test", new ByteBuffer[]{inBuf[0], inBuf[2]},
+        new long[]{0, 1625}, 1674, codec, 400);
+    inStream = new DataInputStream(in);
+    positions[1001].reset();
+    for(int i=0; i < 300; ++i) {
+      assertEquals(i, inStream.readInt());
+    }
+    in.seek(positions[1001]);
+    for(int i=1001; i < 1024; ++i) {
+      assertEquals(i, inStream.readInt());
+    }
+  }
+
+  @Test
+  public void testUncompressedDisjointBuffers() throws Exception {
+    OutputCollector collect = new OutputCollector();
+    OutStream out = new OutStream("test", 400, null, collect);
+    PositionCollector[] positions = new PositionCollector[1024];
+    DataOutput stream = new DataOutputStream(out);
+    for(int i=0; i < 1024; ++i) {
+      positions[i] = new PositionCollector();
+      out.getPosition(positions[i]);
+      stream.writeInt(i);
+    }
+    out.flush();
+    assertEquals("test", out.toString());
+    assertEquals(4096, collect.buffer.size());
+    ByteBuffer[] inBuf = new ByteBuffer[3];
+    inBuf[0] = ByteBuffer.allocate(1100);
+    inBuf[1] = ByteBuffer.allocate(2200);
+    inBuf[2] = ByteBuffer.allocate(1100);
+    collect.buffer.setByteBuffer(inBuf[0], 0, 1024);
+    collect.buffer.setByteBuffer(inBuf[1], 1024, 2048);
+    collect.buffer.setByteBuffer(inBuf[2], 3072, 1024);
+
+    for(int i=0; i < inBuf.length; ++i) {
+      inBuf[i].flip();
+    }
+    InStream in = InStream.create("test", inBuf,
+        new long[]{0, 1024, 3072}, 4096, null, 400);
+    assertEquals("uncompressed stream test position: 0 length: 4096" +
+                 " range: 0 offset: 0 limit: 0",
+                 in.toString());
+    DataInputStream inStream = new DataInputStream(in);
+    for(int i=0; i < 1024; ++i) {
+      int x = inStream.readInt();
+      assertEquals(i, x);
+    }
+    assertEquals(0, in.available());
+    for(int i=1023; i >= 0; --i) {
+      in.seek(positions[i]);
+      assertEquals(i, inStream.readInt());
+    }
+
+    in = InStream.create("test", new ByteBuffer[]{inBuf[1], inBuf[2]},
+        new long[]{1024, 3072}, 4096, null, 400);
+    inStream = new DataInputStream(in);
+    positions[256].reset();
+    in.seek(positions[256]);
+    for(int i=256; i < 1024; ++i) {
+      assertEquals(i, inStream.readInt());
+    }
+
+    in = InStream.create("test", new ByteBuffer[]{inBuf[0], inBuf[2]},
+        new long[]{0, 3072}, 4096, null, 400);
+    inStream = new DataInputStream(in);
+    positions[768].reset();
+    for(int i=0; i < 256; ++i) {
+      assertEquals(i, inStream.readInt());
+    }
+    in.seek(positions[768]);
+    for(int i=768; i < 1024; ++i) {
+      assertEquals(i, inStream.readInt());
+    }
+  }
+}
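
testCorruptStream hand-builds a compressed chunk header from the bytes 32 and 0. A small sketch of how such a header reads, assuming the standard ORC framing of a 3-byte little-endian value whose low bit flags an uncompressed ("original") chunk and whose upper 23 bits give the chunk length; the third byte below is an assumed 0, since the test's buffer ends after two bytes, which is exactly why the read cannot complete:

    public class ChunkHeaderSketch {
      // Decode a 3-byte ORC compression chunk header.
      static void decode(int b0, int b1, int b2) {
        int header = (b0 & 0xff) | ((b1 & 0xff) << 8) | ((b2 & 0xff) << 16);
        boolean original = (header & 1) != 0;
        int length = header >>> 1;
        System.out.println("original=" + original + " length=" + length);
      }

      public static void main(String[] args) {
        decode(32, 0, 0);   // original=false length=16, but no 16 compressed bytes follow
      }
    }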

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/impl/TestIntegerCompressionReader.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/impl/TestIntegerCompressionReader.java b/orc/src/test/org/apache/hive/orc/impl/TestIntegerCompressionReader.java
new file mode 100644
index 0000000..55d6893
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/impl/TestIntegerCompressionReader.java
@@ -0,0 +1,130 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc.impl;
+
+import static junit.framework.Assert.assertEquals;
+
+import java.nio.ByteBuffer;
+import java.util.Random;
+
+import org.apache.hive.orc.CompressionCodec;
+import org.junit.Test;
+
+public class TestIntegerCompressionReader {
+
+  public void runSeekTest(CompressionCodec codec) throws Exception {
+    TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
+    RunLengthIntegerWriterV2 out = new RunLengthIntegerWriterV2(
+        new OutStream("test", 1000, codec, collect), true);
+    TestInStream.PositionCollector[] positions =
+        new TestInStream.PositionCollector[4096];
+    Random random = new Random(99);
+    int[] junk = new int[2048];
+    for(int i=0; i < junk.length; ++i) {
+      junk[i] = random.nextInt();
+    }
+    for(int i=0; i < 4096; ++i) {
+      positions[i] = new TestInStream.PositionCollector();
+      out.getPosition(positions[i]);
+      // test runs, incrementing runs, non-runs
+      if (i < 1024) {
+        out.write(i/4);
+      } else if (i < 2048) {
+        out.write(2*i);
+      } else {
+        out.write(junk[i-2048]);
+      }
+    }
+    out.flush();
+    ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
+    collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
+    inBuf.flip();
+    RunLengthIntegerReaderV2 in =
+      new RunLengthIntegerReaderV2(InStream.create
+                                   ("test", new ByteBuffer[]{inBuf},
+                                    new long[]{0}, inBuf.remaining(),
+                                    codec, 1000), true, false);
+    for(int i=0; i < 2048; ++i) {
+      int x = (int) in.next();
+      if (i < 1024) {
+        assertEquals(i/4, x);
+      } else if (i < 2048) {
+        assertEquals(2*i, x);
+      } else {
+        assertEquals(junk[i-2048], x);
+      }
+    }
+    for(int i=2047; i >= 0; --i) {
+      in.seek(positions[i]);
+      int x = (int) in.next();
+      if (i < 1024) {
+        assertEquals(i/4, x);
+      } else if (i < 2048) {
+        assertEquals(2*i, x);
+      } else {
+        assertEquals(junk[i-2048], x);
+      }
+    }
+  }
+
+  @Test
+  public void testUncompressedSeek() throws Exception {
+    runSeekTest(null);
+  }
+
+  @Test
+  public void testCompressedSeek() throws Exception {
+    runSeekTest(new ZlibCodec());
+  }
+
+  @Test
+  public void testSkips() throws Exception {
+    TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
+    RunLengthIntegerWriterV2 out = new RunLengthIntegerWriterV2(
+        new OutStream("test", 100, null, collect), true);
+    for(int i=0; i < 2048; ++i) {
+      if (i < 1024) {
+        out.write(i);
+      } else {
+        out.write(256 * i);
+      }
+    }
+    out.flush();
+    ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
+    collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
+    inBuf.flip();
+    RunLengthIntegerReaderV2 in =
+      new RunLengthIntegerReaderV2(InStream.create("test",
+                                                   new ByteBuffer[]{inBuf},
+                                                   new long[]{0},
+                                                   inBuf.remaining(),
+                                                   null, 100), true, false);
+    for(int i=0; i < 2048; i += 10) {
+      int x = (int) in.next();
+      if (i < 1024) {
+        assertEquals(i, x);
+      } else {
+        assertEquals(256 * i, x);
+      }
+      if (i < 2038) {
+        in.skip(9);
+      }
+      in.skip(0);
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/impl/TestMemoryManager.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/impl/TestMemoryManager.java b/orc/src/test/org/apache/hive/orc/impl/TestMemoryManager.java
new file mode 100644
index 0000000..290208c
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/impl/TestMemoryManager.java
@@ -0,0 +1,132 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc.impl;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.hamcrest.BaseMatcher;
+import org.hamcrest.Description;
+import org.junit.Test;
+import org.mockito.Matchers;
+import org.mockito.Mockito;
+
+import java.lang.management.ManagementFactory;
+
+import static junit.framework.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.verify;
+
+/**
+ * Test the ORC memory manager.
+ */
+public class TestMemoryManager {
+  private static final double ERROR = 0.000001;
+
+  private static class NullCallback implements MemoryManager.Callback {
+    public boolean checkMemory(double newScale) {
+      return false;
+    }
+  }
+
+  @Test
+  public void testBasics() throws Exception {
+    Configuration conf = new Configuration();
+    MemoryManager mgr = new MemoryManager(conf);
+    NullCallback callback = new NullCallback();
+    long poolSize = mgr.getTotalMemoryPool();
+    assertEquals(Math.round(ManagementFactory.getMemoryMXBean().
+        getHeapMemoryUsage().getMax() * 0.5d), poolSize);
+    assertEquals(1.0, mgr.getAllocationScale(), 0.00001);
+    mgr.addWriter(new Path("p1"), 1000, callback);
+    assertEquals(1.0, mgr.getAllocationScale(), 0.00001);
+    mgr.addWriter(new Path("p1"), poolSize / 2, callback);
+    assertEquals(1.0, mgr.getAllocationScale(), 0.00001);
+    mgr.addWriter(new Path("p2"), poolSize / 2, callback);
+    assertEquals(1.0, mgr.getAllocationScale(), 0.00001);
+    mgr.addWriter(new Path("p3"), poolSize / 2, callback);
+    assertEquals(0.6666667, mgr.getAllocationScale(), 0.00001);
+    mgr.addWriter(new Path("p4"), poolSize / 2, callback);
+    assertEquals(0.5, mgr.getAllocationScale(), 0.000001);
+    mgr.addWriter(new Path("p4"), 3 * poolSize / 2, callback);
+    assertEquals(0.3333333, mgr.getAllocationScale(), 0.000001);
+    mgr.removeWriter(new Path("p1"));
+    mgr.removeWriter(new Path("p2"));
+    assertEquals(0.5, mgr.getAllocationScale(), 0.00001);
+    mgr.removeWriter(new Path("p4"));
+    assertEquals(1.0, mgr.getAllocationScale(), 0.00001);
+  }
+
+  @Test
+  public void testConfig() throws Exception {
+    Configuration conf = new Configuration();
+    conf.set("hive.exec.orc.memory.pool", "0.9");
+    MemoryManager mgr = new MemoryManager(conf);
+    long mem =
+        ManagementFactory.getMemoryMXBean().getHeapMemoryUsage().getMax();
+    System.err.print("Memory = " + mem);
+    long pool = mgr.getTotalMemoryPool();
+    assertTrue("Pool too small: " + pool, mem * 0.899 < pool);
+    assertTrue("Pool too big: " + pool, pool < mem * 0.901);
+  }
+
+  private static class DoubleMatcher extends BaseMatcher<Double> {
+    final double expected;
+    final double error;
+    DoubleMatcher(double expected, double error) {
+      this.expected = expected;
+      this.error = error;
+    }
+
+    @Override
+    public boolean matches(Object val) {
+      double dbl = (Double) val;
+      return Math.abs(dbl - expected) <= error;
+    }
+
+    @Override
+    public void describeTo(Description description) {
+      description.appendText("not sufficiently close to ");
+      description.appendText(Double.toString(expected));
+    }
+  }
+
+  private static DoubleMatcher closeTo(double value, double error) {
+    return new DoubleMatcher(value, error);
+  }
+
+  @Test
+  public void testCallback() throws Exception {
+    Configuration conf = new Configuration();
+    MemoryManager mgr = new MemoryManager(conf);
+    long pool = mgr.getTotalMemoryPool();
+    MemoryManager.Callback[] calls = new MemoryManager.Callback[20];
+    for(int i=0; i < calls.length; ++i) {
+      calls[i] = Mockito.mock(MemoryManager.Callback.class);
+      mgr.addWriter(new Path(Integer.toString(i)), pool/4, calls[i]);
+    }
+    // add enough rows to get the memory manager to check the limits
+    for(int i=0; i < 10000; ++i) {
+      mgr.addedRow(1);
+    }
+    for(int call=0; call < calls.length; ++call) {
+      Mockito.verify(calls[call], Mockito.times(2))
+          .checkMemory(Matchers.doubleThat(closeTo(0.2, ERROR)));
+    }
+  }
+}
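
The expected scales in testBasics all follow one rule: the allocation scale is the total memory pool divided by the sum of what the registered writers requested, capped at 1.0, with a re-added path replacing its earlier request. A small sketch that reproduces those numbers under that assumption; MemoryManager's real bookkeeping may differ in detail:

    import java.util.HashMap;
    import java.util.Map;

    public class AllocationScaleSketch {
      private final long totalPool;
      private final Map<String, Long> requests = new HashMap<>();

      AllocationScaleSketch(long totalPool) { this.totalPool = totalPool; }

      void addWriter(String path, long requested) { requests.put(path, requested); }

      void removeWriter(String path) { requests.remove(path); }

      double allocationScale() {
        long total = 0;
        for (long r : requests.values()) {
          total += r;
        }
        return total <= totalPool ? 1.0 : (double) totalPool / total;
      }

      public static void main(String[] args) {
        long pool = 1_000_000;                       // stand-in for getTotalMemoryPool()
        AllocationScaleSketch m = new AllocationScaleSketch(pool);
        m.addWriter("p1", pool / 2);
        m.addWriter("p2", pool / 2);
        m.addWriter("p3", pool / 2);
        System.out.println(m.allocationScale());     // 0.666... as asserted above
        m.addWriter("p4", 3 * pool / 2);
        System.out.println(m.allocationScale());     // 0.333...
        m.removeWriter("p1");
        m.removeWriter("p2");
        System.out.println(m.allocationScale());     // 0.5
      }
    }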

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/impl/TestOrcWideTable.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/impl/TestOrcWideTable.java b/orc/src/test/org/apache/hive/orc/impl/TestOrcWideTable.java
new file mode 100644
index 0000000..e377a24
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/impl/TestOrcWideTable.java
@@ -0,0 +1,64 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc.impl;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+
+import org.junit.Test;
+
+public class TestOrcWideTable {
+
+  @Test
+  public void testBufferSizeFor1Col() throws IOException {
+    assertEquals(128 * 1024, PhysicalFsWriter.getEstimatedBufferSize(512 * 1024 * 1024,
+        1, 128*1024));
+  }
+
+  @Test
+  public void testBufferSizeFor50Col() throws IOException {
+    assertEquals(256 * 1024, PhysicalFsWriter.getEstimatedBufferSize(256 * 1024 * 1024,
+        50, 256*1024));
+  }
+
+  @Test
+  public void testBufferSizeFor1000Col() throws IOException {
+    assertEquals(32 * 1024, PhysicalFsWriter.getEstimatedBufferSize(512 * 1024 * 1024,
+        1000, 128*1024));
+  }
+
+  @Test
+  public void testBufferSizeFor2000Col() throws IOException {
+    assertEquals(16 * 1024, PhysicalFsWriter.getEstimatedBufferSize(512 * 1024 * 1024,
+        2000, 256*1024));
+  }
+
+  @Test
+  public void testBufferSizeFor4000Col() throws IOException {
+    assertEquals(8 * 1024, PhysicalFsWriter.getEstimatedBufferSize(512 * 1024 * 1024,
+        4000, 256*1024));
+  }
+
+  @Test
+  public void testBufferSizeFor25000Col() throws IOException {
+    assertEquals(4 * 1024, PhysicalFsWriter.getEstimatedBufferSize(512 * 1024 * 1024,
+        25000, 256*1024));
+  }
+}
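
Taken together, the six assertions above are consistent with an estimate of roughly stripeSize / (16 * columnCount), rounded down to a power of two and clamped between 4 KB and the requested buffer size. That formula is only inferred from these test cases, not read out of PhysicalFsWriter, so treat the sketch below as a mnemonic rather than the implementation (the factor of 16 and the 4 KB floor are assumptions):

    public class BufferSizeSketch {
      static int estimate(long stripeSize, int columns, int requestedBuffer) {
        long perColumn = stripeSize / (16L * columns);
        int pow2 = Integer.highestOneBit((int) Math.min(perColumn, Integer.MAX_VALUE));
        return Math.max(4 * 1024, Math.min(requestedBuffer, pow2));
      }

      public static void main(String[] args) {
        System.out.println(estimate(512L * 1024 * 1024, 1000, 128 * 1024));   // 32768
        System.out.println(estimate(512L * 1024 * 1024, 25000, 256 * 1024));  // 4096
      }
    }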

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/impl/TestOutStream.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/impl/TestOutStream.java b/orc/src/test/org/apache/hive/orc/impl/TestOutStream.java
new file mode 100644
index 0000000..23c13f4
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/impl/TestOutStream.java
@@ -0,0 +1,43 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc.impl;
+
+import org.apache.hive.orc.CompressionCodec;
+import org.junit.Test;
+import org.mockito.Mockito;
+
+import java.nio.ByteBuffer;
+
+import static org.junit.Assert.assertEquals;
+
+public class TestOutStream {
+
+  @Test
+  public void testFlush() throws Exception {
+    OutStream.OutputReceiver receiver =
+        Mockito.mock(OutStream.OutputReceiver.class);
+    CompressionCodec codec = new ZlibCodec();
+    OutStream stream = new OutStream("test", 128*1024, codec, receiver);
+    assertEquals(0L, stream.getBufferSize());
+    stream.write(new byte[]{0, 1, 2});
+    stream.flush();
+    Mockito.verify(receiver).output(Mockito.any(ByteBuffer.class));
+    assertEquals(0L, stream.getBufferSize());
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/impl/TestRLEv2.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/impl/TestRLEv2.java b/orc/src/test/org/apache/hive/orc/impl/TestRLEv2.java
new file mode 100644
index 0000000..441a3a2
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/impl/TestRLEv2.java
@@ -0,0 +1,307 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc.impl;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.PrintStream;
+import java.util.Random;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hive.orc.OrcFile;
+import org.apache.hive.orc.TypeDescription;
+import org.apache.hive.orc.Writer;
+import org.apache.hive.orc.tools.FileDump;
+import org.apache.hive.orc.CompressionKind;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestName;
+
+public class TestRLEv2 {
+  Path workDir = new Path(System.getProperty("test.tmp.dir",
+      "target" + File.separator + "test" + File.separator + "tmp"));
+  Path testFilePath;
+  Configuration conf;
+  FileSystem fs;
+
+  @Rule
+  public TestName testCaseName = new TestName();
+
+  @Before
+  public void openFileSystem () throws Exception {
+    conf = new Configuration();
+    fs = FileSystem.getLocal(conf);
+    testFilePath = new Path(workDir, "TestRLEv2." +
+        testCaseName.getMethodName() + ".orc");
+    fs.delete(testFilePath, false);
+  }
+
+  void appendInt(VectorizedRowBatch batch, int i) {
+    ((LongColumnVector) batch.cols[0]).vector[batch.size++] = i;
+  }
+
+  @Test
+  public void testFixedDeltaZero() throws Exception {
+    TypeDescription schema = TypeDescription.createInt();
+    Writer w = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .compress(CompressionKind.NONE)
+            .setSchema(schema)
+            .rowIndexStride(0)
+            .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
+            .version(OrcFile.Version.V_0_12)
+    );
+    VectorizedRowBatch batch = schema.createRowBatch(5120);
+    for (int i = 0; i < 5120; ++i) {
+      appendInt(batch, 123);
+    }
+    w.addRowBatch(batch);
+    w.close();
+
+    PrintStream origOut = System.out;
+    ByteArrayOutputStream myOut = new ByteArrayOutputStream();
+    System.setOut(new PrintStream(myOut));
+    FileDump.main(new String[]{testFilePath.toUri().toString()});
+    System.out.flush();
+    String outDump = new String(myOut.toByteArray());
+    // 10 runs of 512 elements. Each run has 2 bytes header, 2 bytes base (base = 123,
+    // zigzag encoded varint) and 1 byte delta (delta = 0). In total, 5 bytes per run.
+    assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 50"));
+    System.setOut(origOut);
+  }
+
+  @Test
+  public void testFixedDeltaOne() throws Exception {
+    TypeDescription schema = TypeDescription.createInt();
+    Writer w = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .compress(CompressionKind.NONE)
+            .setSchema(schema)
+            .rowIndexStride(0)
+            .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
+            .version(OrcFile.Version.V_0_12)
+    );
+    VectorizedRowBatch batch = schema.createRowBatch(5120);
+    for (int i = 0; i < 5120; ++i) {
+      appendInt(batch, i % 512);
+    }
+    w.addRowBatch(batch);
+    w.close();
+
+    PrintStream origOut = System.out;
+    ByteArrayOutputStream myOut = new ByteArrayOutputStream();
+    System.setOut(new PrintStream(myOut));
+    FileDump.main(new String[]{testFilePath.toUri().toString()});
+    System.out.flush();
+    String outDump = new String(myOut.toByteArray());
+    // 10 runs of 512 elements. Each run has 2 bytes header, 1 byte base (base = 0)
+    // and 1 byte delta (delta = 1). In total, 4 bytes per run.
+    assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 40"));
+    System.setOut(origOut);
+  }
+
+  @Test
+  public void testFixedDeltaOneDescending() throws Exception {
+    TypeDescription schema = TypeDescription.createInt();
+    Writer w = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .compress(CompressionKind.NONE)
+            .setSchema(schema)
+            .rowIndexStride(0)
+            .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
+            .version(OrcFile.Version.V_0_12)
+    );
+    VectorizedRowBatch batch = schema.createRowBatch(5120);
+    for (int i = 0; i < 5120; ++i) {
+      appendInt(batch, 512 - (i % 512));
+    }
+    w.addRowBatch(batch);
+    w.close();
+
+    PrintStream origOut = System.out;
+    ByteArrayOutputStream myOut = new ByteArrayOutputStream();
+    System.setOut(new PrintStream(myOut));
+    FileDump.main(new String[]{testFilePath.toUri().toString()});
+    System.out.flush();
+    String outDump = new String(myOut.toByteArray());
+    // 10 runs of 512 elements. Each run has 2 bytes header, 2 bytes base (base = 512, zigzag + varint)
+    // and 1 byte delta (delta = -1). In total, 5 bytes per run.
+    assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 50"));
+    System.setOut(origOut);
+  }
+
+  @Test
+  public void testFixedDeltaLarge() throws Exception {
+    TypeDescription schema = TypeDescription.createInt();
+    Writer w = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .compress(CompressionKind.NONE)
+            .setSchema(schema)
+            .rowIndexStride(0)
+            .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
+            .version(OrcFile.Version.V_0_12)
+    );
+    VectorizedRowBatch batch = schema.createRowBatch(5120);
+    for (int i = 0; i < 5120; ++i) {
+      appendInt(batch, i % 512 + ((i % 512) * 100));
+    }
+    w.addRowBatch(batch);
+    w.close();
+
+    PrintStream origOut = System.out;
+    ByteArrayOutputStream myOut = new ByteArrayOutputStream();
+    System.setOut(new PrintStream(myOut));
+    FileDump.main(new String[]{testFilePath.toUri().toString()});
+    System.out.flush();
+    String outDump = new String(myOut.toByteArray());
+    // 10 runs of 512 elements. Each run has 2 bytes header, 1 byte base (base = 0)
+    // and 2 bytes delta (delta = 101, zigzag encoded varint). In total, 5 bytes per run.
+    assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 50"));
+    System.setOut(origOut);
+  }
+
+  @Test
+  public void testFixedDeltaLargeDescending() throws Exception {
+    TypeDescription schema = TypeDescription.createInt();
+    Writer w = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .compress(CompressionKind.NONE)
+            .setSchema(schema)
+            .rowIndexStride(0)
+            .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
+            .version(OrcFile.Version.V_0_12)
+    );
+    VectorizedRowBatch batch = schema.createRowBatch(5120);
+    for (int i = 0; i < 5120; ++i) {
+      appendInt(batch, (512 - i % 512) + ((i % 512) * 100));
+    }
+    w.addRowBatch(batch);
+    w.close();
+
+    PrintStream origOut = System.out;
+    ByteArrayOutputStream myOut = new ByteArrayOutputStream();
+    System.setOut(new PrintStream(myOut));
+    FileDump.main(new String[]{testFilePath.toUri().toString()});
+    System.out.flush();
+    String outDump = new String(myOut.toByteArray());
+    // 10 runs of 512 elements. Each run has 2 bytes header, 2 bytes base (base = 512, zigzag + varint)
+    // and 2 bytes delta (delta = 99, zigzag encoded varint). In total, 6 bytes per run.
+    assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 60"));
+    System.setOut(origOut);
+  }
+
+  @Test
+  public void testShortRepeat() throws Exception {
+    TypeDescription schema = TypeDescription.createInt();
+    Writer w = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .compress(CompressionKind.NONE)
+            .setSchema(schema)
+            .rowIndexStride(0)
+            .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
+            .version(OrcFile.Version.V_0_12)
+    );
+    VectorizedRowBatch batch = schema.createRowBatch(5120);
+    for (int i = 0; i < 5; ++i) {
+      appendInt(batch, 10);
+    }
+    w.addRowBatch(batch);
+    w.close();
+
+    PrintStream origOut = System.out;
+    ByteArrayOutputStream myOut = new ByteArrayOutputStream();
+    System.setOut(new PrintStream(myOut));
+    FileDump.main(new String[]{testFilePath.toUri().toString()});
+    System.out.flush();
+    String outDump = new String(myOut.toByteArray());
+    // 1 byte header + 1 byte value
+    assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 2"));
+    System.setOut(origOut);
+  }
+
+  @Test
+  public void testDeltaUnknownSign() throws Exception {
+    TypeDescription schema = TypeDescription.createInt();
+    Writer w = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .compress(CompressionKind.NONE)
+            .setSchema(schema)
+            .rowIndexStride(0)
+            .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
+            .version(OrcFile.Version.V_0_12)
+    );
+    VectorizedRowBatch batch = schema.createRowBatch(5120);
+    appendInt(batch, 0);
+    for (int i = 0; i < 511; ++i) {
+      appendInt(batch, i);
+    }
+    w.addRowBatch(batch);
+    w.close();
+
+    PrintStream origOut = System.out;
+    ByteArrayOutputStream myOut = new ByteArrayOutputStream();
+    System.setOut(new PrintStream(myOut));
+    FileDump.main(new String[]{testFilePath.toUri().toString()});
+    System.out.flush();
+    String outDump = new String(myOut.toByteArray());
+    // Monotonicity cannot be determined for the sequence 0,0,1,2,3,...,510, so DIRECT encoding
+    // is used: 2 bytes for the header and 640 bytes for the data (512 values at a fixed bit width
+    // of 10 bits each, 5120/8 = 640). Total: 642 bytes.
+    assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 642"));
+    System.setOut(origOut);
+  }
+
+  @Test
+  public void testPatchedBase() throws Exception {
+    TypeDescription schema = TypeDescription.createInt();
+    Writer w = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .compress(CompressionKind.NONE)
+            .setSchema(schema)
+            .rowIndexStride(0)
+            .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
+            .version(OrcFile.Version.V_0_12)
+    );
+
+    Random rand = new Random(123);
+    VectorizedRowBatch batch = schema.createRowBatch(5120);
+    appendInt(batch, 10000000);
+    for (int i = 0; i < 511; ++i) {
+      appendInt(batch, rand.nextInt(i+1));
+    }
+    w.addRowBatch(batch);
+    w.close();
+
+    PrintStream origOut = System.out;
+    ByteArrayOutputStream myOut = new ByteArrayOutputStream();
+    System.setOut(new PrintStream(myOut));
+    FileDump.main(new String[]{testFilePath.toUri().toString()});
+    System.out.flush();
+    String outDump = new String(myOut.toByteArray());
+    // the single large value (10000000) among small random values triggers PATCHED_BASE encoding
+    assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 583"));
+    System.setOut(origOut);
+  }
+}

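The DATA stream lengths asserted in the FIXED_DELTA tests above follow from the per-run layout the comments describe: a 2-byte run header plus the base and the delta, each written as a zigzag-encoded base-128 varint, times 10 runs of 512 values. The standalone sketch below reproduces that arithmetic; it is only an illustration of the byte counts, not ORC's RunLengthIntegerWriterV2.

    public class FixedDeltaSizeSketch {

      // Bytes needed by a zigzag-encoded base-128 varint for the given value.
      static int zigzagVarIntBytes(long value) {
        long zigzag = (value << 1) ^ (value >> 63);
        int bytes = 1;
        while ((zigzag & ~0x7fL) != 0) {
          zigzag >>>= 7;
          bytes++;
        }
        return bytes;
      }

      // Per run: 2-byte header + varint base + varint delta.
      static int fixedDeltaRunBytes(long base, long delta) {
        return 2 + zigzagVarIntBytes(base) + zigzagVarIntBytes(delta);
      }

      public static void main(String[] args) {
        int runs = 10;                                             // 5120 values split into 10 runs of 512
        System.out.println(runs * fixedDeltaRunBytes(123, 0));     // 50, testFixedDeltaZero
        System.out.println(runs * fixedDeltaRunBytes(0, 1));       // 40, testFixedDeltaOne
        System.out.println(runs * fixedDeltaRunBytes(512, -1));    // 50, testFixedDeltaOneDescending
        System.out.println(runs * fixedDeltaRunBytes(0, 101));     // 50, testFixedDeltaLarge
        System.out.println(runs * fixedDeltaRunBytes(512, 99));    // 60, testFixedDeltaLargeDescending
      }
    }

Running it prints 50, 40, 50, 50 and 60, matching the stream lengths asserted in the five FIXED_DELTA tests.
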
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/impl/TestReaderImpl.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/impl/TestReaderImpl.java b/orc/src/test/org/apache/hive/orc/impl/TestReaderImpl.java
new file mode 100644
index 0000000..c414e17
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/impl/TestReaderImpl.java
@@ -0,0 +1,152 @@
+/*
+ * Copyright 2016 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc.impl;
+
+import java.io.ByteArrayInputStream;
+import java.io.EOFException;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.charset.CharacterCodingException;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PositionedReadable;
+import org.apache.hadoop.fs.Seekable;
+import org.apache.hive.orc.FileFormatException;
+import org.apache.hive.orc.OrcFile;
+import org.apache.hadoop.io.Text;
+import org.junit.Test;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.rules.ExpectedException;
+
+public class TestReaderImpl {
+
+  @Rule
+  public ExpectedException thrown = ExpectedException.none();
+
+  private final Path path = new Path("test-file.orc");
+  private FSDataInputStream in;
+  private int psLen;
+  private ByteBuffer buffer;
+
+  @Before
+  public void setup() {
+    in = null;
+  }
+
+  @Test
+  public void testEnsureOrcFooterSmallTextFile() throws IOException {
+    prepareTestCase("1".getBytes());
+    thrown.expect(FileFormatException.class);
+    ReaderImpl.ensureOrcFooter(in, path, psLen, buffer);
+  }
+
+  @Test
+  public void testEnsureOrcFooterLargeTextFile() throws IOException {
+    prepareTestCase("This is Some Text File".getBytes());
+    thrown.expect(FileFormatException.class);
+    ReaderImpl.ensureOrcFooter(in, path, psLen, buffer);
+  }
+
+  @Test
+  public void testEnsureOrcFooter011ORCFile() throws IOException {
+    prepareTestCase(composeContent(OrcFile.MAGIC, "FOOTER"));
+    ReaderImpl.ensureOrcFooter(in, path, psLen, buffer);
+  }
+
+  @Test
+  public void testEnsureOrcFooterCorrectORCFooter() throws IOException {
+    prepareTestCase(composeContent("", OrcFile.MAGIC));
+    ReaderImpl.ensureOrcFooter(in, path, psLen, buffer);
+  }
+
+  private void prepareTestCase(byte[] bytes) {
+    buffer = ByteBuffer.wrap(bytes);
+    psLen = buffer.get(bytes.length - 1) & 0xff;
+    in = new FSDataInputStream(new SeekableByteArrayInputStream(bytes));
+  }
+
+  private byte[] composeContent(String headerStr, String footerStr) throws CharacterCodingException {
+    ByteBuffer header = Text.encode(headerStr);
+    ByteBuffer footer = Text.encode(footerStr);
+    int headerLen = header.remaining();
+    int footerLen = footer.remaining() + 1;
+
+    ByteBuffer buf = ByteBuffer.allocate(headerLen + footerLen);
+
+    buf.put(header);
+    buf.put(footer);
+    buf.put((byte) footerLen);
+    return buf.array();
+  }
+
+  private static final class SeekableByteArrayInputStream extends ByteArrayInputStream
+          implements Seekable, PositionedReadable {
+
+    public SeekableByteArrayInputStream(byte[] buf) {
+      super(buf);
+    }
+
+    @Override
+    public void seek(long pos) throws IOException {
+      this.reset();
+      this.skip(pos);
+    }
+
+    @Override
+    public long getPos() throws IOException {
+      return pos;
+    }
+
+    @Override
+    public boolean seekToNewSource(long targetPos) throws IOException {
+      return false;
+    }
+
+    @Override
+    public int read(long position, byte[] buffer, int offset, int length)
+            throws IOException {
+      long oldPos = getPos();
+      int nread = -1;
+      try {
+        seek(position);
+        nread = read(buffer, offset, length);
+      } finally {
+        seek(oldPos);
+      }
+      return nread;
+    }
+
+    @Override
+    public void readFully(long position, byte[] buffer, int offset, int length)
+            throws IOException {
+      int nread = 0;
+      while (nread < length) {
+        int nbytes = read(position + nread, buffer, offset + nread, length - nread);
+        if (nbytes < 0) {
+          throw new EOFException("End of file reached before reading fully.");
+        }
+        nread += nbytes;
+      }
+    }
+
+    @Override
+    public void readFully(long position, byte[] buffer)
+            throws IOException {
+      readFully(position, buffer, 0, buffer.length);
+    }
+  }
+}

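The ensureOrcFooter tests above build a byte array whose last byte is the postscript length and, for a well-formed modern file, whose bytes just before it spell the magic "ORC". The sketch below shows only that tail check in isolation; the real ReaderImpl.ensureOrcFooter additionally falls back to checking the start of the file, which is how the 0.11-style layout in testEnsureOrcFooter011ORCFile (magic at the front of the file) is still accepted.

    import java.nio.charset.StandardCharsets;

    public class OrcFooterCheckSketch {
      private static final byte[] MAGIC = "ORC".getBytes(StandardCharsets.US_ASCII);

      // True when the bytes just before the trailing postscript-length byte spell "ORC".
      static boolean tailHasOrcMagic(byte[] file) {
        if (file.length < MAGIC.length + 1) {
          return false;                                    // too short to hold magic + length byte
        }
        int psLen = file[file.length - 1] & 0xff;          // last byte = postscript length
        if (psLen < MAGIC.length + 1) {
          return false;                                    // postscript too short to end with the magic
        }
        int magicStart = file.length - 1 - MAGIC.length;   // magic ends the postscript
        for (int i = 0; i < MAGIC.length; i++) {
          if (file[magicStart + i] != MAGIC[i]) {
            return false;
          }
        }
        return true;
      }

      public static void main(String[] args) {
        byte[] good = {'O', 'R', 'C', 4};                  // "" + MAGIC + psLen, as composeContent builds it
        byte[] text = {'1'};                               // plain text file, rejected
        System.out.println(tailHasOrcMagic(good));         // true
        System.out.println(tailHasOrcMagic(text));         // false
      }
    }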

[36/37] hive git commit: HIVE-17118. Move the hive-orc source files to make the package names unique.

Posted by om...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/gen/protobuf-java/org/apache/hive/orc/OrcProto.java
----------------------------------------------------------------------
diff --git a/orc/src/gen/protobuf-java/org/apache/hive/orc/OrcProto.java b/orc/src/gen/protobuf-java/org/apache/hive/orc/OrcProto.java
new file mode 100644
index 0000000..963471b
--- /dev/null
+++ b/orc/src/gen/protobuf-java/org/apache/hive/orc/OrcProto.java
@@ -0,0 +1,20179 @@
+// Generated by the protocol buffer compiler.  DO NOT EDIT!
+// source: orc_proto.proto
+
+package org.apache.hive.orc;
+
+public final class OrcProto {
+  private OrcProto() {}
+  public static void registerAllExtensions(
+      com.google.protobuf.ExtensionRegistry registry) {
+  }
+  /**
+   * Protobuf enum {@code orc.proto.CompressionKind}
+   */
+  public enum CompressionKind
+      implements com.google.protobuf.ProtocolMessageEnum {
+    /**
+     * <code>NONE = 0;</code>
+     */
+    NONE(0, 0),
+    /**
+     * <code>ZLIB = 1;</code>
+     */
+    ZLIB(1, 1),
+    /**
+     * <code>SNAPPY = 2;</code>
+     */
+    SNAPPY(2, 2),
+    /**
+     * <code>LZO = 3;</code>
+     */
+    LZO(3, 3),
+    ;
+
+    /**
+     * <code>NONE = 0;</code>
+     */
+    public static final int NONE_VALUE = 0;
+    /**
+     * <code>ZLIB = 1;</code>
+     */
+    public static final int ZLIB_VALUE = 1;
+    /**
+     * <code>SNAPPY = 2;</code>
+     */
+    public static final int SNAPPY_VALUE = 2;
+    /**
+     * <code>LZO = 3;</code>
+     */
+    public static final int LZO_VALUE = 3;
+
+
+    public final int getNumber() { return value; }
+
+    public static CompressionKind valueOf(int value) {
+      switch (value) {
+        case 0: return NONE;
+        case 1: return ZLIB;
+        case 2: return SNAPPY;
+        case 3: return LZO;
+        default: return null;
+      }
+    }
+
+    public static com.google.protobuf.Internal.EnumLiteMap<CompressionKind>
+        internalGetValueMap() {
+      return internalValueMap;
+    }
+    private static com.google.protobuf.Internal.EnumLiteMap<CompressionKind>
+        internalValueMap =
+          new com.google.protobuf.Internal.EnumLiteMap<CompressionKind>() {
+            public CompressionKind findValueByNumber(int number) {
+              return CompressionKind.valueOf(number);
+            }
+          };
+
+    public final com.google.protobuf.Descriptors.EnumValueDescriptor
+        getValueDescriptor() {
+      return getDescriptor().getValues().get(index);
+    }
+    public final com.google.protobuf.Descriptors.EnumDescriptor
+        getDescriptorForType() {
+      return getDescriptor();
+    }
+    public static final com.google.protobuf.Descriptors.EnumDescriptor
+        getDescriptor() {
+      return OrcProto.getDescriptor().getEnumTypes().get(0);
+    }
+
+    private static final CompressionKind[] VALUES = values();
+
+    public static CompressionKind valueOf(
+        com.google.protobuf.Descriptors.EnumValueDescriptor desc) {
+      if (desc.getType() != getDescriptor()) {
+        throw new java.lang.IllegalArgumentException(
+          "EnumValueDescriptor is not for this type.");
+      }
+      return VALUES[desc.getIndex()];
+    }
+
+    private final int index;
+    private final int value;
+
+    private CompressionKind(int index, int value) {
+      this.index = index;
+      this.value = value;
+    }
+
+    // @@protoc_insertion_point(enum_scope:orc.proto.CompressionKind)
+  }
+
+  public interface IntegerStatisticsOrBuilder
+      extends com.google.protobuf.MessageOrBuilder {
+
+    // optional sint64 minimum = 1;
+    /**
+     * <code>optional sint64 minimum = 1;</code>
+     */
+    boolean hasMinimum();
+    /**
+     * <code>optional sint64 minimum = 1;</code>
+     */
+    long getMinimum();
+
+    // optional sint64 maximum = 2;
+    /**
+     * <code>optional sint64 maximum = 2;</code>
+     */
+    boolean hasMaximum();
+    /**
+     * <code>optional sint64 maximum = 2;</code>
+     */
+    long getMaximum();
+
+    // optional sint64 sum = 3;
+    /**
+     * <code>optional sint64 sum = 3;</code>
+     */
+    boolean hasSum();
+    /**
+     * <code>optional sint64 sum = 3;</code>
+     */
+    long getSum();
+  }
+  /**
+   * Protobuf type {@code orc.proto.IntegerStatistics}
+   */
+  public static final class IntegerStatistics extends
+      com.google.protobuf.GeneratedMessage
+      implements IntegerStatisticsOrBuilder {
+    // Use IntegerStatistics.newBuilder() to construct.
+    private IntegerStatistics(com.google.protobuf.GeneratedMessage.Builder<?> builder) {
+      super(builder);
+      this.unknownFields = builder.getUnknownFields();
+    }
+    private IntegerStatistics(boolean noInit) { this.unknownFields = com.google.protobuf.UnknownFieldSet.getDefaultInstance(); }
+
+    private static final IntegerStatistics defaultInstance;
+    public static IntegerStatistics getDefaultInstance() {
+      return defaultInstance;
+    }
+
+    public IntegerStatistics getDefaultInstanceForType() {
+      return defaultInstance;
+    }
+
+    private final com.google.protobuf.UnknownFieldSet unknownFields;
+    @java.lang.Override
+    public final com.google.protobuf.UnknownFieldSet
+        getUnknownFields() {
+      return this.unknownFields;
+    }
+    private IntegerStatistics(
+        com.google.protobuf.CodedInputStream input,
+        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+        throws com.google.protobuf.InvalidProtocolBufferException {
+      initFields();
+      int mutable_bitField0_ = 0;
+      com.google.protobuf.UnknownFieldSet.Builder unknownFields =
+          com.google.protobuf.UnknownFieldSet.newBuilder();
+      try {
+        boolean done = false;
+        while (!done) {
+          int tag = input.readTag();
+          switch (tag) {
+            case 0:
+              done = true;
+              break;
+            default: {
+              if (!parseUnknownField(input, unknownFields,
+                                     extensionRegistry, tag)) {
+                done = true;
+              }
+              break;
+            }
+            case 8: {
+              bitField0_ |= 0x00000001;
+              minimum_ = input.readSInt64();
+              break;
+            }
+            case 16: {
+              bitField0_ |= 0x00000002;
+              maximum_ = input.readSInt64();
+              break;
+            }
+            case 24: {
+              bitField0_ |= 0x00000004;
+              sum_ = input.readSInt64();
+              break;
+            }
+          }
+        }
+      } catch (com.google.protobuf.InvalidProtocolBufferException e) {
+        throw e.setUnfinishedMessage(this);
+      } catch (java.io.IOException e) {
+        throw new com.google.protobuf.InvalidProtocolBufferException(
+            e.getMessage()).setUnfinishedMessage(this);
+      } finally {
+        this.unknownFields = unknownFields.build();
+        makeExtensionsImmutable();
+      }
+    }
+    public static final com.google.protobuf.Descriptors.Descriptor
+        getDescriptor() {
+      return OrcProto.internal_static_orc_proto_IntegerStatistics_descriptor;
+    }
+
+    protected com.google.protobuf.GeneratedMessage.FieldAccessorTable
+        internalGetFieldAccessorTable() {
+      return OrcProto.internal_static_orc_proto_IntegerStatistics_fieldAccessorTable
+          .ensureFieldAccessorsInitialized(
+              OrcProto.IntegerStatistics.class, OrcProto.IntegerStatistics.Builder.class);
+    }
+
+    public static com.google.protobuf.Parser<IntegerStatistics> PARSER =
+        new com.google.protobuf.AbstractParser<IntegerStatistics>() {
+      public IntegerStatistics parsePartialFrom(
+          com.google.protobuf.CodedInputStream input,
+          com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+          throws com.google.protobuf.InvalidProtocolBufferException {
+        return new IntegerStatistics(input, extensionRegistry);
+      }
+    };
+
+    @java.lang.Override
+    public com.google.protobuf.Parser<IntegerStatistics> getParserForType() {
+      return PARSER;
+    }
+
+    private int bitField0_;
+    // optional sint64 minimum = 1;
+    public static final int MINIMUM_FIELD_NUMBER = 1;
+    private long minimum_;
+    /**
+     * <code>optional sint64 minimum = 1;</code>
+     */
+    public boolean hasMinimum() {
+      return ((bitField0_ & 0x00000001) == 0x00000001);
+    }
+    /**
+     * <code>optional sint64 minimum = 1;</code>
+     */
+    public long getMinimum() {
+      return minimum_;
+    }
+
+    // optional sint64 maximum = 2;
+    public static final int MAXIMUM_FIELD_NUMBER = 2;
+    private long maximum_;
+    /**
+     * <code>optional sint64 maximum = 2;</code>
+     */
+    public boolean hasMaximum() {
+      return ((bitField0_ & 0x00000002) == 0x00000002);
+    }
+    /**
+     * <code>optional sint64 maximum = 2;</code>
+     */
+    public long getMaximum() {
+      return maximum_;
+    }
+
+    // optional sint64 sum = 3;
+    public static final int SUM_FIELD_NUMBER = 3;
+    private long sum_;
+    /**
+     * <code>optional sint64 sum = 3;</code>
+     */
+    public boolean hasSum() {
+      return ((bitField0_ & 0x00000004) == 0x00000004);
+    }
+    /**
+     * <code>optional sint64 sum = 3;</code>
+     */
+    public long getSum() {
+      return sum_;
+    }
+
+    private void initFields() {
+      minimum_ = 0L;
+      maximum_ = 0L;
+      sum_ = 0L;
+    }
+    private byte memoizedIsInitialized = -1;
+    public final boolean isInitialized() {
+      byte isInitialized = memoizedIsInitialized;
+      if (isInitialized != -1) return isInitialized == 1;
+
+      memoizedIsInitialized = 1;
+      return true;
+    }
+
+    public void writeTo(com.google.protobuf.CodedOutputStream output)
+                        throws java.io.IOException {
+      getSerializedSize();
+      if (((bitField0_ & 0x00000001) == 0x00000001)) {
+        output.writeSInt64(1, minimum_);
+      }
+      if (((bitField0_ & 0x00000002) == 0x00000002)) {
+        output.writeSInt64(2, maximum_);
+      }
+      if (((bitField0_ & 0x00000004) == 0x00000004)) {
+        output.writeSInt64(3, sum_);
+      }
+      getUnknownFields().writeTo(output);
+    }
+
+    private int memoizedSerializedSize = -1;
+    public int getSerializedSize() {
+      int size = memoizedSerializedSize;
+      if (size != -1) return size;
+
+      size = 0;
+      if (((bitField0_ & 0x00000001) == 0x00000001)) {
+        size += com.google.protobuf.CodedOutputStream
+          .computeSInt64Size(1, minimum_);
+      }
+      if (((bitField0_ & 0x00000002) == 0x00000002)) {
+        size += com.google.protobuf.CodedOutputStream
+          .computeSInt64Size(2, maximum_);
+      }
+      if (((bitField0_ & 0x00000004) == 0x00000004)) {
+        size += com.google.protobuf.CodedOutputStream
+          .computeSInt64Size(3, sum_);
+      }
+      size += getUnknownFields().getSerializedSize();
+      memoizedSerializedSize = size;
+      return size;
+    }
+
+    private static final long serialVersionUID = 0L;
+    @java.lang.Override
+    protected java.lang.Object writeReplace()
+        throws java.io.ObjectStreamException {
+      return super.writeReplace();
+    }
+
+    public static OrcProto.IntegerStatistics parseFrom(
+        com.google.protobuf.ByteString data)
+        throws com.google.protobuf.InvalidProtocolBufferException {
+      return PARSER.parseFrom(data);
+    }
+    public static OrcProto.IntegerStatistics parseFrom(
+        com.google.protobuf.ByteString data,
+        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+        throws com.google.protobuf.InvalidProtocolBufferException {
+      return PARSER.parseFrom(data, extensionRegistry);
+    }
+    public static OrcProto.IntegerStatistics parseFrom(byte[] data)
+        throws com.google.protobuf.InvalidProtocolBufferException {
+      return PARSER.parseFrom(data);
+    }
+    public static OrcProto.IntegerStatistics parseFrom(
+        byte[] data,
+        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+        throws com.google.protobuf.InvalidProtocolBufferException {
+      return PARSER.parseFrom(data, extensionRegistry);
+    }
+    public static OrcProto.IntegerStatistics parseFrom(java.io.InputStream input)
+        throws java.io.IOException {
+      return PARSER.parseFrom(input);
+    }
+    public static OrcProto.IntegerStatistics parseFrom(
+        java.io.InputStream input,
+        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+        throws java.io.IOException {
+      return PARSER.parseFrom(input, extensionRegistry);
+    }
+    public static OrcProto.IntegerStatistics parseDelimitedFrom(java.io.InputStream input)
+        throws java.io.IOException {
+      return PARSER.parseDelimitedFrom(input);
+    }
+    public static OrcProto.IntegerStatistics parseDelimitedFrom(
+        java.io.InputStream input,
+        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+        throws java.io.IOException {
+      return PARSER.parseDelimitedFrom(input, extensionRegistry);
+    }
+    public static OrcProto.IntegerStatistics parseFrom(
+        com.google.protobuf.CodedInputStream input)
+        throws java.io.IOException {
+      return PARSER.parseFrom(input);
+    }
+    public static OrcProto.IntegerStatistics parseFrom(
+        com.google.protobuf.CodedInputStream input,
+        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+        throws java.io.IOException {
+      return PARSER.parseFrom(input, extensionRegistry);
+    }
+
+    public static Builder newBuilder() { return Builder.create(); }
+    public Builder newBuilderForType() { return newBuilder(); }
+    public static Builder newBuilder(OrcProto.IntegerStatistics prototype) {
+      return newBuilder().mergeFrom(prototype);
+    }
+    public Builder toBuilder() { return newBuilder(this); }
+
+    @java.lang.Override
+    protected Builder newBuilderForType(
+        com.google.protobuf.GeneratedMessage.BuilderParent parent) {
+      Builder builder = new Builder(parent);
+      return builder;
+    }
+    /**
+     * Protobuf type {@code orc.proto.IntegerStatistics}
+     */
+    public static final class Builder extends
+        com.google.protobuf.GeneratedMessage.Builder<Builder>
+       implements OrcProto.IntegerStatisticsOrBuilder {
+      public static final com.google.protobuf.Descriptors.Descriptor
+          getDescriptor() {
+        return OrcProto.internal_static_orc_proto_IntegerStatistics_descriptor;
+      }
+
+      protected com.google.protobuf.GeneratedMessage.FieldAccessorTable
+          internalGetFieldAccessorTable() {
+        return OrcProto.internal_static_orc_proto_IntegerStatistics_fieldAccessorTable
+            .ensureFieldAccessorsInitialized(
+                OrcProto.IntegerStatistics.class, OrcProto.IntegerStatistics.Builder.class);
+      }
+
+      // Construct using OrcProto.IntegerStatistics.newBuilder()
+      private Builder() {
+        maybeForceBuilderInitialization();
+      }
+
+      private Builder(
+          com.google.protobuf.GeneratedMessage.BuilderParent parent) {
+        super(parent);
+        maybeForceBuilderInitialization();
+      }
+      private void maybeForceBuilderInitialization() {
+        if (com.google.protobuf.GeneratedMessage.alwaysUseFieldBuilders) {
+        }
+      }
+      private static Builder create() {
+        return new Builder();
+      }
+
+      public Builder clear() {
+        super.clear();
+        minimum_ = 0L;
+        bitField0_ = (bitField0_ & ~0x00000001);
+        maximum_ = 0L;
+        bitField0_ = (bitField0_ & ~0x00000002);
+        sum_ = 0L;
+        bitField0_ = (bitField0_ & ~0x00000004);
+        return this;
+      }
+
+      public Builder clone() {
+        return create().mergeFrom(buildPartial());
+      }
+
+      public com.google.protobuf.Descriptors.Descriptor
+          getDescriptorForType() {
+        return OrcProto.internal_static_orc_proto_IntegerStatistics_descriptor;
+      }
+
+      public OrcProto.IntegerStatistics getDefaultInstanceForType() {
+        return OrcProto.IntegerStatistics.getDefaultInstance();
+      }
+
+      public OrcProto.IntegerStatistics build() {
+        OrcProto.IntegerStatistics result = buildPartial();
+        if (!result.isInitialized()) {
+          throw newUninitializedMessageException(result);
+        }
+        return result;
+      }
+
+      public OrcProto.IntegerStatistics buildPartial() {
+        OrcProto.IntegerStatistics result = new OrcProto.IntegerStatistics(this);
+        int from_bitField0_ = bitField0_;
+        int to_bitField0_ = 0;
+        if (((from_bitField0_ & 0x00000001) == 0x00000001)) {
+          to_bitField0_ |= 0x00000001;
+        }
+        result.minimum_ = minimum_;
+        if (((from_bitField0_ & 0x00000002) == 0x00000002)) {
+          to_bitField0_ |= 0x00000002;
+        }
+        result.maximum_ = maximum_;
+        if (((from_bitField0_ & 0x00000004) == 0x00000004)) {
+          to_bitField0_ |= 0x00000004;
+        }
+        result.sum_ = sum_;
+        result.bitField0_ = to_bitField0_;
+        onBuilt();
+        return result;
+      }
+
+      public Builder mergeFrom(com.google.protobuf.Message other) {
+        if (other instanceof OrcProto.IntegerStatistics) {
+          return mergeFrom((OrcProto.IntegerStatistics)other);
+        } else {
+          super.mergeFrom(other);
+          return this;
+        }
+      }
+
+      public Builder mergeFrom(OrcProto.IntegerStatistics other) {
+        if (other == OrcProto.IntegerStatistics.getDefaultInstance()) return this;
+        if (other.hasMinimum()) {
+          setMinimum(other.getMinimum());
+        }
+        if (other.hasMaximum()) {
+          setMaximum(other.getMaximum());
+        }
+        if (other.hasSum()) {
+          setSum(other.getSum());
+        }
+        this.mergeUnknownFields(other.getUnknownFields());
+        return this;
+      }
+
+      public final boolean isInitialized() {
+        return true;
+      }
+
+      public Builder mergeFrom(
+          com.google.protobuf.CodedInputStream input,
+          com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+          throws java.io.IOException {
+        OrcProto.IntegerStatistics parsedMessage = null;
+        try {
+          parsedMessage = PARSER.parsePartialFrom(input, extensionRegistry);
+        } catch (com.google.protobuf.InvalidProtocolBufferException e) {
+          parsedMessage = (OrcProto.IntegerStatistics) e.getUnfinishedMessage();
+          throw e;
+        } finally {
+          if (parsedMessage != null) {
+            mergeFrom(parsedMessage);
+          }
+        }
+        return this;
+      }
+      private int bitField0_;
+
+      // optional sint64 minimum = 1;
+      private long minimum_ ;
+      /**
+       * <code>optional sint64 minimum = 1;</code>
+       */
+      public boolean hasMinimum() {
+        return ((bitField0_ & 0x00000001) == 0x00000001);
+      }
+      /**
+       * <code>optional sint64 minimum = 1;</code>
+       */
+      public long getMinimum() {
+        return minimum_;
+      }
+      /**
+       * <code>optional sint64 minimum = 1;</code>
+       */
+      public Builder setMinimum(long value) {
+        bitField0_ |= 0x00000001;
+        minimum_ = value;
+        onChanged();
+        return this;
+      }
+      /**
+       * <code>optional sint64 minimum = 1;</code>
+       */
+      public Builder clearMinimum() {
+        bitField0_ = (bitField0_ & ~0x00000001);
+        minimum_ = 0L;
+        onChanged();
+        return this;
+      }
+
+      // optional sint64 maximum = 2;
+      private long maximum_ ;
+      /**
+       * <code>optional sint64 maximum = 2;</code>
+       */
+      public boolean hasMaximum() {
+        return ((bitField0_ & 0x00000002) == 0x00000002);
+      }
+      /**
+       * <code>optional sint64 maximum = 2;</code>
+       */
+      public long getMaximum() {
+        return maximum_;
+      }
+      /**
+       * <code>optional sint64 maximum = 2;</code>
+       */
+      public Builder setMaximum(long value) {
+        bitField0_ |= 0x00000002;
+        maximum_ = value;
+        onChanged();
+        return this;
+      }
+      /**
+       * <code>optional sint64 maximum = 2;</code>
+       */
+      public Builder clearMaximum() {
+        bitField0_ = (bitField0_ & ~0x00000002);
+        maximum_ = 0L;
+        onChanged();
+        return this;
+      }
+
+      // optional sint64 sum = 3;
+      private long sum_ ;
+      /**
+       * <code>optional sint64 sum = 3;</code>
+       */
+      public boolean hasSum() {
+        return ((bitField0_ & 0x00000004) == 0x00000004);
+      }
+      /**
+       * <code>optional sint64 sum = 3;</code>
+       */
+      public long getSum() {
+        return sum_;
+      }
+      /**
+       * <code>optional sint64 sum = 3;</code>
+       */
+      public Builder setSum(long value) {
+        bitField0_ |= 0x00000004;
+        sum_ = value;
+        onChanged();
+        return this;
+      }
+      /**
+       * <code>optional sint64 sum = 3;</code>
+       */
+      public Builder clearSum() {
+        bitField0_ = (bitField0_ & ~0x00000004);
+        sum_ = 0L;
+        onChanged();
+        return this;
+      }
+
+      // @@protoc_insertion_point(builder_scope:orc.proto.IntegerStatistics)
+    }
+
+    static {
+      defaultInstance = new IntegerStatistics(true);
+      defaultInstance.initFields();
+    }
+
+    // @@protoc_insertion_point(class_scope:orc.proto.IntegerStatistics)
+  }
+
+  public interface DoubleStatisticsOrBuilder
+      extends com.google.protobuf.MessageOrBuilder {
+
+    // optional double minimum = 1;
+    /**
+     * <code>optional double minimum = 1;</code>
+     */
+    boolean hasMinimum();
+    /**
+     * <code>optional double minimum = 1;</code>
+     */
+    double getMinimum();
+
+    // optional double maximum = 2;
+    /**
+     * <code>optional double maximum = 2;</code>
+     */
+    boolean hasMaximum();
+    /**
+     * <code>optional double maximum = 2;</code>
+     */
+    double getMaximum();
+
+    // optional double sum = 3;
+    /**
+     * <code>optional double sum = 3;</code>
+     */
+    boolean hasSum();
+    /**
+     * <code>optional double sum = 3;</code>
+     */
+    double getSum();
+  }
+  /**
+   * Protobuf type {@code orc.proto.DoubleStatistics}
+   */
+  public static final class DoubleStatistics extends
+      com.google.protobuf.GeneratedMessage
+      implements DoubleStatisticsOrBuilder {
+    // Use DoubleStatistics.newBuilder() to construct.
+    private DoubleStatistics(com.google.protobuf.GeneratedMessage.Builder<?> builder) {
+      super(builder);
+      this.unknownFields = builder.getUnknownFields();
+    }
+    private DoubleStatistics(boolean noInit) { this.unknownFields = com.google.protobuf.UnknownFieldSet.getDefaultInstance(); }
+
+    private static final DoubleStatistics defaultInstance;
+    public static DoubleStatistics getDefaultInstance() {
+      return defaultInstance;
+    }
+
+    public DoubleStatistics getDefaultInstanceForType() {
+      return defaultInstance;
+    }
+
+    private final com.google.protobuf.UnknownFieldSet unknownFields;
+    @java.lang.Override
+    public final com.google.protobuf.UnknownFieldSet
+        getUnknownFields() {
+      return this.unknownFields;
+    }
+    private DoubleStatistics(
+        com.google.protobuf.CodedInputStream input,
+        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+        throws com.google.protobuf.InvalidProtocolBufferException {
+      initFields();
+      int mutable_bitField0_ = 0;
+      com.google.protobuf.UnknownFieldSet.Builder unknownFields =
+          com.google.protobuf.UnknownFieldSet.newBuilder();
+      try {
+        boolean done = false;
+        while (!done) {
+          int tag = input.readTag();
+          switch (tag) {
+            case 0:
+              done = true;
+              break;
+            default: {
+              if (!parseUnknownField(input, unknownFields,
+                                     extensionRegistry, tag)) {
+                done = true;
+              }
+              break;
+            }
+            case 9: {
+              bitField0_ |= 0x00000001;
+              minimum_ = input.readDouble();
+              break;
+            }
+            case 17: {
+              bitField0_ |= 0x00000002;
+              maximum_ = input.readDouble();
+              break;
+            }
+            case 25: {
+              bitField0_ |= 0x00000004;
+              sum_ = input.readDouble();
+              break;
+            }
+          }
+        }
+      } catch (com.google.protobuf.InvalidProtocolBufferException e) {
+        throw e.setUnfinishedMessage(this);
+      } catch (java.io.IOException e) {
+        throw new com.google.protobuf.InvalidProtocolBufferException(
+            e.getMessage()).setUnfinishedMessage(this);
+      } finally {
+        this.unknownFields = unknownFields.build();
+        makeExtensionsImmutable();
+      }
+    }
+    public static final com.google.protobuf.Descriptors.Descriptor
+        getDescriptor() {
+      return OrcProto.internal_static_orc_proto_DoubleStatistics_descriptor;
+    }
+
+    protected com.google.protobuf.GeneratedMessage.FieldAccessorTable
+        internalGetFieldAccessorTable() {
+      return OrcProto.internal_static_orc_proto_DoubleStatistics_fieldAccessorTable
+          .ensureFieldAccessorsInitialized(
+              OrcProto.DoubleStatistics.class, OrcProto.DoubleStatistics.Builder.class);
+    }
+
+    public static com.google.protobuf.Parser<DoubleStatistics> PARSER =
+        new com.google.protobuf.AbstractParser<DoubleStatistics>() {
+      public DoubleStatistics parsePartialFrom(
+          com.google.protobuf.CodedInputStream input,
+          com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+          throws com.google.protobuf.InvalidProtocolBufferException {
+        return new DoubleStatistics(input, extensionRegistry);
+      }
+    };
+
+    @java.lang.Override
+    public com.google.protobuf.Parser<DoubleStatistics> getParserForType() {
+      return PARSER;
+    }
+
+    private int bitField0_;
+    // optional double minimum = 1;
+    public static final int MINIMUM_FIELD_NUMBER = 1;
+    private double minimum_;
+    /**
+     * <code>optional double minimum = 1;</code>
+     */
+    public boolean hasMinimum() {
+      return ((bitField0_ & 0x00000001) == 0x00000001);
+    }
+    /**
+     * <code>optional double minimum = 1;</code>
+     */
+    public double getMinimum() {
+      return minimum_;
+    }
+
+    // optional double maximum = 2;
+    public static final int MAXIMUM_FIELD_NUMBER = 2;
+    private double maximum_;
+    /**
+     * <code>optional double maximum = 2;</code>
+     */
+    public boolean hasMaximum() {
+      return ((bitField0_ & 0x00000002) == 0x00000002);
+    }
+    /**
+     * <code>optional double maximum = 2;</code>
+     */
+    public double getMaximum() {
+      return maximum_;
+    }
+
+    // optional double sum = 3;
+    public static final int SUM_FIELD_NUMBER = 3;
+    private double sum_;
+    /**
+     * <code>optional double sum = 3;</code>
+     */
+    public boolean hasSum() {
+      return ((bitField0_ & 0x00000004) == 0x00000004);
+    }
+    /**
+     * <code>optional double sum = 3;</code>
+     */
+    public double getSum() {
+      return sum_;
+    }
+
+    private void initFields() {
+      minimum_ = 0D;
+      maximum_ = 0D;
+      sum_ = 0D;
+    }
+    private byte memoizedIsInitialized = -1;
+    public final boolean isInitialized() {
+      byte isInitialized = memoizedIsInitialized;
+      if (isInitialized != -1) return isInitialized == 1;
+
+      memoizedIsInitialized = 1;
+      return true;
+    }
+
+    public void writeTo(com.google.protobuf.CodedOutputStream output)
+                        throws java.io.IOException {
+      getSerializedSize();
+      if (((bitField0_ & 0x00000001) == 0x00000001)) {
+        output.writeDouble(1, minimum_);
+      }
+      if (((bitField0_ & 0x00000002) == 0x00000002)) {
+        output.writeDouble(2, maximum_);
+      }
+      if (((bitField0_ & 0x00000004) == 0x00000004)) {
+        output.writeDouble(3, sum_);
+      }
+      getUnknownFields().writeTo(output);
+    }
+
+    private int memoizedSerializedSize = -1;
+    public int getSerializedSize() {
+      int size = memoizedSerializedSize;
+      if (size != -1) return size;
+
+      size = 0;
+      if (((bitField0_ & 0x00000001) == 0x00000001)) {
+        size += com.google.protobuf.CodedOutputStream
+          .computeDoubleSize(1, minimum_);
+      }
+      if (((bitField0_ & 0x00000002) == 0x00000002)) {
+        size += com.google.protobuf.CodedOutputStream
+          .computeDoubleSize(2, maximum_);
+      }
+      if (((bitField0_ & 0x00000004) == 0x00000004)) {
+        size += com.google.protobuf.CodedOutputStream
+          .computeDoubleSize(3, sum_);
+      }
+      size += getUnknownFields().getSerializedSize();
+      memoizedSerializedSize = size;
+      return size;
+    }
+
+    private static final long serialVersionUID = 0L;
+    @java.lang.Override
+    protected java.lang.Object writeReplace()
+        throws java.io.ObjectStreamException {
+      return super.writeReplace();
+    }
+
+    public static OrcProto.DoubleStatistics parseFrom(
+        com.google.protobuf.ByteString data)
+        throws com.google.protobuf.InvalidProtocolBufferException {
+      return PARSER.parseFrom(data);
+    }
+    public static OrcProto.DoubleStatistics parseFrom(
+        com.google.protobuf.ByteString data,
+        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+        throws com.google.protobuf.InvalidProtocolBufferException {
+      return PARSER.parseFrom(data, extensionRegistry);
+    }
+    public static OrcProto.DoubleStatistics parseFrom(byte[] data)
+        throws com.google.protobuf.InvalidProtocolBufferException {
+      return PARSER.parseFrom(data);
+    }
+    public static OrcProto.DoubleStatistics parseFrom(
+        byte[] data,
+        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+        throws com.google.protobuf.InvalidProtocolBufferException {
+      return PARSER.parseFrom(data, extensionRegistry);
+    }
+    public static OrcProto.DoubleStatistics parseFrom(java.io.InputStream input)
+        throws java.io.IOException {
+      return PARSER.parseFrom(input);
+    }
+    public static OrcProto.DoubleStatistics parseFrom(
+        java.io.InputStream input,
+        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+        throws java.io.IOException {
+      return PARSER.parseFrom(input, extensionRegistry);
+    }
+    public static OrcProto.DoubleStatistics parseDelimitedFrom(java.io.InputStream input)
+        throws java.io.IOException {
+      return PARSER.parseDelimitedFrom(input);
+    }
+    public static OrcProto.DoubleStatistics parseDelimitedFrom(
+        java.io.InputStream input,
+        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+        throws java.io.IOException {
+      return PARSER.parseDelimitedFrom(input, extensionRegistry);
+    }
+    public static OrcProto.DoubleStatistics parseFrom(
+        com.google.protobuf.CodedInputStream input)
+        throws java.io.IOException {
+      return PARSER.parseFrom(input);
+    }
+    public static OrcProto.DoubleStatistics parseFrom(
+        com.google.protobuf.CodedInputStream input,
+        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+        throws java.io.IOException {
+      return PARSER.parseFrom(input, extensionRegistry);
+    }
+
+    public static Builder newBuilder() { return Builder.create(); }
+    public Builder newBuilderForType() { return newBuilder(); }
+    public static Builder newBuilder(OrcProto.DoubleStatistics prototype) {
+      return newBuilder().mergeFrom(prototype);
+    }
+    public Builder toBuilder() { return newBuilder(this); }
+
+    @java.lang.Override
+    protected Builder newBuilderForType(
+        com.google.protobuf.GeneratedMessage.BuilderParent parent) {
+      Builder builder = new Builder(parent);
+      return builder;
+    }
+    /**
+     * Protobuf type {@code orc.proto.DoubleStatistics}
+     */
+    public static final class Builder extends
+        com.google.protobuf.GeneratedMessage.Builder<Builder>
+       implements OrcProto.DoubleStatisticsOrBuilder {
+      public static final com.google.protobuf.Descriptors.Descriptor
+          getDescriptor() {
+        return OrcProto.internal_static_orc_proto_DoubleStatistics_descriptor;
+      }
+
+      protected com.google.protobuf.GeneratedMessage.FieldAccessorTable
+          internalGetFieldAccessorTable() {
+        return OrcProto.internal_static_orc_proto_DoubleStatistics_fieldAccessorTable
+            .ensureFieldAccessorsInitialized(
+                OrcProto.DoubleStatistics.class, OrcProto.DoubleStatistics.Builder.class);
+      }
+
+      // Construct using OrcProto.DoubleStatistics.newBuilder()
+      private Builder() {
+        maybeForceBuilderInitialization();
+      }
+
+      private Builder(
+          com.google.protobuf.GeneratedMessage.BuilderParent parent) {
+        super(parent);
+        maybeForceBuilderInitialization();
+      }
+      private void maybeForceBuilderInitialization() {
+        if (com.google.protobuf.GeneratedMessage.alwaysUseFieldBuilders) {
+        }
+      }
+      private static Builder create() {
+        return new Builder();
+      }
+
+      public Builder clear() {
+        super.clear();
+        minimum_ = 0D;
+        bitField0_ = (bitField0_ & ~0x00000001);
+        maximum_ = 0D;
+        bitField0_ = (bitField0_ & ~0x00000002);
+        sum_ = 0D;
+        bitField0_ = (bitField0_ & ~0x00000004);
+        return this;
+      }
+
+      public Builder clone() {
+        return create().mergeFrom(buildPartial());
+      }
+
+      public com.google.protobuf.Descriptors.Descriptor
+          getDescriptorForType() {
+        return OrcProto.internal_static_orc_proto_DoubleStatistics_descriptor;
+      }
+
+      public OrcProto.DoubleStatistics getDefaultInstanceForType() {
+        return OrcProto.DoubleStatistics.getDefaultInstance();
+      }
+
+      public OrcProto.DoubleStatistics build() {
+        OrcProto.DoubleStatistics result = buildPartial();
+        if (!result.isInitialized()) {
+          throw newUninitializedMessageException(result);
+        }
+        return result;
+      }
+
+      public OrcProto.DoubleStatistics buildPartial() {
+        OrcProto.DoubleStatistics result = new OrcProto.DoubleStatistics(this);
+        int from_bitField0_ = bitField0_;
+        int to_bitField0_ = 0;
+        if (((from_bitField0_ & 0x00000001) == 0x00000001)) {
+          to_bitField0_ |= 0x00000001;
+        }
+        result.minimum_ = minimum_;
+        if (((from_bitField0_ & 0x00000002) == 0x00000002)) {
+          to_bitField0_ |= 0x00000002;
+        }
+        result.maximum_ = maximum_;
+        if (((from_bitField0_ & 0x00000004) == 0x00000004)) {
+          to_bitField0_ |= 0x00000004;
+        }
+        result.sum_ = sum_;
+        result.bitField0_ = to_bitField0_;
+        onBuilt();
+        return result;
+      }
+
+      public Builder mergeFrom(com.google.protobuf.Message other) {
+        if (other instanceof OrcProto.DoubleStatistics) {
+          return mergeFrom((OrcProto.DoubleStatistics)other);
+        } else {
+          super.mergeFrom(other);
+          return this;
+        }
+      }
+
+      public Builder mergeFrom(OrcProto.DoubleStatistics other) {
+        if (other == OrcProto.DoubleStatistics.getDefaultInstance()) return this;
+        if (other.hasMinimum()) {
+          setMinimum(other.getMinimum());
+        }
+        if (other.hasMaximum()) {
+          setMaximum(other.getMaximum());
+        }
+        if (other.hasSum()) {
+          setSum(other.getSum());
+        }
+        this.mergeUnknownFields(other.getUnknownFields());
+        return this;
+      }
+
+      public final boolean isInitialized() {
+        return true;
+      }
+
+      public Builder mergeFrom(
+          com.google.protobuf.CodedInputStream input,
+          com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+          throws java.io.IOException {
+        OrcProto.DoubleStatistics parsedMessage = null;
+        try {
+          parsedMessage = PARSER.parsePartialFrom(input, extensionRegistry);
+        } catch (com.google.protobuf.InvalidProtocolBufferException e) {
+          parsedMessage = (OrcProto.DoubleStatistics) e.getUnfinishedMessage();
+          throw e;
+        } finally {
+          if (parsedMessage != null) {
+            mergeFrom(parsedMessage);
+          }
+        }
+        return this;
+      }
+      private int bitField0_;
+
+      // optional double minimum = 1;
+      private double minimum_ ;
+      /**
+       * <code>optional double minimum = 1;</code>
+       */
+      public boolean hasMinimum() {
+        return ((bitField0_ & 0x00000001) == 0x00000001);
+      }
+      /**
+       * <code>optional double minimum = 1;</code>
+       */
+      public double getMinimum() {
+        return minimum_;
+      }
+      /**
+       * <code>optional double minimum = 1;</code>
+       */
+      public Builder setMinimum(double value) {
+        bitField0_ |= 0x00000001;
+        minimum_ = value;
+        onChanged();
+        return this;
+      }
+      /**
+       * <code>optional double minimum = 1;</code>
+       */
+      public Builder clearMinimum() {
+        bitField0_ = (bitField0_ & ~0x00000001);
+        minimum_ = 0D;
+        onChanged();
+        return this;
+      }
+
+      // optional double maximum = 2;
+      private double maximum_ ;
+      /**
+       * <code>optional double maximum = 2;</code>
+       */
+      public boolean hasMaximum() {
+        return ((bitField0_ & 0x00000002) == 0x00000002);
+      }
+      /**
+       * <code>optional double maximum = 2;</code>
+       */
+      public double getMaximum() {
+        return maximum_;
+      }
+      /**
+       * <code>optional double maximum = 2;</code>
+       */
+      public Builder setMaximum(double value) {
+        bitField0_ |= 0x00000002;
+        maximum_ = value;
+        onChanged();
+        return this;
+      }
+      /**
+       * <code>optional double maximum = 2;</code>
+       */
+      public Builder clearMaximum() {
+        bitField0_ = (bitField0_ & ~0x00000002);
+        maximum_ = 0D;
+        onChanged();
+        return this;
+      }
+
+      // optional double sum = 3;
+      private double sum_ ;
+      /**
+       * <code>optional double sum = 3;</code>
+       */
+      public boolean hasSum() {
+        return ((bitField0_ & 0x00000004) == 0x00000004);
+      }
+      /**
+       * <code>optional double sum = 3;</code>
+       */
+      public double getSum() {
+        return sum_;
+      }
+      /**
+       * <code>optional double sum = 3;</code>
+       */
+      public Builder setSum(double value) {
+        bitField0_ |= 0x00000004;
+        sum_ = value;
+        onChanged();
+        return this;
+      }
+      /**
+       * <code>optional double sum = 3;</code>
+       */
+      public Builder clearSum() {
+        bitField0_ = (bitField0_ & ~0x00000004);
+        sum_ = 0D;
+        onChanged();
+        return this;
+      }
+
+      // @@protoc_insertion_point(builder_scope:orc.proto.DoubleStatistics)
+    }
+
+    static {
+      defaultInstance = new DoubleStatistics(true);
+      defaultInstance.initFields();
+    }
+
+    // @@protoc_insertion_point(class_scope:orc.proto.DoubleStatistics)
+  }
+
+  public interface StringStatisticsOrBuilder
+      extends com.google.protobuf.MessageOrBuilder {
+
+    // optional string minimum = 1;
+    /**
+     * <code>optional string minimum = 1;</code>
+     */
+    boolean hasMinimum();
+    /**
+     * <code>optional string minimum = 1;</code>
+     */
+    java.lang.String getMinimum();
+    /**
+     * <code>optional string minimum = 1;</code>
+     */
+    com.google.protobuf.ByteString
+        getMinimumBytes();
+
+    // optional string maximum = 2;
+    /**
+     * <code>optional string maximum = 2;</code>
+     */
+    boolean hasMaximum();
+    /**
+     * <code>optional string maximum = 2;</code>
+     */
+    java.lang.String getMaximum();
+    /**
+     * <code>optional string maximum = 2;</code>
+     */
+    com.google.protobuf.ByteString
+        getMaximumBytes();
+
+    // optional sint64 sum = 3;
+    /**
+     * <code>optional sint64 sum = 3;</code>
+     *
+     * <pre>
+     * sum will store the total length of all strings in a stripe
+     * </pre>
+     */
+    boolean hasSum();
+    /**
+     * <code>optional sint64 sum = 3;</code>
+     *
+     * <pre>
+     * sum will store the total length of all strings in a stripe
+     * </pre>
+     */
+    long getSum();
+  }
+  /**
+   * Protobuf type {@code orc.proto.StringStatistics}
+   */
+  public static final class StringStatistics extends
+      com.google.protobuf.GeneratedMessage
+      implements StringStatisticsOrBuilder {
+    // Use StringStatistics.newBuilder() to construct.
+    private StringStatistics(com.google.protobuf.GeneratedMessage.Builder<?> builder) {
+      super(builder);
+      this.unknownFields = builder.getUnknownFields();
+    }
+    private StringStatistics(boolean noInit) { this.unknownFields = com.google.protobuf.UnknownFieldSet.getDefaultInstance(); }
+
+    private static final StringStatistics defaultInstance;
+    public static StringStatistics getDefaultInstance() {
+      return defaultInstance;
+    }
+
+    public StringStatistics getDefaultInstanceForType() {
+      return defaultInstance;
+    }
+
+    private final com.google.protobuf.UnknownFieldSet unknownFields;
+    @java.lang.Override
+    public final com.google.protobuf.UnknownFieldSet
+        getUnknownFields() {
+      return this.unknownFields;
+    }
+    private StringStatistics(
+        com.google.protobuf.CodedInputStream input,
+        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+        throws com.google.protobuf.InvalidProtocolBufferException {
+      initFields();
+      int mutable_bitField0_ = 0;
+      com.google.protobuf.UnknownFieldSet.Builder unknownFields =
+          com.google.protobuf.UnknownFieldSet.newBuilder();
+      try {
+        boolean done = false;
+        while (!done) {
+          int tag = input.readTag();
+          switch (tag) {
+            case 0:
+              done = true;
+              break;
+            default: {
+              if (!parseUnknownField(input, unknownFields,
+                                     extensionRegistry, tag)) {
+                done = true;
+              }
+              break;
+            }
+            case 10: {
+              bitField0_ |= 0x00000001;
+              minimum_ = input.readBytes();
+              break;
+            }
+            case 18: {
+              bitField0_ |= 0x00000002;
+              maximum_ = input.readBytes();
+              break;
+            }
+            case 24: {
+              bitField0_ |= 0x00000004;
+              sum_ = input.readSInt64();
+              break;
+            }
+          }
+        }
+      } catch (com.google.protobuf.InvalidProtocolBufferException e) {
+        throw e.setUnfinishedMessage(this);
+      } catch (java.io.IOException e) {
+        throw new com.google.protobuf.InvalidProtocolBufferException(
+            e.getMessage()).setUnfinishedMessage(this);
+      } finally {
+        this.unknownFields = unknownFields.build();
+        makeExtensionsImmutable();
+      }
+    }
+    public static final com.google.protobuf.Descriptors.Descriptor
+        getDescriptor() {
+      return OrcProto.internal_static_orc_proto_StringStatistics_descriptor;
+    }
+
+    protected com.google.protobuf.GeneratedMessage.FieldAccessorTable
+        internalGetFieldAccessorTable() {
+      return OrcProto.internal_static_orc_proto_StringStatistics_fieldAccessorTable
+          .ensureFieldAccessorsInitialized(
+              OrcProto.StringStatistics.class, OrcProto.StringStatistics.Builder.class);
+    }
+
+    public static com.google.protobuf.Parser<StringStatistics> PARSER =
+        new com.google.protobuf.AbstractParser<StringStatistics>() {
+      public StringStatistics parsePartialFrom(
+          com.google.protobuf.CodedInputStream input,
+          com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+          throws com.google.protobuf.InvalidProtocolBufferException {
+        return new StringStatistics(input, extensionRegistry);
+      }
+    };
+
+    @java.lang.Override
+    public com.google.protobuf.Parser<StringStatistics> getParserForType() {
+      return PARSER;
+    }
+
+    private int bitField0_;
+    // optional string minimum = 1;
+    public static final int MINIMUM_FIELD_NUMBER = 1;
+    private java.lang.Object minimum_;
+    /**
+     * <code>optional string minimum = 1;</code>
+     */
+    public boolean hasMinimum() {
+      return ((bitField0_ & 0x00000001) == 0x00000001);
+    }
+    /**
+     * <code>optional string minimum = 1;</code>
+     */
+    public java.lang.String getMinimum() {
+      java.lang.Object ref = minimum_;
+      if (ref instanceof java.lang.String) {
+        return (java.lang.String) ref;
+      } else {
+        com.google.protobuf.ByteString bs =
+            (com.google.protobuf.ByteString) ref;
+        java.lang.String s = bs.toStringUtf8();
+        if (bs.isValidUtf8()) {
+          minimum_ = s;
+        }
+        return s;
+      }
+    }
+    /**
+     * <code>optional string minimum = 1;</code>
+     */
+    public com.google.protobuf.ByteString
+        getMinimumBytes() {
+      java.lang.Object ref = minimum_;
+      if (ref instanceof java.lang.String) {
+        com.google.protobuf.ByteString b =
+            com.google.protobuf.ByteString.copyFromUtf8(
+                (java.lang.String) ref);
+        minimum_ = b;
+        return b;
+      } else {
+        return (com.google.protobuf.ByteString) ref;
+      }
+    }
+
+    // optional string maximum = 2;
+    public static final int MAXIMUM_FIELD_NUMBER = 2;
+    private java.lang.Object maximum_;
+    /**
+     * <code>optional string maximum = 2;</code>
+     */
+    public boolean hasMaximum() {
+      return ((bitField0_ & 0x00000002) == 0x00000002);
+    }
+    /**
+     * <code>optional string maximum = 2;</code>
+     */
+    public java.lang.String getMaximum() {
+      java.lang.Object ref = maximum_;
+      if (ref instanceof java.lang.String) {
+        return (java.lang.String) ref;
+      } else {
+        com.google.protobuf.ByteString bs =
+            (com.google.protobuf.ByteString) ref;
+        java.lang.String s = bs.toStringUtf8();
+        if (bs.isValidUtf8()) {
+          maximum_ = s;
+        }
+        return s;
+      }
+    }
+    /**
+     * <code>optional string maximum = 2;</code>
+     */
+    public com.google.protobuf.ByteString
+        getMaximumBytes() {
+      java.lang.Object ref = maximum_;
+      if (ref instanceof java.lang.String) {
+        com.google.protobuf.ByteString b =
+            com.google.protobuf.ByteString.copyFromUtf8(
+                (java.lang.String) ref);
+        maximum_ = b;
+        return b;
+      } else {
+        return (com.google.protobuf.ByteString) ref;
+      }
+    }
+
+    // optional sint64 sum = 3;
+    public static final int SUM_FIELD_NUMBER = 3;
+    private long sum_;
+    /**
+     * <code>optional sint64 sum = 3;</code>
+     *
+     * <pre>
+     * sum will store the total length of all strings in a stripe
+     * </pre>
+     */
+    public boolean hasSum() {
+      return ((bitField0_ & 0x00000004) == 0x00000004);
+    }
+    /**
+     * <code>optional sint64 sum = 3;</code>
+     *
+     * <pre>
+     * sum will store the total length of all strings in a stripe
+     * </pre>
+     */
+    public long getSum() {
+      return sum_;
+    }
+
+    private void initFields() {
+      minimum_ = "";
+      maximum_ = "";
+      sum_ = 0L;
+    }
+    private byte memoizedIsInitialized = -1;
+    public final boolean isInitialized() {
+      byte isInitialized = memoizedIsInitialized;
+      if (isInitialized != -1) return isInitialized == 1;
+
+      memoizedIsInitialized = 1;
+      return true;
+    }
+
+    public void writeTo(com.google.protobuf.CodedOutputStream output)
+                        throws java.io.IOException {
+      getSerializedSize();
+      if (((bitField0_ & 0x00000001) == 0x00000001)) {
+        output.writeBytes(1, getMinimumBytes());
+      }
+      if (((bitField0_ & 0x00000002) == 0x00000002)) {
+        output.writeBytes(2, getMaximumBytes());
+      }
+      if (((bitField0_ & 0x00000004) == 0x00000004)) {
+        output.writeSInt64(3, sum_);
+      }
+      getUnknownFields().writeTo(output);
+    }
+
+    private int memoizedSerializedSize = -1;
+    public int getSerializedSize() {
+      int size = memoizedSerializedSize;
+      if (size != -1) return size;
+
+      size = 0;
+      if (((bitField0_ & 0x00000001) == 0x00000001)) {
+        size += com.google.protobuf.CodedOutputStream
+          .computeBytesSize(1, getMinimumBytes());
+      }
+      if (((bitField0_ & 0x00000002) == 0x00000002)) {
+        size += com.google.protobuf.CodedOutputStream
+          .computeBytesSize(2, getMaximumBytes());
+      }
+      if (((bitField0_ & 0x00000004) == 0x00000004)) {
+        size += com.google.protobuf.CodedOutputStream
+          .computeSInt64Size(3, sum_);
+      }
+      size += getUnknownFields().getSerializedSize();
+      memoizedSerializedSize = size;
+      return size;
+    }
+
+    private static final long serialVersionUID = 0L;
+    @java.lang.Override
+    protected java.lang.Object writeReplace()
+        throws java.io.ObjectStreamException {
+      return super.writeReplace();
+    }
+
+    public static OrcProto.StringStatistics parseFrom(
+        com.google.protobuf.ByteString data)
+        throws com.google.protobuf.InvalidProtocolBufferException {
+      return PARSER.parseFrom(data);
+    }
+    public static OrcProto.StringStatistics parseFrom(
+        com.google.protobuf.ByteString data,
+        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+        throws com.google.protobuf.InvalidProtocolBufferException {
+      return PARSER.parseFrom(data, extensionRegistry);
+    }
+    public static OrcProto.StringStatistics parseFrom(byte[] data)
+        throws com.google.protobuf.InvalidProtocolBufferException {
+      return PARSER.parseFrom(data);
+    }
+    public static OrcProto.StringStatistics parseFrom(
+        byte[] data,
+        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+        throws com.google.protobuf.InvalidProtocolBufferException {
+      return PARSER.parseFrom(data, extensionRegistry);
+    }
+    public static OrcProto.StringStatistics parseFrom(java.io.InputStream input)
+        throws java.io.IOException {
+      return PARSER.parseFrom(input);
+    }
+    public static OrcProto.StringStatistics parseFrom(
+        java.io.InputStream input,
+        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+        throws java.io.IOException {
+      return PARSER.parseFrom(input, extensionRegistry);
+    }
+    public static OrcProto.StringStatistics parseDelimitedFrom(java.io.InputStream input)
+        throws java.io.IOException {
+      return PARSER.parseDelimitedFrom(input);
+    }
+    public static OrcProto.StringStatistics parseDelimitedFrom(
+        java.io.InputStream input,
+        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+        throws java.io.IOException {
+      return PARSER.parseDelimitedFrom(input, extensionRegistry);
+    }
+    public static OrcProto.StringStatistics parseFrom(
+        com.google.protobuf.CodedInputStream input)
+        throws java.io.IOException {
+      return PARSER.parseFrom(input);
+    }
+    public static OrcProto.StringStatistics parseFrom(
+        com.google.protobuf.CodedInputStream input,
+        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+        throws java.io.IOException {
+      return PARSER.parseFrom(input, extensionRegistry);
+    }
+
+    public static Builder newBuilder() { return Builder.create(); }
+    public Builder newBuilderForType() { return newBuilder(); }
+    public static Builder newBuilder(OrcProto.StringStatistics prototype) {
+      return newBuilder().mergeFrom(prototype);
+    }
+    public Builder toBuilder() { return newBuilder(this); }
+
+    @java.lang.Override
+    protected Builder newBuilderForType(
+        com.google.protobuf.GeneratedMessage.BuilderParent parent) {
+      Builder builder = new Builder(parent);
+      return builder;
+    }
+    /**
+     * Protobuf type {@code orc.proto.StringStatistics}
+     */
+    public static final class Builder extends
+        com.google.protobuf.GeneratedMessage.Builder<Builder>
+       implements OrcProto.StringStatisticsOrBuilder {
+      public static final com.google.protobuf.Descriptors.Descriptor
+          getDescriptor() {
+        return OrcProto.internal_static_orc_proto_StringStatistics_descriptor;
+      }
+
+      protected com.google.protobuf.GeneratedMessage.FieldAccessorTable
+          internalGetFieldAccessorTable() {
+        return OrcProto.internal_static_orc_proto_StringStatistics_fieldAccessorTable
+            .ensureFieldAccessorsInitialized(
+                OrcProto.StringStatistics.class, OrcProto.StringStatistics.Builder.class);
+      }
+
+      // Construct using OrcProto.StringStatistics.newBuilder()
+      private Builder() {
+        maybeForceBuilderInitialization();
+      }
+
+      private Builder(
+          com.google.protobuf.GeneratedMessage.BuilderParent parent) {
+        super(parent);
+        maybeForceBuilderInitialization();
+      }
+      private void maybeForceBuilderInitialization() {
+        if (com.google.protobuf.GeneratedMessage.alwaysUseFieldBuilders) {
+        }
+      }
+      private static Builder create() {
+        return new Builder();
+      }
+
+      public Builder clear() {
+        super.clear();
+        minimum_ = "";
+        bitField0_ = (bitField0_ & ~0x00000001);
+        maximum_ = "";
+        bitField0_ = (bitField0_ & ~0x00000002);
+        sum_ = 0L;
+        bitField0_ = (bitField0_ & ~0x00000004);
+        return this;
+      }
+
+      public Builder clone() {
+        return create().mergeFrom(buildPartial());
+      }
+
+      public com.google.protobuf.Descriptors.Descriptor
+          getDescriptorForType() {
+        return OrcProto.internal_static_orc_proto_StringStatistics_descriptor;
+      }
+
+      public OrcProto.StringStatistics getDefaultInstanceForType() {
+        return OrcProto.StringStatistics.getDefaultInstance();
+      }
+
+      public OrcProto.StringStatistics build() {
+        OrcProto.StringStatistics result = buildPartial();
+        if (!result.isInitialized()) {
+          throw newUninitializedMessageException(result);
+        }
+        return result;
+      }
+
+      public OrcProto.StringStatistics buildPartial() {
+        OrcProto.StringStatistics result = new OrcProto.StringStatistics(this);
+        int from_bitField0_ = bitField0_;
+        int to_bitField0_ = 0;
+        if (((from_bitField0_ & 0x00000001) == 0x00000001)) {
+          to_bitField0_ |= 0x00000001;
+        }
+        result.minimum_ = minimum_;
+        if (((from_bitField0_ & 0x00000002) == 0x00000002)) {
+          to_bitField0_ |= 0x00000002;
+        }
+        result.maximum_ = maximum_;
+        if (((from_bitField0_ & 0x00000004) == 0x00000004)) {
+          to_bitField0_ |= 0x00000004;
+        }
+        result.sum_ = sum_;
+        result.bitField0_ = to_bitField0_;
+        onBuilt();
+        return result;
+      }
+
+      public Builder mergeFrom(com.google.protobuf.Message other) {
+        if (other instanceof OrcProto.StringStatistics) {
+          return mergeFrom((OrcProto.StringStatistics)other);
+        } else {
+          super.mergeFrom(other);
+          return this;
+        }
+      }
+
+      public Builder mergeFrom(OrcProto.StringStatistics other) {
+        if (other == OrcProto.StringStatistics.getDefaultInstance()) return this;
+        if (other.hasMinimum()) {
+          bitField0_ |= 0x00000001;
+          minimum_ = other.minimum_;
+          onChanged();
+        }
+        if (other.hasMaximum()) {
+          bitField0_ |= 0x00000002;
+          maximum_ = other.maximum_;
+          onChanged();
+        }
+        if (other.hasSum()) {
+          setSum(other.getSum());
+        }
+        this.mergeUnknownFields(other.getUnknownFields());
+        return this;
+      }
+
+      public final boolean isInitialized() {
+        return true;
+      }
+
+      public Builder mergeFrom(
+          com.google.protobuf.CodedInputStream input,
+          com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+          throws java.io.IOException {
+        OrcProto.StringStatistics parsedMessage = null;
+        try {
+          parsedMessage = PARSER.parsePartialFrom(input, extensionRegistry);
+        } catch (com.google.protobuf.InvalidProtocolBufferException e) {
+          parsedMessage = (OrcProto.StringStatistics) e.getUnfinishedMessage();
+          throw e;
+        } finally {
+          if (parsedMessage != null) {
+            mergeFrom(parsedMessage);
+          }
+        }
+        return this;
+      }
+      private int bitField0_;
+
+      // optional string minimum = 1;
+      private java.lang.Object minimum_ = "";
+      /**
+       * <code>optional string minimum = 1;</code>
+       */
+      public boolean hasMinimum() {
+        return ((bitField0_ & 0x00000001) == 0x00000001);
+      }
+      /**
+       * <code>optional string minimum = 1;</code>
+       */
+      public java.lang.String getMinimum() {
+        java.lang.Object ref = minimum_;
+        if (!(ref instanceof java.lang.String)) {
+          java.lang.String s = ((com.google.protobuf.ByteString) ref)
+              .toStringUtf8();
+          minimum_ = s;
+          return s;
+        } else {
+          return (java.lang.String) ref;
+        }
+      }
+      /**
+       * <code>optional string minimum = 1;</code>
+       */
+      public com.google.protobuf.ByteString
+          getMinimumBytes() {
+        java.lang.Object ref = minimum_;
+        if (ref instanceof String) {
+          com.google.protobuf.ByteString b =
+              com.google.protobuf.ByteString.copyFromUtf8(
+                  (java.lang.String) ref);
+          minimum_ = b;
+          return b;
+        } else {
+          return (com.google.protobuf.ByteString) ref;
+        }
+      }
+      /**
+       * <code>optional string minimum = 1;</code>
+       */
+      public Builder setMinimum(
+          java.lang.String value) {
+        if (value == null) {
+          throw new NullPointerException();
+        }
+        bitField0_ |= 0x00000001;
+        minimum_ = value;
+        onChanged();
+        return this;
+      }
+      /**
+       * <code>optional string minimum = 1;</code>
+       */
+      public Builder clearMinimum() {
+        bitField0_ = (bitField0_ & ~0x00000001);
+        minimum_ = getDefaultInstance().getMinimum();
+        onChanged();
+        return this;
+      }
+      /**
+       * <code>optional string minimum = 1;</code>
+       */
+      public Builder setMinimumBytes(
+          com.google.protobuf.ByteString value) {
+        if (value == null) {
+          throw new NullPointerException();
+        }
+        bitField0_ |= 0x00000001;
+        minimum_ = value;
+        onChanged();
+        return this;
+      }
+
+      // optional string maximum = 2;
+      private java.lang.Object maximum_ = "";
+      /**
+       * <code>optional string maximum = 2;</code>
+       */
+      public boolean hasMaximum() {
+        return ((bitField0_ & 0x00000002) == 0x00000002);
+      }
+      /**
+       * <code>optional string maximum = 2;</code>
+       */
+      public java.lang.String getMaximum() {
+        java.lang.Object ref = maximum_;
+        if (!(ref instanceof java.lang.String)) {
+          java.lang.String s = ((com.google.protobuf.ByteString) ref)
+              .toStringUtf8();
+          maximum_ = s;
+          return s;
+        } else {
+          return (java.lang.String) ref;
+        }
+      }
+      /**
+       * <code>optional string maximum = 2;</code>
+       */
+      public com.google.protobuf.ByteString
+          getMaximumBytes() {
+        java.lang.Object ref = maximum_;
+        if (ref instanceof String) {
+          com.google.protobuf.ByteString b =
+              com.google.protobuf.ByteString.copyFromUtf8(
+                  (java.lang.String) ref);
+          maximum_ = b;
+          return b;
+        } else {
+          return (com.google.protobuf.ByteString) ref;
+        }
+      }
+      /**
+       * <code>optional string maximum = 2;</code>
+       */
+      public Builder setMaximum(
+          java.lang.String value) {
+        if (value == null) {
+          throw new NullPointerException();
+        }
+        bitField0_ |= 0x00000002;
+        maximum_ = value;
+        onChanged();
+        return this;
+      }
+      /**
+       * <code>optional string maximum = 2;</code>
+       */
+      public Builder clearMaximum() {
+        bitField0_ = (bitField0_ & ~0x00000002);
+        maximum_ = getDefaultInstance().getMaximum();
+        onChanged();
+        return this;
+      }
+      /**
+       * <code>optional string maximum = 2;</code>
+       */
+      public Builder setMaximumBytes(
+          com.google.protobuf.ByteString value) {
+        if (value == null) {
+          throw new NullPointerException();
+        }
+        bitField0_ |= 0x00000002;
+        maximum_ = value;
+        onChanged();
+        return this;
+      }
+
+      // optional sint64 sum = 3;
+      private long sum_ ;
+      /**
+       * <code>optional sint64 sum = 3;</code>
+       *
+       * <pre>
+       * sum will store the total length of all strings in a stripe
+       * </pre>
+       */
+      public boolean hasSum() {
+        return ((bitField0_ & 0x00000004) == 0x00000004);
+      }
+      /**
+       * <code>optional sint64 sum = 3;</code>
+       *
+       * <pre>
+       * sum will store the total length of all strings in a stripe
+       * </pre>
+       */
+      public long getSum() {
+        return sum_;
+      }
+      /**
+       * <code>optional sint64 sum = 3;</code>
+       *
+       * <pre>
+       * sum will store the total length of all strings in a stripe
+       * </pre>
+       */
+      public Builder setSum(long value) {
+        bitField0_ |= 0x00000004;
+        sum_ = value;
+        onChanged();
+        return this;
+      }
+      /**
+       * <code>optional sint64 sum = 3;</code>
+       *
+       * <pre>
+       * sum will store the total length of all strings in a stripe
+       * </pre>
+       */
+      public Builder clearSum() {
+        bitField0_ = (bitField0_ & ~0x00000004);
+        sum_ = 0L;
+        onChanged();
+        return this;
+      }
+
+      // @@protoc_insertion_point(builder_scope:orc.proto.StringStatistics)
+    }
+
+    static {
+      defaultInstance = new StringStatistics(true);
+      defaultInstance.initFields();
+    }
+
+    // @@protoc_insertion_point(class_scope:orc.proto.StringStatistics)
+  }
+
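Similarly, a short illustrative sketch of the StringStatistics builder above; per the field comment, the sint64 sum holds the total length of all strings in a stripe (sketch only, not part of the diff):

    OrcProto.StringStatistics strStats = OrcProto.StringStatistics.newBuilder()
        .setMinimum("apple")
        .setMaximum("zebra")
        .setSum("apple".length() + "zebra".length())   // total string length
        .build();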
+  public interface BucketStatisticsOrBuilder
+      extends com.google.protobuf.MessageOrBuilder {
+
+    // repeated uint64 count = 1 [packed = true];
+    /**
+     * <code>repeated uint64 count = 1 [packed = true];</code>
+     */
+    java.util.List<java.lang.Long> getCountList();
+    /**
+     * <code>repeated uint64 count = 1 [packed = true];</code>
+     */
+    int getCountCount();
+    /**
+     * <code>repeated uint64 count = 1 [packed = true];</code>
+     */
+    long getCount(int index);
+  }
+  /**
+   * Protobuf type {@code orc.proto.BucketStatistics}
+   */
+  public static final class BucketStatistics extends
+      com.google.protobuf.GeneratedMessage
+      implements BucketStatisticsOrBuilder {
+    // Use BucketStatistics.newBuilder() to construct.
+    private BucketStatistics(com.google.protobuf.GeneratedMessage.Builder<?> builder) {
+      super(builder);
+      this.unknownFields = builder.getUnknownFields();
+    }
+    private BucketStatistics(boolean noInit) { this.unknownFields = com.google.protobuf.UnknownFieldSet.getDefaultInstance(); }
+
+    private static final BucketStatistics defaultInstance;
+    public static BucketStatistics getDefaultInstance() {
+      return defaultInstance;
+    }
+
+    public BucketStatistics getDefaultInstanceForType() {
+      return defaultInstance;
+    }
+
+    private final com.google.protobuf.UnknownFieldSet unknownFields;
+    @java.lang.Override
+    public final com.google.protobuf.UnknownFieldSet
+        getUnknownFields() {
+      return this.unknownFields;
+    }
+    private BucketStatistics(
+        com.google.protobuf.CodedInputStream input,
+        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+        throws com.google.protobuf.InvalidProtocolBufferException {
+      initFields();
+      int mutable_bitField0_ = 0;
+      com.google.protobuf.UnknownFieldSet.Builder unknownFields =
+          com.google.protobuf.UnknownFieldSet.newBuilder();
+      try {
+        boolean done = false;
+        while (!done) {
+          int tag = input.readTag();
+          switch (tag) {
+            case 0:
+              done = true;
+              break;
+            default: {
+              if (!parseUnknownField(input, unknownFields,
+                                     extensionRegistry, tag)) {
+                done = true;
+              }
+              break;
+            }
+            case 8: {
+              if (!((mutable_bitField0_ & 0x00000001) == 0x00000001)) {
+                count_ = new java.util.ArrayList<java.lang.Long>();
+                mutable_bitField0_ |= 0x00000001;
+              }
+              count_.add(input.readUInt64());
+              break;
+            }
+            case 10: {
+              int length = input.readRawVarint32();
+              int limit = input.pushLimit(length);
+              if (!((mutable_bitField0_ & 0x00000001) == 0x00000001) && input.getBytesUntilLimit() > 0) {
+                count_ = new java.util.ArrayList<java.lang.Long>();
+                mutable_bitField0_ |= 0x00000001;
+              }
+              while (input.getBytesUntilLimit() > 0) {
+                count_.add(input.readUInt64());
+              }
+              input.popLimit(limit);
+              break;
+            }
+          }
+        }
+      } catch (com.google.protobuf.InvalidProtocolBufferException e) {
+        throw e.setUnfinishedMessage(this);
+      } catch (java.io.IOException e) {
+        throw new com.google.protobuf.InvalidProtocolBufferException(
+            e.getMessage()).setUnfinishedMessage(this);
+      } finally {
+        if (((mutable_bitField0_ & 0x00000001) == 0x00000001)) {
+          count_ = java.util.Collections.unmodifiableList(count_);
+        }
+        this.unknownFields = unknownFields.build();
+        makeExtensionsImmutable();
+      }
+    }
+    public static final com.google.protobuf.Descriptors.Descriptor
+        getDescriptor() {
+      return OrcProto.internal_static_orc_proto_BucketStatistics_descriptor;
+    }
+
+    protected com.google.protobuf.GeneratedMessage.FieldAccessorTable
+        internalGetFieldAccessorTable() {
+      return OrcProto.internal_static_orc_proto_BucketStatistics_fieldAccessorTable
+          .ensureFieldAccessorsInitialized(
+              OrcProto.BucketStatistics.class, OrcProto.BucketStatistics.Builder.class);
+    }
+
+    public static com.google.protobuf.Parser<BucketStatistics> PARSER =
+        new com.google.protobuf.AbstractParser<BucketStatistics>() {
+      public BucketStatistics parsePartialFrom(
+          com.google.protobuf.CodedInputStream input,
+          com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+          throws com.google.protobuf.InvalidProtocolBufferException {
+        return new BucketStatistics(input, extensionRegistry);
+      }
+    };
+
+    @java.lang.Override
+    public com.google.protobuf.Parser<BucketStatistics> getParserForType() {
+      return PARSER;
+    }
+
+    // repeated uint64 count = 1 [packed = true];
+    public static final int COUNT_FIELD_NUMBER = 1;
+    private java.util.List<java.lang.Long> count_;
+    /**
+     * <code>repeated uint64 count = 1 [packed = true];</code>
+     */
+    public java.util.List<java.lang.Long>
+        getCountList() {
+      return count_;
+    }
+    /**
+     * <code>repeated uint64 count = 1 [packed = true];</code>
+     */
+    public int getCountCount() {
+      return count_.size();
+    }
+    /**
+     * <code>repeated uint64 count = 1 [packed = true];</code>
+     */
+    public long getCount(int index) {
+      return count_.get(index);
+    }
+    private int countMemoizedSerializedSize = -1;
+
+    private void initFields() {
+      count_ = java.util.Collections.emptyList();
+    }
+    private byte memoizedIsInitialized = -1;
+    public final boolean isInitialized() {
+      byte isInitialized = memoizedIsInitialized;
+      if (isInitialized != -1) return isInitialized == 1;
+
+      memoizedIsInitialized = 1;
+      return true;
+    }
+
+    public void writeTo(com.google.protobuf.CodedOutputStream output)
+                        throws java.io.IOException {
+      getSerializedSize();
+      if (getCountList().size() > 0) {
+        output.writeRawVarint32(10);
+        output.writeRawVarint32(countMemoizedSerializedSize);
+      }
+      for (int i = 0; i < count_.size(); i++) {
+        output.writeUInt64NoTag(count_.get(i));
+      }
+      getUnknownFields().writeTo(output);
+    }
+
+    private int memoizedSerializedSize = -1;
+    public int getSerializedSize() {
+      int size = memoizedSerializedSize;
+      if (size != -1) return size;
+
+      size = 0;
+      {
+        int dataSize = 0;
+        for (int i = 0; i < count_.size(); i++) {
+          dataSize += com.google.protobuf.CodedOutputStream
+            .computeUInt64SizeNoTag(count_.get(i));
+        }
+        size += dataSize;
+        if (!getCountList().isEmpty()) {
+          size += 1;
+          size += com.google.protobuf.CodedOutputStream
+              .computeInt32SizeNoTag(dataSize);
+        }
+        countMemoizedSerializedSize = dataSize;
+      }
+      size += getUnknownFields().getSerializedSize();
+      memoizedSerializedSize = size;
+      return size;
+    }
+
+    private static final long serialVersionUID = 0L;
+    @java.lang.Override
+    protected java.lang.Object writeReplace()
+        throws java.io.ObjectStreamException {
+      return super.writeReplace();
+    }
+
+    public static OrcProto.BucketStatistics parseFrom(
+        com.google.protobuf.ByteString data)
+        throws com.google.protobuf.InvalidProtocolBufferException {
+      return PARSER.parseFrom(data);
+    }
+    public static OrcProto.BucketStatistics parseFrom(
+        com.google.protobuf.ByteString data,
+        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+        throws com.google.protobuf.InvalidProtocolBufferException {
+      return PARSER.parseFrom(data, extensionRegistry);
+    }
+    public static OrcProto.BucketStatistics parseFrom(byte[] data)
+        throws com.google.protobuf.InvalidProtocolBufferException {
+      return PARSER.parseFrom(data);
+    }
+    public static OrcProto.BucketStatistics parseFrom(
+        byte[] data,
+        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+        throws com.google.protobuf.InvalidProtocolBufferException {
+      return PARSER.parseFrom(data, extensionRegistry);
+    }
+    public static OrcProto.BucketStatistics parseFrom(java.io.InputStream input)
+        throws java.io.IOException {
+      return PARSER.parseFrom(input);
+    }
+    public static OrcProto.BucketStatistics parseFrom(
+        java.io.InputStream input,
+        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+        throws java.io.IOException {
+      return PARSER.parseFrom(input, extensionRegistry);
+    }
+    public static OrcProto.BucketStatistics parseDelimitedFrom(java.io.InputStream input)
+        throws java.io.IOException {
+      return PARSER.parseDelimitedFrom(input);
+    }
+    public static OrcProto.BucketStatistics parseDelimitedFrom(
+        java.io.InputStream input,
+        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+        throws java.io.IOException {
+      return PARSER.parseDelimitedFrom(input, extensionRegistry);
+    }
+    public static OrcProto.BucketStatistics parseFrom(
+        com.google.protobuf.CodedInputStream input)
+        throws java.io.IOException {
+      return PARSER.parseFrom(input);
+    }
+    public static OrcProto.BucketStatistics parseFrom(
+        com.google.protobuf.CodedInputStream input,
+        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+        throws java.io.IOException {
+      return PARSER.parseFrom(input, extensionRegistry);
+    }
+
+    public static Builder newBuilder() { return Builder.create(); }
+    public Builder newBuilderForType() { return newBuilder(); }
+    public static Builder newBuilder(OrcProto.BucketStatistics prototype) {
+      return newBuilder().mergeFrom(prototype);
+    }
+    public Builder toBuilder() { return newBuilder(this); }
+
+    @java.lang.Override
+    protected Builder newBuilderForType(
+        com.google.protobuf.GeneratedMessage.BuilderParent parent) {
+      Builder builder = new Builder(parent);
+      return builder;
+    }
+    /**
+     * Protobuf type {@code orc.proto.BucketStatistics}
+     */
+    public static final class Builder extends
+        com.google.protobuf.GeneratedMessage.Builder<Builder>
+       implements OrcProto.BucketStatisticsOrBuilder {
+      public static final com.google.protobuf.Descriptors.Descriptor
+          getDescriptor() {
+        return OrcProto.internal_static_orc_proto_BucketStatistics_descriptor;
+      }
+
+      protected com.google.protobuf.GeneratedMessage.FieldAccessorTable
+          internalGetFieldAccessorTable() {
+        return OrcProto.internal_static_orc_proto_BucketStatistics_fieldAccessorTable
+            .ensureFieldAccessorsInitialized(
+                OrcProto.BucketStatistics.class, OrcProto.BucketStatistics.Builder.class);
+      }
+
+      // Construct using OrcProto.BucketStatistics.newBuilder()
+      private Builder() {
+        maybeForceBuilderInitialization();
+      }
+
+      private Builder(
+          com.google.protobuf.GeneratedMessage.BuilderParent parent) {
+        super(parent);
+        maybeForceBuilderInitialization();
+      }
+      private void maybeForceBuilderInitialization() {
+        if (com.google.protobuf.GeneratedMessage.alwaysUseFieldBuilders) {
+        }
+      }
+      private static Builder create() {
+        return new Builder();
+      }
+
+      public Builder clear() {
+        super.clear();
+        count_ = java.util.Collections.emptyList();
+        bitField0_ = (bitField0_ & ~0x00000001);
+        return this;
+      }
+
+      public Builder clone() {
+        return create().mergeFrom(buildPartial());
+      }
+
+      public com.google.protobuf.Descriptors.Descriptor
+          getDescriptorForType() {
+        return OrcProto.internal_static_orc_proto_BucketStatistics_descriptor;
+      }
+
+      public OrcProto.BucketStatistics getDefaultInstanceForType() {
+        return OrcProto.BucketStatistics.getDefaultInstance();
+      }
+
+      public OrcProto.BucketStatistics build() {
+        OrcProto.BucketStatistics result = buildPartial();
+        if (!result.isInitialized()) {
+          throw newUninitializedMessageException(result);
+        }
+        return result;
+      }
+
+      public OrcProto.BucketStatistics buildPartial() {
+        OrcProto.BucketStatistics result = new OrcProto.BucketStatistics(this);
+        int from_bitField0_ = bitField0_;
+        if (((bitField0_ & 0x00000001) == 0x00000001)) {
+          count_ = java.util.Collections.unmodifiableList(count_);
+          bitField0_ = (bitField0_ & ~0x00000001);
+        }
+        result.count_ = count_;
+        onBuilt();
+        return result;
+      }
+
+      public Builder mergeFrom(com.google.protobuf.Message other) {
+        if (other instanceof OrcProto.BucketStatistics) {
+          return mergeFrom((OrcProto.BucketStatistics)other);
+        } else {
+          super.mergeFrom(other);
+          return this;
+        }
+      }
+
+      public Builder mergeFrom(OrcProto.BucketStatistics other) {
+        if (other == OrcProto.BucketStatistics.getDefaultInstance()) return this;
+        if (!other.count_.isEmpty()) {
+          if (count_.isEmpty()) {
+            count_ = other.count_;
+            bitField0_ = (bitField0_ & ~0x00000001);
+          } else {
+            ensureCountIsMutable();
+            count_.addAll(other.count_);
+          }
+          onChanged();
+        }
+        this.mergeUnknownFields(other.getUnknownFields());
+        return this;
+      }
+
+      public final boolean isInitialized() {
+        return true;
+      }
+
+      public Builder mergeFrom(
+          com.google.protobuf.CodedInputStream input,
+          com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+          throws java.io.IOException {
+        OrcProto.BucketStatistics parsedMessage = null;
+        try {
+          parsedMessage = PARSER.parsePartialFrom(input, extensionRegistry);
+        } catch (com.google.protobuf.InvalidProtocolBufferException e) {
+          parsedMessage = (OrcProto.BucketStatistics) e.getUnfinishedMessage();
+          throw e;
+        } finally {
+          if (parsedMessage != null) {
+            mergeFrom(parsedMessage);
+          }
+        }
+        return this;
+      }
+      private int bitField0_;
+
+      // repeated uint64 count = 1 [packed = true];
+      private java.util.List<java.lang.Long> count_ = java.util.Collections.emptyList();
+      private void ensureCountIsMutable() {
+        if (!((bitField0_ & 0x00000001) == 0x00000001)) {
+          count_ = new java.util.ArrayList<java.lang.Long>(count_);
+          bitField0_ |= 0x00000001;
+         }
+      }
+      /**
+       * <code>repeated uint64 count = 1 [packed = true];</code>
+       */
+      public java.util.List<java.lang.Long>
+          getCountList() {
+        return java.util.Collections.unmodifiableList(count_);
+      }
+      /**
+       * <code>repeated uint64 count = 1 [packed = true];</code>
+       */
+      public int getCountCount() {
+        return count_.size();
+      }
+      /**
+       * <code>repeated uint64 count = 1 [packed = true];</code>
+       */
+      public long getCount(int index) {
+        return count_.get(index);
+      }
+      /**
+       * <code>repeated uint64 count = 1 [packed = true];</code>
+       */
+      public Builder setCount(
+          int index, long value) {
+        ensureCountIsMutable();
+        count_.set(index, value);
+        onChanged();
+        return this;
+      }
+      /**
+       * <code>repeated uint64 count = 1 [packed = true];</code>
+       */
+      public Builder addCount(long value) {
+        ensureCountIsMutable();
+        count_.add(value);
+        onChanged();
+        return this;
+      }
+      /**
+       * <code>repeated uint64 count = 1 [packed = true];</code>
+       */
+      public Builder addAllCount(
+          java.lang.Iterable<? extends java.lang.Long> values) {
+        ensureCountIsMutable();
+        super.addAll(values, count_);
+        onChanged();
+        return this;
+      }
+      /**
+       * <code>repeated uint64 count = 1 [packed = true];</code>
+       */
+      public Builder clearCount() {
+        count_ = java.util.Collections.emptyList();
+        bitField0_ = (bitField0_ & ~0x00000001);
+        onChanged();
+        return this;
+      }
+
+      // @@protoc_insertion_point(builder_scope:orc.proto.BucketStatistics)
+    }
+
+    static {
+      defaultInstance = new BucketStatistics(true);
+      defaultInstance.initFields();
+    }
+
+    // @@protoc_insertion_point(class_scope:orc.proto.BucketStatistics)
+  }
+
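An illustrative sketch of the BucketStatistics message above, whose single field is a packed repeated uint64 with one entry per bucket (sketch only; the bucket semantics are not spelled out in this excerpt):

    OrcProto.BucketStatistics bucketStats = OrcProto.BucketStatistics.newBuilder()
        .addCount(10L)    // count for the first bucket
        .addCount(90L)    // count for the second bucket
        .build();
    long first = bucketStats.getCount(0);         // 10
    int buckets = bucketStats.getCountCount();    // 2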
+  public interface DecimalStatisticsOrBuilder
+      extends com.google.protobuf.MessageOrBuilder {
+
+    // optional string minimum = 1;
+    /**
+     * <code>optional string minimum = 1;</code>
+     */
+    boolean hasMinimum();
+    /**
+     * <code>optional string minimum = 1;</code>
+     */
+    java.lang.String getMinimum();
+    /**
+     * <code>optional string minimum = 1;</code>
+     */
+    com.google.protobuf.ByteString
+        getMinimumBytes();
+
+    // optional string maximum = 2;
+    /**
+     * <code>optional string maximum = 2;</code>
+     */
+    boolean hasMaximum();
+    /**
+     * <code>optional string maximum = 2;</code>
+     */
+    java.lang.String getMaximum();
+    /**
+     * <code>optional string maximum = 2;</code>
+     */
+    com.google.protobuf.ByteString
+        getMaximumBytes();
+
+    // optional string sum = 3;
+    /**
+     * <code>optional string sum = 3;</code>
+     */
+    boolean hasSum();
+    /**
+     * <code>optional string sum = 3;</code>
+     */
+    java.lang.String getSum();
+    /**
+     * <code>optional string sum = 3;</code>
+     */
+    com.google.protobuf.ByteString
+        getSumBytes();
+  }
+  /**
+   * Protobuf type {@code orc.proto.DecimalStatistics}
+   */
+  public static final class DecimalStatistics extends
+      com.google.protobuf.GeneratedMessage
+      implements DecimalStatisticsOrBuilder {
+    // Use DecimalStatistics.newBuilder() to construct.
+    private DecimalStatistics(com.google.protobuf.GeneratedMessage.Builder<?> builder) {
+      super(builder);
+      this.unknownFields = builder.getUnknownFields();
+    }
+    private DecimalStatistics(boolean noInit) { this.unknownFields = com.google.protobuf.UnknownFieldSet.getDefaultInstance(); }
+
+    private static final DecimalStatistics defaultInstance;
+    public static DecimalStatistics getDefaultInstance() {
+      return defaultInstance;
+    }
+
+    public DecimalStatistics getDefaultInstanceForType() {
+      return defaultInstance;
+    }
+
+    private final com.google.protobuf.UnknownFieldSet unknownFields;
+    @java.lang.Override
+    public final com.google.protobuf.UnknownFieldSet
+        getUnknownFields() {
+      return this.unknownFields;
+    }
+    private DecimalStatistics(
+        com.google.protobuf.CodedInputStream input,
+        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+        throws com.google.protobuf.InvalidProtocolBufferException {
+      initFields();
+      int mutable_bitField0_ = 0;
+      com.google.protobuf.UnknownFieldSet.Builder unknownFields =
+          com.google.protobuf.UnknownFieldSet.newBuilder();
+      try {
+        boolean done = false;
+        while (!done) {
+          int tag = input.readTag();
+          switch (tag) {
+            case 0:
+              done = true;
+              break;
+            default: {
+              if (!parseUnknownField(input, unknownFields,
+                                     extensionRegistry, tag)) {
+                done = true;
+              }
+              break;
+            }
+            case 10: {
+              bitField0_ |= 0x00000001;
+              minimum_ = input.readBytes();
+              break;
+            }
+            case 18: {
+              bitField0_ |= 0x00000002;
+              maximum_ = input.readBytes();
+              break;
+            }
+            case 26: {
+              bitField0_ |= 0x00000004;
+              sum_ = input.readBytes();
+              break;
+            }
+          }
+        }
+      } catch (com.google.protobuf.InvalidProtocolBufferException e) {
+        throw e.setUnfinishedMessage(this);
+      } catch (java.io.IOException e) {
+        throw new com.google.protobuf.InvalidProtocolBufferException(
+            e.getMessage()).setUnfinishedMessage(this);
+      } finally {
+        this.unknownFields = unknownFields.build();
+        makeExtensionsImmutable();
+      }
+    }
+    public static final com.google.protobuf.Descriptors.Descriptor
+        getDescriptor() {
+      return OrcProto.internal_static_orc_proto_DecimalStatistics_descriptor;
+    }
+
+    protected com.google.protobuf.GeneratedMessage.FieldAccessorTable
+        internalGetFieldAccessorTable() {
+      return OrcProto.internal_static_orc_proto_DecimalStatistics_fieldAccessorTable
+          .ensureFieldAccessorsInitialized(
+              OrcProto.DecimalStatistics.class, OrcProto.DecimalStatistics.Builder.class);
+    }
+
+    public static com.google.protobuf.Parser<DecimalStatistics> PARSER =
+        new com.google.protobuf.AbstractParser<DecimalStatistics>() {
+      public DecimalStatistics parsePartialFrom(
+          com.google.protobuf.CodedInputStream input,
+          com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+          throws com.google.protobuf.InvalidProtocolBufferException {
+        return new DecimalStatistics(input, extensionRegistry);
+      }
+    };
+
+    @java.lang.Override
+    public com.google.protobuf.Parser<DecimalStatistics> getParserForType() {
+      return PARSER;
+    }
+
+    private int bitField0_;
+    // optional string minimum = 1;
+    public static final int MINIMUM_FIELD_NUMBER = 1;
+    private java.lang.Object minimum_;
+    /**
+     * <code>optional string minimum = 1;</code>
+     */
+    public boolean hasMinimum() {
+      return ((bitField0_ & 0x00000001) == 0x00000001);
+    }
+    /**
+     * <code>optional string minimum = 1;</code>
+     */
+    public java.lang.String getMinimum() {
+      java.lang.Object ref = minimum_;
+      if (ref instanceof java.lang.String) {
+        return (java.lang.String) ref;
+      } else {
+        com.google.protobuf.ByteString bs =
+            (com.google.protobuf.ByteString) ref;
+        java.lang.String s = bs.toStringUtf8();
+        if (bs.isValidUtf8()) {
+          minimum_ = s;
+        }
+        return s;
+      }
+    }
+    /**
+     * <code>optional string minimum = 1;</code>
+     */
+    public com.google.protobuf.ByteString
+        getMinimumBytes() {
+      java.lang.Object ref = minimum_;
+      if (ref instanceof java.lang.String) {
+        com.google.protobuf.ByteString b =
+            com.google.protobuf.ByteString.copyFromUtf8(
+                (java.lang.String) ref);
+        minimum_ = b;
+        return b;
+      } else {
+        return (com.google.protobuf.ByteString) ref;
+      }
+    }
+
+    // optional string maximum = 2;
+    public static final int MAXIMUM_FIELD_NUMBER = 2;
+    private java.lang.Object maximum_;
+    /**
+     * <code>optional string maximum = 2;</code>
+     */
+    public boolean hasMaximum() {
+      return ((bitField0_ & 0x00000002) == 0x00000002);
+    }
+    /**
+     * <code>optional string maximum = 2;</code>
+     */
+    public java.lang.String getMaximum() {
+      java.lang.Object ref = maximum_;
+      if (ref instanceof java.lang.String) {
+        return (java.lang.String) ref;
+      } else {
+        com.google.protobuf.ByteString bs =
+            (com.google.protobuf.ByteString) ref;
+        java.lang.String s = bs.toStringUtf8();
+        if (bs.isValidUtf8()) {
+          maximum_ = s;
+        }
+        return s;
+      }
+    }
+    /**
+     * <code>optional string maximum = 2;</code>
+     */
+    public com.google.protobuf.ByteString
+        getMaximumBytes() {
+      java.lang.Object ref = maximum_;
+      if (ref instanceof java.lang.String) {
+        com.google.protobuf.ByteString b =
+            com.google.protobuf.ByteString.copyFromUtf8(
+                (java.lang.String) ref);
+        maximum_ = b;
+        return b;
+      } else {
+        return (com.google.protobuf.ByteString) ref;
+      }
+    }
+
+    // optional string sum = 3;
+    public static final int SUM_FIELD_NUMBER = 3;
+    private java.lang.Object sum_;
+    /**
+     * <code>optional string sum = 3;</code>
+     */
+    public boolean hasSum() {
+      return ((bitField0_ & 0x00000004) == 0x00000004);
+    }
+    /**
+     * <code>optional string sum = 3;</code>
+     */
+    public java.lang.String getSum() {
+      java.lang.Object ref = sum_;
+      if (ref instanceof java.lang.String) {
+        return (java.lang.String) ref;
+      } else {
+        com.google.protobuf.ByteString bs =
+            (com.google.protobuf.ByteString) ref;
+        java.lang.String s = bs.toStringUtf8();
+        if (bs.isValidUtf8()) {
+          sum_ = s;
+        }
+        return s;
+  

<TRUNCATED>

[32/37] hive git commit: HIVE-17118. Move the hive-orc source files to make the package names unique.

Posted by om...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/impl/ConvertTreeReaderFactory.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/impl/ConvertTreeReaderFactory.java b/orc/src/java/org/apache/hive/orc/impl/ConvertTreeReaderFactory.java
new file mode 100644
index 0000000..21020b8
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/impl/ConvertTreeReaderFactory.java
@@ -0,0 +1,2892 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc.impl;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.sql.Date;
+import java.sql.Timestamp;
+import java.util.EnumMap;
+import java.util.Map;
+
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
+import org.apache.hadoop.hive.ql.util.TimestampUtils;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hive.orc.TypeDescription;
+import org.apache.hive.orc.OrcProto;
+
+/**
+ * Convert ORC tree readers.
+ */
+public class ConvertTreeReaderFactory extends TreeReaderFactory {
+
+  /**
+   * Override methods like checkEncoding to pass-thru to the convert TreeReader.
+   */
+  public static class ConvertTreeReader extends TreeReader {
+
+    private TreeReader convertTreeReader;
+
+    ConvertTreeReader(int columnId) throws IOException {
+      super(columnId);
+    }
+
+    // The ordering of types here is used to determine which numeric types
+    // are common/convertible to one another. Probably better to rely on the
+    // ordering explicitly defined here than to assume that the enum values
+    // that were arbitrarily assigned in PrimitiveCategory work for our purposes.
+    private static EnumMap<TypeDescription.Category, Integer> numericTypes =
+        new EnumMap<>(TypeDescription.Category.class);
+
+    static {
+      registerNumericType(TypeDescription.Category.BOOLEAN, 1);
+      registerNumericType(TypeDescription.Category.BYTE, 2);
+      registerNumericType(TypeDescription.Category.SHORT, 3);
+      registerNumericType(TypeDescription.Category.INT, 4);
+      registerNumericType(TypeDescription.Category.LONG, 5);
+      registerNumericType(TypeDescription.Category.FLOAT, 6);
+      registerNumericType(TypeDescription.Category.DOUBLE, 7);
+      registerNumericType(TypeDescription.Category.DECIMAL, 8);
+    }
+
+    private static void registerNumericType(TypeDescription.Category kind, int level) {
+      numericTypes.put(kind, level);
+    }
+
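A hypothetical helper illustrating how the numericTypes level map registered above is typically consulted; isWiderThan is not defined in this file and is shown only to make the ordering concrete:

    // Illustrative only: a category with a higher registered level is "wider".
    static boolean isWiderThan(TypeDescription.Category a, TypeDescription.Category b) {
      Integer levelA = numericTypes.get(a);
      Integer levelB = numericTypes.get(b);
      return levelA != null && levelB != null && levelA > levelB;
    }
    // isWiderThan(Category.DOUBLE, Category.INT) -> true under this ordering.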
+    protected void setConvertTreeReader(TreeReader convertTreeReader) {
+      this.convertTreeReader = convertTreeReader;
+    }
+
+    protected TreeReader getStringGroupTreeReader(int columnId,
+        TypeDescription fileType) throws IOException {
+      switch (fileType.getCategory()) {
+      case STRING:
+        return new StringTreeReader(columnId);
+      case CHAR:
+        return new CharTreeReader(columnId, fileType.getMaxLength());
+      case VARCHAR:
+        return new VarcharTreeReader(columnId, fileType.getMaxLength());
+      default:
+        throw new RuntimeException("Unexpected type kind " + fileType.getCategory().name());
+      }
+    }
+
+    protected void assignStringGroupVectorEntry(BytesColumnVector bytesColVector,
+        int elementNum, TypeDescription readerType, byte[] bytes) {
+      assignStringGroupVectorEntry(bytesColVector,
+          elementNum, readerType, bytes, 0, bytes.length);
+    }
+
+    /*
+     * Assign a BytesColumnVector entry when we have a byte array, start, and
+     * length for the string group which can be (STRING, CHAR, VARCHAR).
+     */
+    protected void assignStringGroupVectorEntry(BytesColumnVector bytesColVector,
+        int elementNum, TypeDescription readerType, byte[] bytes, int start, int length) {
+      switch (readerType.getCategory()) {
+      case STRING:
+        bytesColVector.setVal(elementNum, bytes, start, length);
+        break;
+      case CHAR:
+        {
+          int adjustedDownLen =
+              StringExpr.rightTrimAndTruncate(bytes, start, length, readerType.getMaxLength());
+          bytesColVector.setVal(elementNum, bytes, start, adjustedDownLen);
+        }
+        break;
+      case VARCHAR:
+        {
+          int adjustedDownLen =
+              StringExpr.truncate(bytes, start, length, readerType.getMaxLength());
+          bytesColVector.setVal(elementNum, bytes, start, adjustedDownLen);
+        }
+        break;
+      default:
+        throw new RuntimeException("Unexpected type kind " + readerType.getCategory().name());
+      }
+    }
+
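To make the CHAR and VARCHAR branches above concrete, the same StringExpr calls can be exercised directly; this reuses the signatures that appear in the method and assumes the trim/truncate semantics the method names suggest (sketch only):

    byte[] bytes = "abc  ".getBytes(StandardCharsets.UTF_8);
    // CHAR(5): right-trim trailing spaces, then truncate to the max length.
    int charLen = StringExpr.rightTrimAndTruncate(bytes, 0, bytes.length, 5);
    // VARCHAR(5): truncate to the max length only; trailing spaces are kept.
    int varcharLen = StringExpr.truncate(bytes, 0, bytes.length, 5);
    // Either length is then passed to bytesColVector.setVal(elementNum, bytes, 0, len).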
+    protected void convertStringGroupVectorElement(BytesColumnVector bytesColVector,
+        int elementNum, TypeDescription readerType) {
+      switch (readerType.getCategory()) {
+      case STRING:
+        // No conversion needed.
+        break;
+      case CHAR:
+        {
+          int length = bytesColVector.length[elementNum];
+          int adjustedDownLen = StringExpr
+            .rightTrimAndTruncate(bytesColVector.vector[elementNum],
+                bytesColVector.start[elementNum], length,
+                readerType.getMaxLength());
+          if (adjustedDownLen < length) {
+            bytesColVector.length[elementNum] = adjustedDownLen;
+          }
+        }
+        break;
+      case VARCHAR:
+        {
+          int length = bytesColVector.length[elementNum];
+          int adjustedDownLen = StringExpr
+            .truncate(bytesColVector.vector[elementNum],
+                bytesColVector.start[elementNum], length,
+                readerType.getMaxLength());
+          if (adjustedDownLen < length) {
+            bytesColVector.length[elementNum] = adjustedDownLen;
+          }
+        }
+        break;
+      default:
+        throw new RuntimeException("Unexpected type kind " + readerType.getCategory().name());
+      }
+    }
+
+    private boolean isParseError;
+
+    /*
+     * We do this because we want the various parse methods to return a primitive.
+     *
+     * @return true if there was a parse error in the last call to
+     * parseLongFromString, etc.
+     */
+    protected boolean getIsParseError() {
+      return isParseError;
+    }
+
+    protected long parseLongFromString(String string) {
+      try {
+        long longValue = Long.parseLong(string);
+        isParseError = false;
+        return longValue;
+      } catch (NumberFormatException e) {
+        isParseError = true;
+        return 0;
+      }
+    }
+
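The calling pattern these primitive-returning helpers are designed for, per the getIsParseError comment above (sketch, written as if inside a ConvertTreeReader subclass):

    long parsed = parseLongFromString("12x");   // not a valid long
    if (getIsParseError()) {
      // The returned 0 is only a placeholder; mark the output value as null/invalid.
    }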
+    protected float parseFloatFromString(String string) {
+      try {
+        float floatValue = Float.parseFloat(string);
+        isParseError = false;
+        return floatValue;
+      } catch (NumberFormatException e) {
+        isParseError = true;
+        return Float.NaN;
+      }
+    }
+
+    protected double parseDoubleFromString(String string) {
+      try {
+        double value = Double.parseDouble(string);
+        isParseError = false;
+        return value;
+      } catch (NumberFormatException e) {
+        isParseError = true;
+        return Double.NaN;
+      }
+    }
+
+    /**
+     * @param string
+     * @return the HiveDecimal parsed, or null if there was a parse error.
+     */
+    protected HiveDecimal parseDecimalFromString(String string) {
+      try {
+        HiveDecimal value = HiveDecimal.create(string);
+        return value;
+      } catch (NumberFormatException e) {
+        return null;
+      }
+    }
+
+    /**
+     * @param string
+     * @return the Timestamp parsed, or null if there was a parse error.
+     */
+    protected Timestamp parseTimestampFromString(String string) {
+      try {
+        Timestamp value = Timestamp.valueOf(string);
+        return value;
+      } catch (IllegalArgumentException e) {
+        return null;
+      }
+    }
+
+    /**
+     * @param string
+     * @return the Date parsed, or null if there was a parse error.
+     */
+    protected Date parseDateFromString(String string) {
+      try {
+        Date value = Date.valueOf(string);
+        return value;
+      } catch (IllegalArgumentException e) {
+        return null;
+      }
+    }
+
+    protected String stringFromBytesColumnVectorEntry(
+        BytesColumnVector bytesColVector, int elementNum) {
+      String string;
+
+      string = new String(
+          bytesColVector.vector[elementNum],
+          bytesColVector.start[elementNum], bytesColVector.length[elementNum],
+          StandardCharsets.UTF_8);
+
+      return string;
+    }
+
+    private static final double MIN_LONG_AS_DOUBLE = -0x1p63;
+    /*
+     * We cannot store Long.MAX_VALUE as a double without losing precision. Instead, we store
+     * Long.MAX_VALUE + 1 == -Long.MIN_VALUE, and then offset all comparisons by 1.
+     */
+    private static final double MAX_LONG_AS_DOUBLE_PLUS_ONE = 0x1p63;
+
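+    /*
+     * Returns true when the double lies in the range a long can represent,
+     * i.e. roughly [-2^63, 2^63). NaN and the infinities fail the comparisons
+     * below and therefore return false. For example, 1.0e19 is larger than
+     * 2^63 (about 9.22e18) and cannot fit, while 123.45 can.
+     */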
+    public boolean doubleCanFitInLong(double doubleValue) {
+
+      // Borrowed from Guava DoubleMath.roundToLong except do not want dependency on Guava and we
+      // don't want to catch an exception.
+
+      return ((MIN_LONG_AS_DOUBLE - doubleValue < 1.0) &&
+              (doubleValue < MAX_LONG_AS_DOUBLE_PLUS_ONE));
+    }
+
+    @Override
+    void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
+      // Pass-thru.
+      convertTreeReader.checkEncoding(encoding);
+    }
+
+    @Override
+    void startStripe(Map<StreamName, InStream> streams,
+        OrcProto.StripeFooter stripeFooter
+    ) throws IOException {
+      // Pass-thru.
+      convertTreeReader.startStripe(streams, stripeFooter);
+    }
+
+    @Override
+    public void seek(PositionProvider[] index) throws IOException {
+      // Pass-thru.
+      convertTreeReader.seek(index);
+    }
+
+    @Override
+    public void seek(PositionProvider index) throws IOException {
+      // Pass-thru.
+      convertTreeReader.seek(index);
+    }
+
+    @Override
+    void skipRows(long items) throws IOException {
+      // Pass-thru.
+      convertTreeReader.skipRows(items);
+    }
+
+    /**
+     * Override this to use convertVector.
+     * Source and result are member variables in the subclass with the right
+     * type.
+     * @param elementNum the index of the element to convert
+     * @throws IOException
+     */
+    public void setConvertVectorElement(int elementNum) throws IOException {
+      throw new RuntimeException("Expected this method to be overridden");
+    }
+
+    // Common code used by the conversions: resets the result vector, then calls
+    // setConvertVectorElement for each non-null element, honoring the
+    // isRepeating and noNulls flags of the source vector.
+    public void convertVector(ColumnVector fromColVector,
+        ColumnVector resultColVector, final int batchSize) throws IOException {
+
+      resultColVector.reset();
+      if (fromColVector.isRepeating) {
+        resultColVector.isRepeating = true;
+        if (fromColVector.noNulls || !fromColVector.isNull[0]) {
+          setConvertVectorElement(0);
+        } else {
+          resultColVector.noNulls = false;
+          resultColVector.isNull[0] = true;
+        }
+      } else if (fromColVector.noNulls) {
+        for (int i = 0; i < batchSize; i++) {
+          setConvertVectorElement(i);
+        }
+      } else {
+        for (int i = 0; i < batchSize; i++) {
+          if (!fromColVector.isNull[i]) {
+            setConvertVectorElement(i);
+          } else {
+            resultColVector.noNulls = false;
+            resultColVector.isNull[i] = true;
+          }
+        }
+      }
+    }
+
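+    /*
+     * Down cast a long element to the reader's integer category (BOOLEAN, BYTE,
+     * SHORT, INT, or LONG). If the narrower cast changes the value (for example
+     * 300 does not fit in a BYTE), the element is marked null instead.
+     */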
+    public void downCastAnyInteger(LongColumnVector longColVector, int elementNum,
+        TypeDescription readerType) {
+      downCastAnyInteger(longColVector, elementNum, longColVector.vector[elementNum], readerType);
+    }
+
+    public void downCastAnyInteger(LongColumnVector longColVector, int elementNum, long inputLong,
+        TypeDescription readerType) {
+      long[] vector = longColVector.vector;
+      long outputLong;
+      TypeDescription.Category readerCategory = readerType.getCategory();
+      switch (readerCategory) {
+      case BOOLEAN:
+        // No data loss for boolean.
+        vector[elementNum] = inputLong == 0 ? 0 : 1;
+        return;
+      case BYTE:
+        outputLong = (byte) inputLong;
+        break;
+      case SHORT:
+        outputLong = (short) inputLong;
+        break;
+      case INT:
+        outputLong = (int) inputLong;
+        break;
+      case LONG:
+        // No data loss for long.
+        vector[elementNum] = inputLong;
+        return;
+      default:
+        throw new RuntimeException("Unexpected type kind " + readerCategory.name());
+      }
+
+      if (outputLong != inputLong) {
+        // Data loss.
+        longColVector.isNull[elementNum] = true;
+        longColVector.noNulls = false;
+      } else {
+        vector[elementNum] = outputLong;
+      }
+    }
+
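+    // Returns true when the reader's integer category is narrower than the
+    // file's, based on the relative ordering in the numericTypes map.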
+    protected boolean integerDownCastNeeded(TypeDescription fileType, TypeDescription readerType) {
+      Integer fileLevel = numericTypes.get(fileType.getCategory());
+      Integer schemaLevel = numericTypes.get(readerType.getCategory());
+      return (schemaLevel.intValue() < fileLevel.intValue());
+    }
+  }
+
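+  /**
+   * Reads any column of the integer family (BOOLEAN, BYTE, SHORT, INT, LONG)
+   * by delegating to the matching primitive TreeReader for the file type.
+   */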
+  public static class AnyIntegerTreeReader extends ConvertTreeReader {
+
+    private TypeDescription.Category fileTypeCategory;
+    private TreeReader anyIntegerTreeReader;
+
+    private long longValue;
+
+    AnyIntegerTreeReader(int columnId, TypeDescription fileType,
+        boolean skipCorrupt) throws IOException {
+      super(columnId);
+      this.fileTypeCategory = fileType.getCategory();
+      switch (fileTypeCategory) {
+      case BOOLEAN:
+        anyIntegerTreeReader = new BooleanTreeReader(columnId);
+        break;
+      case BYTE:
+        anyIntegerTreeReader = new ByteTreeReader(columnId);
+        break;
+      case SHORT:
+        anyIntegerTreeReader = new ShortTreeReader(columnId);
+        break;
+      case INT:
+        anyIntegerTreeReader = new IntTreeReader(columnId);
+        break;
+      case LONG:
+        anyIntegerTreeReader = new LongTreeReader(columnId, skipCorrupt);
+        break;
+      default:
+        throw new RuntimeException("Unexpected type kind " + fileType.getCategory().name());
+      }
+      setConvertTreeReader(anyIntegerTreeReader);
+    }
+
+    protected long getLong() throws IOException {
+      return longValue;
+    }
+
+    protected String getString(long longValue) {
+      if (fileTypeCategory == TypeDescription.Category.BOOLEAN) {
+        return longValue == 0 ? "FALSE" : "TRUE";
+      } else {
+        return Long.toString(longValue);
+      }
+    }
+
+    protected String getString() {
+      return getString(longValue);
+    }
+
+    @Override
+    public void nextVector(ColumnVector previousVector,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      anyIntegerTreeReader.nextVector(previousVector, isNull, batchSize);
+    }
+  }
+
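+  /**
+   * Converts between two integer categories. When the reader type is narrower
+   * than the file type, values that overflow the reader type are set to null.
+   */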
+  public static class AnyIntegerFromAnyIntegerTreeReader extends ConvertTreeReader {
+
+    private AnyIntegerTreeReader anyIntegerAsLongTreeReader;
+
+    private final TypeDescription readerType;
+    private final boolean downCastNeeded;
+
+    AnyIntegerFromAnyIntegerTreeReader(int columnId, TypeDescription fileType,
+        TypeDescription readerType, boolean skipCorrupt) throws IOException {
+      super(columnId);
+      this.readerType = readerType;
+      anyIntegerAsLongTreeReader = new AnyIntegerTreeReader(columnId, fileType, skipCorrupt);
+      setConvertTreeReader(anyIntegerAsLongTreeReader);
+      downCastNeeded = integerDownCastNeeded(fileType, readerType);
+    }
+
+    @Override
+    public void nextVector(ColumnVector previousVector,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      anyIntegerAsLongTreeReader.nextVector(previousVector, isNull, batchSize);
+      LongColumnVector resultColVector = (LongColumnVector) previousVector;
+      if (downCastNeeded) {
+        if (resultColVector.isRepeating) {
+          if (resultColVector.noNulls || !resultColVector.isNull[0]) {
+            downCastAnyInteger(resultColVector, 0, readerType);
+          } else {
+            // Result remains null.
+          }
+        } else if (resultColVector.noNulls) {
+          for (int i = 0; i < batchSize; i++) {
+            downCastAnyInteger(resultColVector, i, readerType);
+          }
+        } else {
+          for (int i = 0; i < batchSize; i++) {
+            if (!resultColVector.isNull[i]) {
+              downCastAnyInteger(resultColVector, i, readerType);
+            } else {
+              // Result remains null.
+            }
+          }
+        }
+      }
+    }
+  }
+
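+  /**
+   * Converts FLOAT file values to an integer reader type. Values outside the
+   * range of a long become null; otherwise the value is truncated toward zero
+   * and then down cast to the reader type.
+   */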
+  public static class AnyIntegerFromFloatTreeReader extends ConvertTreeReader {
+
+    private FloatTreeReader floatTreeReader;
+
+    private final TypeDescription readerType;
+    private DoubleColumnVector doubleColVector;
+    private LongColumnVector longColVector;
+
+    AnyIntegerFromFloatTreeReader(int columnId, TypeDescription readerType)
+        throws IOException {
+      super(columnId);
+      this.readerType = readerType;
+      floatTreeReader = new FloatTreeReader(columnId);
+      setConvertTreeReader(floatTreeReader);
+    }
+
+    @Override
+    public void setConvertVectorElement(int elementNum) throws IOException {
+      double doubleValue = doubleColVector.vector[elementNum];
+      if (!doubleCanFitInLong(doubleValue)) {
+        longColVector.isNull[elementNum] = true;
+        longColVector.noNulls = false;
+      } else {
+        // UNDONE: Does the overflow check above using double really work here for float?
+        float floatValue = (float) doubleValue;
+        downCastAnyInteger(longColVector, elementNum, (long) floatValue, readerType);
+      }
+    }
+
+    @Override
+    public void nextVector(ColumnVector previousVector,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      if (doubleColVector == null) {
+        // Allocate column vector for file; cast column vector for reader.
+        doubleColVector = new DoubleColumnVector();
+        longColVector = (LongColumnVector) previousVector;
+      }
+      // Read present/isNull stream
+      floatTreeReader.nextVector(doubleColVector, isNull, batchSize);
+
+      convertVector(doubleColVector, longColVector, batchSize);
+    }
+  }
+
+  public static class AnyIntegerFromDoubleTreeReader extends ConvertTreeReader {
+
+    private DoubleTreeReader doubleTreeReader;
+
+    private final TypeDescription readerType;
+    private DoubleColumnVector doubleColVector;
+    private LongColumnVector longColVector;
+
+    AnyIntegerFromDoubleTreeReader(int columnId, TypeDescription readerType)
+        throws IOException {
+      super(columnId);
+      this.readerType = readerType;
+      doubleTreeReader = new DoubleTreeReader(columnId);
+      setConvertTreeReader(doubleTreeReader);
+    }
+
+    @Override
+    public void setConvertVectorElement(int elementNum) throws IOException {
+      double doubleValue = doubleColVector.vector[elementNum];
+      if (!doubleCanFitInLong(doubleValue)) {
+        longColVector.isNull[elementNum] = true;
+        longColVector.noNulls = false;
+      } else {
+        downCastAnyInteger(longColVector, elementNum, (long) doubleValue, readerType);
+      }
+    }
+
+    @Override
+    public void nextVector(ColumnVector previousVector,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      if (doubleColVector == null) {
+        // Allocate column vector for file; cast column vector for reader.
+        doubleColVector = new DoubleColumnVector();
+        longColVector = (LongColumnVector) previousVector;
+      }
+      // Read present/isNull stream
+      doubleTreeReader.nextVector(doubleColVector, isNull, batchSize);
+
+      convertVector(doubleColVector, longColVector, batchSize);
+    }
+  }
+
+  public static class AnyIntegerFromDecimalTreeReader extends ConvertTreeReader {
+
+    private DecimalTreeReader decimalTreeReader;
+
+    private final int precision;
+    private final int scale;
+    private final TypeDescription readerType;
+    private DecimalColumnVector decimalColVector;
+    private LongColumnVector longColVector;
+
+    AnyIntegerFromDecimalTreeReader(int columnId, TypeDescription fileType,
+        TypeDescription readerType) throws IOException {
+      super(columnId);
+      this.precision = fileType.getPrecision();
+      this.scale = fileType.getScale();
+      this.readerType = readerType;
+      decimalTreeReader = new DecimalTreeReader(columnId, precision, scale);
+      setConvertTreeReader(decimalTreeReader);
+    }
+
+    private static final HiveDecimal DECIMAL_MAX_LONG = HiveDecimal.create(Long.MAX_VALUE);
+    private static final HiveDecimal DECIMAL_MIN_LONG = HiveDecimal.create(Long.MIN_VALUE);
+
+    @Override
+    public void setConvertVectorElement(int elementNum) throws IOException {
+      HiveDecimal decimalValue = decimalColVector.vector[elementNum].getHiveDecimal();
+      if (decimalValue.compareTo(DECIMAL_MAX_LONG) > 0 ||
+          decimalValue.compareTo(DECIMAL_MIN_LONG) < 0) {
+        longColVector.isNull[elementNum] = true;
+        longColVector.noNulls = false;
+      } else {
+        downCastAnyInteger(longColVector, elementNum, decimalValue.longValue(), readerType);
+      }
+    }
+
+    @Override
+    public void nextVector(ColumnVector previousVector,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      if (decimalColVector == null) {
+        // Allocate column vector for file; cast column vector for reader.
+        decimalColVector = new DecimalColumnVector(precision, scale);
+        longColVector = (LongColumnVector) previousVector;
+      }
+      // Read present/isNull stream
+      decimalTreeReader.nextVector(decimalColVector, isNull, batchSize);
+
+      convertVector(decimalColVector, longColVector, batchSize);
+    }
+  }
+
+  public static class AnyIntegerFromStringGroupTreeReader extends ConvertTreeReader {
+
+    private TreeReader stringGroupTreeReader;
+
+    private final TypeDescription readerType;
+    private BytesColumnVector bytesColVector;
+    private LongColumnVector longColVector;
+
+    AnyIntegerFromStringGroupTreeReader(int columnId, TypeDescription fileType,
+        TypeDescription readerType) throws IOException {
+      super(columnId);
+      this.readerType = readerType;
+      stringGroupTreeReader = getStringGroupTreeReader(columnId, fileType);
+      setConvertTreeReader(stringGroupTreeReader);
+    }
+
+    @Override
+    public void setConvertVectorElement(int elementNum) throws IOException {
+      String string = stringFromBytesColumnVectorEntry(bytesColVector, elementNum);
+      long longValue = parseLongFromString(string);
+      if (!getIsParseError()) {
+        downCastAnyInteger(longColVector, elementNum, longValue, readerType);
+      } else {
+        longColVector.noNulls = false;
+        longColVector.isNull[elementNum] = true;
+      }
+    }
+
+    @Override
+    public void nextVector(ColumnVector previousVector,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      if (bytesColVector == null) {
+        // Allocate column vector for file; cast column vector for reader.
+        bytesColVector = new BytesColumnVector();
+        longColVector = (LongColumnVector) previousVector;
+      }
+      // Read present/isNull stream
+      stringGroupTreeReader.nextVector(bytesColVector, isNull, batchSize);
+
+      convertVector(bytesColVector, longColVector, batchSize);
+    }
+  }
+
+  public static class AnyIntegerFromTimestampTreeReader extends ConvertTreeReader {
+
+    private TimestampTreeReader timestampTreeReader;
+
+    private final TypeDescription readerType;
+    private TimestampColumnVector timestampColVector;
+    private LongColumnVector longColVector;
+
+    AnyIntegerFromTimestampTreeReader(int columnId, TypeDescription readerType,
+        boolean skipCorrupt) throws IOException {
+      super(columnId);
+      this.readerType = readerType;
+      timestampTreeReader = new TimestampTreeReader(columnId, skipCorrupt);
+      setConvertTreeReader(timestampTreeReader);
+    }
+
+    @Override
+    public void setConvertVectorElement(int elementNum) throws IOException {
+      // Convert milliseconds to seconds (same semantics as TimestampWritable's getSeconds).
+      long longValue = TimestampUtils.millisToSeconds(
+          timestampColVector.asScratchTimestamp(elementNum).getTime());
+      downCastAnyInteger(longColVector, elementNum, longValue, readerType);
+    }
+
+    @Override
+    public void nextVector(ColumnVector previousVector,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      if (timestampColVector == null) {
+        // Allocate column vector for file; cast column vector for reader.
+        timestampColVector = new TimestampColumnVector();
+        longColVector = (LongColumnVector) previousVector;
+      }
+      // Read present/isNull stream
+      timestampTreeReader.nextVector(timestampColVector, isNull, batchSize);
+
+      convertVector(timestampColVector, longColVector, batchSize);
+    }
+  }
+
+  public static class FloatFromAnyIntegerTreeReader extends ConvertTreeReader {
+
+    private AnyIntegerTreeReader anyIntegerAsLongTreeReader;
+
+    private LongColumnVector longColVector;
+    private DoubleColumnVector doubleColVector;
+
+    FloatFromAnyIntegerTreeReader(int columnId, TypeDescription fileType,
+        boolean skipCorrupt) throws IOException {
+      super(columnId);
+      anyIntegerAsLongTreeReader =
+          new AnyIntegerTreeReader(columnId, fileType, skipCorrupt);
+      setConvertTreeReader(anyIntegerAsLongTreeReader);
+    }
+
+    @Override
+    public void setConvertVectorElement(int elementNum) throws IOException {
+      float floatValue = (float) longColVector.vector[elementNum];
+      if (!Float.isNaN(floatValue)) {
+        doubleColVector.vector[elementNum] = floatValue;
+      } else {
+        doubleColVector.vector[elementNum] = Double.NaN;
+        doubleColVector.noNulls = false;
+        doubleColVector.isNull[elementNum] = true;
+      }
+    }
+
+    @Override
+    public void nextVector(ColumnVector previousVector,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      if (longColVector == null) {
+        // Allocate column vector for file; cast column vector for reader.
+        longColVector = new LongColumnVector();
+        doubleColVector = (DoubleColumnVector) previousVector;
+      }
+      // Read present/isNull stream
+      anyIntegerAsLongTreeReader.nextVector(longColVector, isNull, batchSize);
+
+      convertVector(longColVector, doubleColVector, batchSize);
+    }
+  }
+
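+  /**
+   * Narrows DOUBLE file values to float precision in place by casting each
+   * element down to float and storing it back into the DoubleColumnVector.
+   */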
+  public static class FloatFromDoubleTreeReader extends ConvertTreeReader {
+
+    private DoubleTreeReader doubleTreeReader;
+
+    FloatFromDoubleTreeReader(int columnId) throws IOException {
+      super(columnId);
+      doubleTreeReader = new DoubleTreeReader(columnId);
+      setConvertTreeReader(doubleTreeReader);
+    }
+
+    @Override
+    public void nextVector(ColumnVector previousVector,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      doubleTreeReader.nextVector(previousVector, isNull, batchSize);
+
+      DoubleColumnVector resultColVector = (DoubleColumnVector) previousVector;
+      double[] resultVector = resultColVector.vector;
+      if (resultColVector.isRepeating) {
+        if (resultColVector.noNulls || !resultColVector.isNull[0]) {
+          resultVector[0] = (float) resultVector[0];
+        } else {
+          // Remains null.
+        }
+      } else if (resultColVector.noNulls) {
+        for (int i = 0; i < batchSize; i++) {
+          resultVector[i] = (float) resultVector[i];
+        }
+      } else {
+        for (int i = 0; i < batchSize; i++) {
+          if (!resultColVector.isNull[i]) {
+            resultVector[i] = (float) resultVector[i];
+          } else {
+            // Remains null.
+          }
+        }
+      }
+    }
+  }
+
+  public static class FloatFromDecimalTreeReader extends ConvertTreeReader {
+
+    private DecimalTreeReader decimalTreeReader;
+
+    private final int precision;
+    private final int scale;
+    private DecimalColumnVector decimalColVector;
+    private DoubleColumnVector doubleColVector;
+
+    FloatFromDecimalTreeReader(int columnId, TypeDescription fileType,
+        TypeDescription readerType) throws IOException {
+      super(columnId);
+      this.precision = fileType.getPrecision();
+      this.scale = fileType.getScale();
+      decimalTreeReader = new DecimalTreeReader(columnId, precision, scale);
+      setConvertTreeReader(decimalTreeReader);
+    }
+
+    @Override
+    public void setConvertVectorElement(int elementNum) throws IOException {
+      doubleColVector.vector[elementNum] =
+          (float) decimalColVector.vector[elementNum].doubleValue();
+    }
+
+    @Override
+    public void nextVector(ColumnVector previousVector,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      if (decimalColVector == null) {
+        // Allocate column vector for file; cast column vector for reader.
+        decimalColVector = new DecimalColumnVector(precision, scale);
+        doubleColVector = (DoubleColumnVector) previousVector;
+      }
+      // Read present/isNull stream
+      decimalTreeReader.nextVector(decimalColVector, isNull, batchSize);
+
+      convertVector(decimalColVector, doubleColVector, batchSize);
+    }
+  }
+
+  public static class FloatFromStringGroupTreeReader extends ConvertTreeReader {
+
+    private TreeReader stringGroupTreeReader;
+
+    private BytesColumnVector bytesColVector;
+    private DoubleColumnVector doubleColVector;
+
+    FloatFromStringGroupTreeReader(int columnId, TypeDescription fileType)
+        throws IOException {
+      super(columnId);
+      stringGroupTreeReader = getStringGroupTreeReader(columnId, fileType);
+      setConvertTreeReader(stringGroupTreeReader);
+    }
+
+    @Override
+    public void setConvertVectorElement(int elementNum) throws IOException {
+      String string = stringFromBytesColumnVectorEntry(bytesColVector, elementNum);
+      float floatValue = parseFloatFromString(string);
+      if (!getIsParseError()) {
+        doubleColVector.vector[elementNum] = floatValue;
+      } else {
+        doubleColVector.vector[elementNum] = Double.NaN;
+        doubleColVector.noNulls = false;
+        doubleColVector.isNull[elementNum] = true;
+      }
+    }
+
+    @Override
+    public void nextVector(ColumnVector previousVector,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      if (bytesColVector == null) {
+        // Allocate column vector for file; cast column vector for reader.
+        bytesColVector = new BytesColumnVector();
+        doubleColVector = (DoubleColumnVector) previousVector;
+      }
+      // Read present/isNull stream
+      stringGroupTreeReader.nextVector(bytesColVector, isNull, batchSize);
+
+      convertVector(bytesColVector, doubleColVector, batchSize);
+    }
+  }
+
+  public static class FloatFromTimestampTreeReader extends ConvertTreeReader {
+
+    private TimestampTreeReader timestampTreeReader;
+
+    private TimestampColumnVector timestampColVector;
+    private DoubleColumnVector doubleColVector;
+
+    FloatFromTimestampTreeReader(int columnId, boolean skipCorrupt) throws IOException {
+      super(columnId);
+      timestampTreeReader = new TimestampTreeReader(columnId, skipCorrupt);
+      setConvertTreeReader(timestampTreeReader);
+    }
+
+    @Override
+    public void setConvertVectorElement(int elementNum) throws IOException {
+      doubleColVector.vector[elementNum] = (float) TimestampUtils.getDouble(
+          timestampColVector.asScratchTimestamp(elementNum));
+    }
+
+    @Override
+    public void nextVector(ColumnVector previousVector,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      if (timestampColVector == null) {
+        // Allocate column vector for file; cast column vector for reader.
+        timestampColVector = new TimestampColumnVector();
+        doubleColVector = (DoubleColumnVector) previousVector;
+      }
+      // Read present/isNull stream
+      timestampTreeReader.nextVector(timestampColVector, isNull, batchSize);
+
+      convertVector(timestampColVector, doubleColVector, batchSize);
+    }
+  }
+
+  public static class DoubleFromAnyIntegerTreeReader extends ConvertTreeReader {
+
+    private AnyIntegerTreeReader anyIntegerAsLongTreeReader;
+
+    private LongColumnVector longColVector;
+    private DoubleColumnVector doubleColVector;
+
+    DoubleFromAnyIntegerTreeReader(int columnId, TypeDescription fileType,
+        boolean skipCorrupt) throws IOException {
+      super(columnId);
+      anyIntegerAsLongTreeReader =
+          new AnyIntegerTreeReader(columnId, fileType, skipCorrupt);
+      setConvertTreeReader(anyIntegerAsLongTreeReader);
+    }
+
+    @Override
+    public void setConvertVectorElement(int elementNum) {
+
+      double doubleValue = (double) longColVector.vector[elementNum];
+      if (!Double.isNaN(doubleValue)) {
+        doubleColVector.vector[elementNum] = doubleValue;
+      } else {
+        doubleColVector.vector[elementNum] = Double.NaN;
+        doubleColVector.noNulls = false;
+        doubleColVector.isNull[elementNum] = true;
+      }
+    }
+
+    @Override
+    public void nextVector(ColumnVector previousVector,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      if (longColVector == null) {
+        // Allocate column vector for file; cast column vector for reader.
+        longColVector = new LongColumnVector();
+        doubleColVector = (DoubleColumnVector) previousVector;
+      }
+      // Read present/isNull stream
+      anyIntegerAsLongTreeReader.nextVector(longColVector, isNull, batchSize);
+
+      convertVector(longColVector, doubleColVector, batchSize);
+    }
+  }
+
+  public static class DoubleFromFloatTreeReader extends ConvertTreeReader {
+
+    private FloatTreeReader floatTreeReader;
+
+    DoubleFromFloatTreeReader(int columnId) throws IOException {
+      super(columnId);
+      floatTreeReader = new FloatTreeReader(columnId);
+      setConvertTreeReader(floatTreeReader);
+    }
+
+    @Override
+    public void nextVector(ColumnVector previousVector,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      // We get the DoubleColumnVector produced by the float tree reader first, then iterate
+      // through the elements and perform a double -> float -> string -> double conversion to
+      // preserve the precision. When the float tree reader reads a float and assigns it to a
+      // double, Java's widening conversion adds more precision, which breaks all comparisons.
+      // Example: float f = 74.72
+      // double d = f ---> 74.72000122070312
+      // Double.parseDouble(String.valueOf(f)) ---> 74.72
+      floatTreeReader.nextVector(previousVector, isNull, batchSize);
+
+      DoubleColumnVector doubleColumnVector = (DoubleColumnVector) previousVector;
+      if (doubleColumnVector.isRepeating) {
+        if (doubleColumnVector.noNulls || !doubleColumnVector.isNull[0]) {
+          final float f = (float) doubleColumnVector.vector[0];
+          doubleColumnVector.vector[0] = Double.parseDouble(String.valueOf(f));
+        }
+      } else if (doubleColumnVector.noNulls) {
+        for (int i = 0; i < batchSize; i++) {
+          final float f = (float) doubleColumnVector.vector[i];
+          doubleColumnVector.vector[i] = Double.parseDouble(String.valueOf(f));
+        }
+      } else {
+        for (int i = 0; i < batchSize; i++) {
+          if (!doubleColumnVector.isNull[i]) {
+            final float f = (float) doubleColumnVector.vector[i];
+            doubleColumnVector.vector[i] = Double.parseDouble(String.valueOf(f));
+          }
+        }
+      }
+    }
+  }
+
+  public static class DoubleFromDecimalTreeReader extends ConvertTreeReader {
+
+    private DecimalTreeReader decimalTreeReader;
+
+    private final int precision;
+    private final int scale;
+    private DecimalColumnVector decimalColVector;
+    private DoubleColumnVector doubleColVector;
+
+    DoubleFromDecimalTreeReader(int columnId, TypeDescription fileType) throws IOException {
+      super(columnId);
+      this.precision = fileType.getPrecision();
+      this.scale = fileType.getScale();
+      decimalTreeReader = new DecimalTreeReader(columnId, precision, scale);
+      setConvertTreeReader(decimalTreeReader);
+    }
+
+    @Override
+    public void setConvertVectorElement(int elementNum) throws IOException {
+      doubleColVector.vector[elementNum] =
+          decimalColVector.vector[elementNum].doubleValue();
+    }
+
+    @Override
+    public void nextVector(ColumnVector previousVector,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      if (decimalColVector == null) {
+        // Allocate column vector for file; cast column vector for reader.
+        decimalColVector = new DecimalColumnVector(precision, scale);
+        doubleColVector = (DoubleColumnVector) previousVector;
+      }
+      // Read present/isNull stream
+      decimalTreeReader.nextVector(decimalColVector, isNull, batchSize);
+
+      convertVector(decimalColVector, doubleColVector, batchSize);
+    }
+  }
+
+  public static class DoubleFromStringGroupTreeReader extends ConvertTreeReader {
+
+    private TreeReader stringGroupTreeReader;
+
+    private BytesColumnVector bytesColVector;
+    private DoubleColumnVector doubleColVector;
+
+    DoubleFromStringGroupTreeReader(int columnId, TypeDescription fileType)
+        throws IOException {
+      super(columnId);
+      stringGroupTreeReader = getStringGroupTreeReader(columnId, fileType);
+      setConvertTreeReader(stringGroupTreeReader);
+    }
+
+    @Override
+    public void setConvertVectorElement(int elementNum) throws IOException {
+      String string = stringFromBytesColumnVectorEntry(bytesColVector, elementNum);
+      double doubleValue = parseDoubleFromString(string);
+      if (!getIsParseError()) {
+        doubleColVector.vector[elementNum] = doubleValue;
+      } else {
+        doubleColVector.noNulls = false;
+        doubleColVector.isNull[elementNum] = true;
+      }
+    }
+
+    @Override
+    public void nextVector(ColumnVector previousVector,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      if (bytesColVector == null) {
+        // Allocate column vector for file; cast column vector for reader.
+        bytesColVector = new BytesColumnVector();
+        doubleColVector = (DoubleColumnVector) previousVector;
+      }
+      // Read present/isNull stream
+      stringGroupTreeReader.nextVector(bytesColVector, isNull, batchSize);
+
+      convertVector(bytesColVector, doubleColVector, batchSize);
+    }
+  }
+
+  public static class DoubleFromTimestampTreeReader extends ConvertTreeReader {
+
+    private TimestampTreeReader timestampTreeReader;
+
+    private TimestampColumnVector timestampColVector;
+    private DoubleColumnVector doubleColVector;
+
+    DoubleFromTimestampTreeReader(int columnId, boolean skipCorrupt) throws IOException {
+      super(columnId);
+      timestampTreeReader = new TimestampTreeReader(columnId, skipCorrupt);
+      setConvertTreeReader(timestampTreeReader);
+    }
+
+    @Override
+    public void setConvertVectorElement(int elementNum) throws IOException {
+      doubleColVector.vector[elementNum] = TimestampUtils.getDouble(
+          timestampColVector.asScratchTimestamp(elementNum));
+    }
+
+    @Override
+    public void nextVector(ColumnVector previousVector,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      if (timestampColVector == null) {
+        // Allocate column vector for file; cast column vector for reader.
+        timestampColVector = new TimestampColumnVector();
+        doubleColVector = (DoubleColumnVector) previousVector;
+      }
+      // Read present/isNull stream
+      timestampTreeReader.nextVector(timestampColVector, isNull, batchSize);
+
+      convertVector(timestampColVector, doubleColVector, batchSize);
+    }
+  }
+
+  public static class DecimalFromAnyIntegerTreeReader extends ConvertTreeReader {
+
+    private AnyIntegerTreeReader anyIntegerAsLongTreeReader;
+
+    private LongColumnVector longColVector;
+    private DecimalColumnVector decimalColVector;
+
+    DecimalFromAnyIntegerTreeReader(int columnId, TypeDescription fileType, boolean skipCorrupt)
+        throws IOException {
+      super(columnId);
+      anyIntegerAsLongTreeReader =
+          new AnyIntegerTreeReader(columnId, fileType, skipCorrupt);
+      setConvertTreeReader(anyIntegerAsLongTreeReader);
+    }
+
+    @Override
+    public void setConvertVectorElement(int elementNum) {
+      long longValue = longColVector.vector[elementNum];
+      HiveDecimalWritable hiveDecimalWritable = new HiveDecimalWritable(longValue);
+      // The DecimalColumnVector will enforce precision and scale and set the entry to null when out of bounds.
+      decimalColVector.set(elementNum, hiveDecimalWritable);
+    }
+
+    @Override
+    public void nextVector(ColumnVector previousVector,
+        boolean[] isNull,
+        final int batchSize) throws IOException {
+      if (longColVector == null) {
+        // Allocate column vector for file; cast column vector for reader.
+        longColVector = new LongColumnVector();
+        decimalColVector = (DecimalColumnVector) previousVector;
+      }
+      // Read present/isNull stream
+      anyIntegerAsLongTreeReader.nextVector(longColVector, isNull, batchSize);
+
+      convertVector(longColVector, decimalColVector, batchSize);
+    }
+  }
+
+  public static class DecimalFromFloatTreeReader extends ConvertTreeReader {
+
+    private FloatTreeReader floatTreeReader;
+
+    private DoubleColumnVector doubleColVector;
+    private DecimalColumnVector decimalColVector;
+
+    DecimalFromFloatTreeReader(int columnId, TypeDescription readerType)
+        throws IOException {
+      super(columnId);
+      floatTreeReader = new FloatTreeReader(columnId);
+      setConvertTreeReader(floatTreeReader);
+    }
+
+    @Override
+    public void setConvertVectorElement(int elementNum) throws IOException {
+      float floatValue = (float) doubleColVector.vector[elementNum];
+      if (!Float.isNaN(floatValue)) {
+        HiveDecimal decimalValue =
+            HiveDecimal.create(Float.toString(floatValue));
+        if (decimalValue != null) {
+          // The DecimalColumnVector will enforce precision and scale and set the entry to null when out of bounds.
+          decimalColVector.set(elementNum, decimalValue);
+        } else {
+          decimalColVector.noNulls = false;
+          decimalColVector.isNull[elementNum] = true;
+        }
+      } else {
+        decimalColVector.noNulls = false;
+        decimalColVector.isNull[elementNum] = true;
+      }
+    }
+
+    @Override
+    public void nextVector(ColumnVector previousVector,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      if (doubleColVector == null) {
+        // Allocate column vector for file; cast column vector for reader.
+        doubleColVector = new DoubleColumnVector();
+        decimalColVector = (DecimalColumnVector) previousVector;
+      }
+      // Read present/isNull stream
+      floatTreeReader.nextVector(doubleColVector, isNull, batchSize);
+
+      convertVector(doubleColVector, decimalColVector, batchSize);
+    }
+  }
+
+  public static class DecimalFromDoubleTreeReader extends ConvertTreeReader {
+
+    private DoubleTreeReader doubleTreeReader;
+
+    private DoubleColumnVector doubleColVector;
+    private DecimalColumnVector decimalColVector;
+
+    DecimalFromDoubleTreeReader(int columnId, TypeDescription readerType)
+        throws IOException {
+      super(columnId);
+      doubleTreeReader = new DoubleTreeReader(columnId);
+      setConvertTreeReader(doubleTreeReader);
+    }
+
+    @Override
+    public void setConvertVectorElement(int elementNum) throws IOException {
+      HiveDecimal value =
+          HiveDecimal.create(Double.toString(doubleColVector.vector[elementNum]));
+      if (value != null) {
+        decimalColVector.set(elementNum, value);
+      } else {
+        decimalColVector.noNulls = false;
+        decimalColVector.isNull[elementNum] = true;
+      }
+    }
+
+    @Override
+    public void nextVector(ColumnVector previousVector,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      if (doubleColVector == null) {
+        // Allocate column vector for file; cast column vector for reader.
+        doubleColVector = new DoubleColumnVector();
+        decimalColVector = (DecimalColumnVector) previousVector;
+      }
+      // Read present/isNull stream
+      doubleTreeReader.nextVector(doubleColVector, isNull, batchSize);
+
+      convertVector(doubleColVector, decimalColVector, batchSize);
+    }
+  }
+
+  public static class DecimalFromStringGroupTreeReader extends ConvertTreeReader {
+
+    private TreeReader stringGroupTreeReader;
+
+    private BytesColumnVector bytesColVector;
+    private DecimalColumnVector decimalColVector;
+
+    DecimalFromStringGroupTreeReader(int columnId, TypeDescription fileType,
+        TypeDescription readerType) throws IOException {
+      super(columnId);
+      stringGroupTreeReader = getStringGroupTreeReader(columnId, fileType);
+      setConvertTreeReader(stringGroupTreeReader);
+    }
+
+    @Override
+    public void setConvertVectorElement(int elementNum) throws IOException {
+      String string = stringFromBytesColumnVectorEntry(bytesColVector, elementNum);
+      HiveDecimal value = parseDecimalFromString(string);
+      if (value != null) {
+        // The DecimalColumnVector will enforce precision and scale and set the entry to null when out of bounds.
+        decimalColVector.set(elementNum, value);
+      } else {
+        decimalColVector.noNulls = false;
+        decimalColVector.isNull[elementNum] = true;
+      }
+    }
+
+    @Override
+    public void nextVector(ColumnVector previousVector,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      if (bytesColVector == null) {
+        // Allocate column vector for file; cast column vector for reader.
+        bytesColVector = new BytesColumnVector();
+        decimalColVector = (DecimalColumnVector) previousVector;
+      }
+      // Read present/isNull stream
+      stringGroupTreeReader.nextVector(bytesColVector, isNull, batchSize);
+
+      convertVector(bytesColVector, decimalColVector, batchSize);
+    }
+  }
+
+  public static class DecimalFromTimestampTreeReader extends ConvertTreeReader {
+
+    private TimestampTreeReader timestampTreeReader;
+
+    private TimestampColumnVector timestampColVector;
+    private DecimalColumnVector decimalColVector;
+
+    DecimalFromTimestampTreeReader(int columnId, boolean skipCorrupt) throws IOException {
+      super(columnId);
+      timestampTreeReader = new TimestampTreeReader(columnId, skipCorrupt);
+      setConvertTreeReader(timestampTreeReader);
+    }
+
+    @Override
+    public void setConvertVectorElement(int elementNum) throws IOException {
+      double doubleValue = TimestampUtils.getDouble(
+          timestampColVector.asScratchTimestamp(elementNum));
+      HiveDecimal value = HiveDecimal.create(Double.toString(doubleValue));
+      if (value != null) {
+        // The DecimalColumnVector will enforce precision and scale and set the entry to null when out of bounds.
+        decimalColVector.set(elementNum, value);
+      } else {
+        decimalColVector.noNulls = false;
+        decimalColVector.isNull[elementNum] = true;
+      }
+    }
+
+    @Override
+    public void nextVector(ColumnVector previousVector,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      if (timestampColVector == null) {
+        // Allocate column vector for file; cast column vector for reader.
+        timestampColVector = new TimestampColumnVector();
+        decimalColVector = (DecimalColumnVector) previousVector;
+      }
+      // Read present/isNull stream
+      timestampTreeReader.nextVector(timestampColVector, isNull, batchSize);
+
+      convertVector(timestampColVector, decimalColVector, batchSize);
+    }
+  }
+
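+  /**
+   * Converts between two DECIMAL types with different precision and scale.
+   * The target DecimalColumnVector enforces the reader's precision and scale
+   * and sets entries that do not fit to null.
+   */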
+  public static class DecimalFromDecimalTreeReader extends ConvertTreeReader {
+
+    private DecimalTreeReader decimalTreeReader;
+
+    private DecimalColumnVector fileDecimalColVector;
+    private int filePrecision;
+    private int fileScale;
+    private int readerPrecision;
+    private int readerScale;
+    private DecimalColumnVector decimalColVector;
+
+    DecimalFromDecimalTreeReader(int columnId, TypeDescription fileType, TypeDescription readerType)
+        throws IOException {
+      super(columnId);
+      filePrecision = fileType.getPrecision();
+      fileScale = fileType.getScale();
+      readerPrecision = readerType.getPrecision();
+      readerScale = readerType.getScale();
+      decimalTreeReader = new DecimalTreeReader(columnId, filePrecision, fileScale);
+      setConvertTreeReader(decimalTreeReader);
+    }
+
+    @Override
+    public void setConvertVectorElement(int elementNum) throws IOException {
+      decimalColVector.set(elementNum, fileDecimalColVector.vector[elementNum]);
+    }
+
+    @Override
+    public void nextVector(ColumnVector previousVector,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      if (fileDecimalColVector == null) {
+        // Allocate column vector for file; cast column vector for reader.
+        fileDecimalColVector = new DecimalColumnVector(filePrecision, fileScale);
+        decimalColVector = (DecimalColumnVector) previousVector;
+      }
+      // Read present/isNull stream
+      decimalTreeReader.nextVector(fileDecimalColVector, isNull, batchSize);
+
+      convertVector(fileDecimalColVector, decimalColVector, batchSize);
+    }
+  }
+
+  public static class StringGroupFromAnyIntegerTreeReader extends ConvertTreeReader {
+
+    private AnyIntegerTreeReader anyIntegerAsLongTreeReader;
+
+    private final TypeDescription readerType;
+    private LongColumnVector longColVector;
+    private BytesColumnVector bytesColVector;
+
+    StringGroupFromAnyIntegerTreeReader(int columnId, TypeDescription fileType,
+        TypeDescription readerType, boolean skipCorrupt) throws IOException {
+      super(columnId);
+      this.readerType = readerType;
+      anyIntegerAsLongTreeReader =
+          new AnyIntegerTreeReader(columnId, fileType, skipCorrupt);
+      setConvertTreeReader(anyIntegerAsLongTreeReader);
+    }
+
+    @Override
+    public void setConvertVectorElement(int elementNum) {
+      long longValue = longColVector.vector[elementNum];
+      String string = anyIntegerAsLongTreeReader.getString(longValue);
+      byte[] bytes = string.getBytes();
+      assignStringGroupVectorEntry(bytesColVector, elementNum, readerType, bytes);
+    }
+
+    @Override
+    public void nextVector(ColumnVector previousVector,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      if (longColVector == null) {
+        // Allocate column vector for file; cast column vector for reader.
+        longColVector = new LongColumnVector();
+        bytesColVector = (BytesColumnVector) previousVector;
+      }
+      // Read present/isNull stream
+      anyIntegerAsLongTreeReader.nextVector(longColVector, isNull, batchSize);
+
+      convertVector(longColVector, bytesColVector, batchSize);
+    }
+  }
+
+  public static class StringGroupFromFloatTreeReader extends ConvertTreeReader {
+
+    private FloatTreeReader floatTreeReader;
+
+    private final TypeDescription readerType;
+    private DoubleColumnVector doubleColVector;
+    private BytesColumnVector bytesColVector;
+
+    StringGroupFromFloatTreeReader(int columnId, TypeDescription readerType,
+        boolean skipCorrupt) throws IOException {
+      super(columnId);
+      this.readerType = readerType;
+      floatTreeReader = new FloatTreeReader(columnId);
+      setConvertTreeReader(floatTreeReader);
+    }
+
+    @Override
+    public void setConvertVectorElement(int elementNum) {
+      float floatValue = (float) doubleColVector.vector[elementNum];
+      if (!Float.isNaN(floatValue)) {
+        String string = String.valueOf(floatValue);
+        byte[] bytes = string.getBytes();
+        assignStringGroupVectorEntry(bytesColVector, elementNum, readerType, bytes);
+      } else {
+        bytesColVector.noNulls = false;
+        bytesColVector.isNull[elementNum] = true;
+      }
+    }
+
+    @Override
+    public void nextVector(ColumnVector previousVector,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      if (doubleColVector == null) {
+        // Allocate column vector for file; cast column vector for reader.
+        doubleColVector = new DoubleColumnVector();
+        bytesColVector = (BytesColumnVector) previousVector;
+      }
+      // Read present/isNull stream
+      floatTreeReader.nextVector(doubleColVector, isNull, batchSize);
+
+      convertVector(doubleColVector, bytesColVector, batchSize);
+    }
+  }
+
+  public static class StringGroupFromDoubleTreeReader extends ConvertTreeReader {
+
+    private DoubleTreeReader doubleTreeReader;
+
+    private final TypeDescription readerType;
+    private DoubleColumnVector doubleColVector;
+    private BytesColumnVector bytesColVector;
+
+    StringGroupFromDoubleTreeReader(int columnId, TypeDescription readerType,
+        boolean skipCorrupt) throws IOException {
+      super(columnId);
+      this.readerType = readerType;
+      doubleTreeReader = new DoubleTreeReader(columnId);
+      setConvertTreeReader(doubleTreeReader);
+    }
+
+    @Override
+    public void setConvertVectorElement(int elementNum) {
+      double doubleValue = doubleColVector.vector[elementNum];
+      if (!Double.isNaN(doubleValue)) {
+        String string = String.valueOf(doubleValue);
+        byte[] bytes = string.getBytes();
+        assignStringGroupVectorEntry(bytesColVector, elementNum, readerType, bytes);
+      } else {
+        bytesColVector.noNulls = false;
+        bytesColVector.isNull[elementNum] = true;
+      }
+    }
+
+    @Override
+    public void nextVector(ColumnVector previousVector,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      if (doubleColVector == null) {
+        // Allocate column vector for file; cast column vector for reader.
+        doubleColVector = new DoubleColumnVector();
+        bytesColVector = (BytesColumnVector) previousVector;
+      }
+      // Read present/isNull stream
+      doubleTreeReader.nextVector(doubleColVector, isNull, batchSize);
+
+      convertVector(doubleColVector, bytesColVector, batchSize);
+    }
+  }
+
+  public static class StringGroupFromDecimalTreeReader extends ConvertTreeReader {
+
+    private DecimalTreeReader decimalTreeReader;
+
+    private int precision;
+    private int scale;
+    private final TypeDescription readerType;
+    private DecimalColumnVector decimalColVector;
+    private BytesColumnVector bytesColVector;
+    private byte[] scratchBuffer;
+
+    StringGroupFromDecimalTreeReader(int columnId, TypeDescription fileType,
+        TypeDescription readerType, boolean skipCorrupt) throws IOException {
+      super(columnId);
+      this.precision = fileType.getPrecision();
+      this.scale = fileType.getScale();
+      this.readerType = readerType;
+      decimalTreeReader = new DecimalTreeReader(columnId, precision, scale);
+      setConvertTreeReader(decimalTreeReader);
+      scratchBuffer = new byte[HiveDecimal.SCRATCH_BUFFER_LEN_TO_BYTES];
+    }
+
+    @Override
+    public void setConvertVectorElement(int elementNum) {
+      HiveDecimalWritable decWritable = decimalColVector.vector[elementNum];
+
+      // Convert decimal into bytes instead of a String for better performance.
+      final int byteIndex = decWritable.toBytes(scratchBuffer);
+
+      assignStringGroupVectorEntry(
+          bytesColVector, elementNum, readerType,
+          scratchBuffer, byteIndex, HiveDecimal.SCRATCH_BUFFER_LEN_TO_BYTES - byteIndex);
+    }
+
+    @Override
+    public void nextVector(ColumnVector previousVector,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      if (decimalColVector == null) {
+        // Allocate column vector for file; cast column vector for reader.
+        decimalColVector = new DecimalColumnVector(precision, scale);
+        bytesColVector = (BytesColumnVector) previousVector;
+      }
+      // Read present/isNull stream
+      decimalTreeReader.nextVector(decimalColVector, isNull, batchSize);
+
+      convertVector(decimalColVector, bytesColVector, batchSize);
+    }
+  }
+
+  public static class StringGroupFromTimestampTreeReader extends ConvertTreeReader {
+
+    private TimestampTreeReader timestampTreeReader;
+
+    private final TypeDescription readerType;
+    private TimestampColumnVector timestampColVector;
+    private BytesColumnVector bytesColVector;
+
+    StringGroupFromTimestampTreeReader(int columnId, TypeDescription readerType,
+        boolean skipCorrupt) throws IOException {
+      super(columnId);
+      this.readerType = readerType;
+      timestampTreeReader = new TimestampTreeReader(columnId, skipCorrupt);
+      setConvertTreeReader(timestampTreeReader);
+    }
+
+    @Override
+    public void setConvertVectorElement(int elementNum) throws IOException {
+      String string =
+          timestampColVector.asScratchTimestamp(elementNum).toString();
+      byte[] bytes = string.getBytes();
+      assignStringGroupVectorEntry(bytesColVector, elementNum, readerType, bytes);
+    }
+
+    @Override
+    public void nextVector(ColumnVector previousVector,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      if (timestampColVector == null) {
+        // Allocate column vector for file; cast column vector for reader.
+        timestampColVector = new TimestampColumnVector();
+        bytesColVector = (BytesColumnVector) previousVector;
+      }
+      // Read present/isNull stream
+      timestampTreeReader.nextVector(timestampColVector, isNull, batchSize);
+
+      convertVector(timestampColVector, bytesColVector, batchSize);
+    }
+  }
+
+  public static class StringGroupFromDateTreeReader extends ConvertTreeReader {
+
+    private DateTreeReader dateTreeReader;
+
+    private final TypeDescription readerType;
+    private LongColumnVector longColVector;
+    private BytesColumnVector bytesColVector;
+    private Date date;
+
+    StringGroupFromDateTreeReader(int columnId, TypeDescription readerType,
+        boolean skipCorrupt) throws IOException {
+      super(columnId);
+      this.readerType = readerType;
+      dateTreeReader = new DateTreeReader(columnId);
+      setConvertTreeReader(dateTreeReader);
+      date = new Date(0);
+    }
+
+    @Override
+    public void setConvertVectorElement(int elementNum) throws IOException {
+      date.setTime(DateWritable.daysToMillis((int) longColVector.vector[elementNum]));
+      String string = date.toString();
+      byte[] bytes = string.getBytes();
+      assignStringGroupVectorEntry(bytesColVector, elementNum, readerType, bytes);
+    }
+
+    @Override
+    public void nextVector(ColumnVector previousVector,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      if (longColVector == null) {
+        // Allocate column vector for file; cast column vector for reader.
+        longColVector = new LongColumnVector();
+        bytesColVector = (BytesColumnVector) previousVector;
+      }
+      // Read present/isNull stream
+      dateTreeReader.nextVector(longColVector, isNull, batchSize);
+
+      convertVector(longColVector, bytesColVector, batchSize);
+    }
+  }
+
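+  /**
+   * Converts among the string group types (STRING, CHAR, VARCHAR) by
+   * re-trimming or truncating each entry to the reader type's maximum length.
+   */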
+  public static class StringGroupFromStringGroupTreeReader extends ConvertTreeReader {
+
+    private TreeReader stringGroupTreeReader;
+
+    private final TypeDescription readerType;
+
+    StringGroupFromStringGroupTreeReader(int columnId, TypeDescription fileType,
+        TypeDescription readerType) throws IOException {
+      super(columnId);
+      this.readerType = readerType;
+      stringGroupTreeReader = getStringGroupTreeReader(columnId, fileType);
+      setConvertTreeReader(stringGroupTreeReader);
+    }
+
+    @Override
+    public void nextVector(ColumnVector previousVector,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      stringGroupTreeReader.nextVector(previousVector, isNull, batchSize);
+
+      BytesColumnVector resultColVector = (BytesColumnVector) previousVector;
+
+      if (resultColVector.isRepeating) {
+        if (resultColVector.noNulls || !resultColVector.isNull[0]) {
+          convertStringGroupVectorElement(resultColVector, 0, readerType);
+        } else {
+          // Remains null.
+        }
+      } else if (resultColVector.noNulls) {
+        for (int i = 0; i < batchSize; i++) {
+          convertStringGroupVectorElement(resultColVector, i, readerType);
+        }
+      } else {
+        for (int i = 0; i < batchSize; i++) {
+          if (!resultColVector.isNull[i]) {
+            convertStringGroupVectorElement(resultColVector, i, readerType);
+          } else {
+            // Remains null.
+          }
+        }
+      }
+    }
+  }
+
+  public static class StringGroupFromBinaryTreeReader extends ConvertTreeReader {
+
+    private BinaryTreeReader binaryTreeReader;
+
+    private final TypeDescription readerType;
+    private BytesColumnVector inBytesColVector;
+    private BytesColumnVector outBytesColVector;
+
+    StringGroupFromBinaryTreeReader(int columnId, TypeDescription readerType,
+        boolean skipCorrupt) throws IOException {
+      super(columnId);
+      this.readerType = readerType;
+      binaryTreeReader = new BinaryTreeReader(columnId);
+      setConvertTreeReader(binaryTreeReader);
+    }
+
+    @Override
+    public void setConvertVectorElement(int elementNum) throws IOException {
+      byte[] bytes = inBytesColVector.vector[elementNum];
+      int start = inBytesColVector.start[elementNum];
+      int length = inBytesColVector.length[elementNum];
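+      // Hex-encode the binary value: each byte becomes two lower-case hex digits,
+      // with a single space between consecutive bytes, so a non-empty value of
+      // N bytes needs 3 * N - 1 output bytes.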
+      byte[] string = new byte[length == 0 ? 0 : 3 * length - 1];
+      for(int p = 0; p < string.length; p += 2) {
+        if (p != 0) {
+          string[p++] = ' ';
+        }
+        int num = 0xff & bytes[start++];
+        int digit = num / 16;
+        string[p] = (byte)((digit) + (digit < 10 ? '0' : 'a' - 10));
+        digit = num % 16;
+        string[p + 1] = (byte)((digit) + (digit < 10 ? '0' : 'a' - 10));
+      }
+      assignStringGroupVectorEntry(outBytesColVector, elementNum, readerType,
+          string, 0, string.length);
+    }
+
+    @Override
+    public void nextVector(ColumnVector previousVector,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      if (inBytesColVector == null) {
+        // Allocate column vector for file; cast column vector for reader.
+        inBytesColVector = new BytesColumnVector();
+        outBytesColVector = (BytesColumnVector) previousVector;
+      }
+      // Read present/isNull stream
+      binaryTreeReader.nextVector(inBytesColVector, isNull, batchSize);
+
+      convertVector(inBytesColVector, outBytesColVector, batchSize);
+    }
+  }
+
+  public static class TimestampFromAnyIntegerTreeReader extends ConvertTreeReader {
+
+    private AnyIntegerTreeReader anyIntegerAsLongTreeReader;
+
+    private LongColumnVector longColVector;
+    private TimestampColumnVector timestampColVector;
+
+    TimestampFromAnyIntegerTreeReader(int columnId, TypeDescription fileType,
+        boolean skipCorrupt) throws IOException {
+      super(columnId);
+      anyIntegerAsLongTreeReader =
+          new AnyIntegerTreeReader(columnId, fileType, skipCorrupt);
+      setConvertTreeReader(anyIntegerAsLongTreeReader);
+    }
+
+    @Override
+    public void setConvertVectorElement(int elementNum) {
+      long longValue = longColVector.vector[elementNum];
+      // UNDONE: What does the boolean setting need to be?
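+      // As written, the long value is passed to java.sql.Timestamp as
+      // milliseconds since the epoch.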
+      timestampColVector.set(elementNum, new Timestamp(longValue));
+    }
+
+    @Override
+    public void nextVector(ColumnVector previousVector,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      if (longColVector == null) {
+        // Allocate column vector for file; cast column vector for reader.
+        longColVector = new LongColumnVector();
+        timestampColVector = (TimestampColumnVector) previousVector;
+      }
+      // Read present/isNull stream
+      anyIntegerAsLongTreeReader.nextVector(longColVector, isNull, batchSize);
+
+      convertVector(longColVector, timestampColVector, batchSize);
+    }
+  }
+
+  public static class TimestampFromFloatTreeReader extends ConvertTreeReader {
+
+    private FloatTreeReader floatTreeReader;
+
+    private DoubleColumnVector doubleColVector;
+    private TimestampColumnVector timestampColVector;
+
+    TimestampFromFloatTreeReader(int columnId, TypeDescription fileType,
+        boolean skipCorrupt) throws IOException {
+      super(columnId);
+      floatTreeReader = new FloatTreeReader(columnId);
+      setConvertTreeReader(floatTreeReader);
+    }
+
+    @Override
+    public void setConvertVectorElement(int elementNum) {
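+      // Narrow to float first so the result reflects the precision of the file's
+      // FLOAT values; doubleToTimestamp() treats the value as seconds since the
+      // epoch, carrying the fractional part into nanoseconds.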
+      float floatValue = (float) doubleColVector.vector[elementNum];
+      Timestamp timestampValue = TimestampUtils.doubleToTimestamp(floatValue);
+      // The TimestampColumnVector will set the entry to null when a null timestamp is passed in.
+      timestampColVector.set(elementNum, timestampValue);
+    }
+
+    @Override
+    public void nextVector(ColumnVector previousVector,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      if (doubleColVector == null) {
+        // Allocate column vector for file; cast column vector for reader.
+        doubleColVector = new DoubleColumnVector();
+        timestampColVector = (TimestampColumnVector) previousVector;
+      }
+      // Read present/isNull stream
+      floatTreeReader.nextVector(doubleColVector, isNull, batchSize);
+
+      convertVector(doubleColVector, timestampColVector, batchSize);
+    }
+  }
+
+  public static class TimestampFromDoubleTreeReader extends ConvertTreeReader {
+
+    private DoubleTreeReader doubleTreeReader;
+
+    private DoubleColumnVector doubleColVector;
+    private TimestampColumnVector timestampColVector;
+
+    TimestampFromDoubleTreeReader(int columnId, TypeDescription fileType,
+        boolean skipCorrupt) throws IOException {
+      super(columnId);
+      doubleTreeReader = new DoubleTreeReader(columnId);
+      setConvertTreeReader(doubleTreeReader);
+    }
+
+    @Override
+    public void setConvertVectorElement(int elementNum) {
+      double doubleValue = doubleColVector.vector[elementNum];
+      Timestamp timestampValue = TimestampUtils.doubleToTimestamp(doubleValue);
+      // The TimestampColumnVector will set the entry to null when a null timestamp is passed in.
+      timestampColVector.set(elementNum, timestampValue);
+    }
+
+    @Override
+    public void nextVector(ColumnVector previousVector,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      if (doubleColVector == null) {
+        // Allocate column vector for file; cast column vector for reader.
+        doubleColVector = new DoubleColumnVector();
+        timestampColVector = (TimestampColumnVector) previousVector;
+      }
+      // Read present/isNull stream
+      doubleTreeReader.nextVector(doubleColVector, isNull, batchSize);
+
+      convertVector(doubleColVector, timestampColVector, batchSize);
+    }
+  }
+
+  public static class TimestampFromDecimalTreeReader extends ConvertTreeReader {
+
+    private DecimalTreeReader decimalTreeReader;
+
+    private final int precision;
+    private final int scale;
+    private DecimalColumnVector decimalColVector;
+    private TimestampColumnVector timestampColVector;
+
+    TimestampFromDecimalTreeReader(int columnId, TypeDescription fileType,
+        boolean skipCorrupt) throws IOException {
+      super(columnId);
+      this.precision = fileType.getPrecision();
+      this.scale = fileType.getScale();
+      decimalTreeReader = new DecimalTreeReader(columnId, precision, scale);
+      setConvertTreeReader(decimalTreeReader);
+    }
+
+    @Override
+    public void setConvertVectorElement(int elementNum) {
+      Timestamp timestampValue =
+            TimestampUtils.decimalToTimestamp(
+                decimalColVector.vector[elementNum].getHiveDecimal());
+      // The TimestampColumnVector will set the entry to null when a null timestamp is passed in.
+      timestampColVector.set(elementNum, timestampValue);
+    }
+
+    @Override
+    public void nextVector(ColumnVector previousVector,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      if (decimalColVector == null) {
+        // Allocate column vector for file; cast column vector for reader.
+        decimalColVector = new DecimalColumnVector(precision, scale);
+        timestampColVector = (TimestampColumnVector) previousVector;
+      }
+      // Read present/isNull stream
+      decimalTreeReader.nextVector(decimalColVector, isNull, batchSize);
+
+      convertVector(decimalColVector, timestampColVector, batchSize);
+    }
+  }
+
+  public static class TimestampFromStringGroupTreeReader extends ConvertTreeReader {
+
+    private TreeReader stringGroupTreeReader;
+
+    private BytesColumnVector bytesColVector;
+    private TimestampColumnVector timestampColVector;
+
+    TimestampFromStringGroupTreeReader(int columnId, TypeDescription fileType)
+        throws IOException {
+      super(columnId);
+      stringGroupTreeReader = getStringGroupTreeReader(columnId, fileType);
+      setConvertTreeReader(stringGroupTreeReader);
+    }
+
+    @Override
+    public void setConvertVectorElement(int elementNum) throws IOException {
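+      // Parse the string form of the timestamp; values that cannot be parsed
+      // become null.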
+      String stringValue =
+          stringFromBytesColumnVectorEntry(bytesColVector, elementNum);
+      Timestamp timestampValue = parseTimestampFromString(stringValue);
+      if (timestampValue != null) {
+        timestampColVector.set(elementNum, timestampValue);
+      } else {
+        timestampColVector.noNulls = false;
+        timestampColVector.isNull[elementNum] = true;
+      }
+    }
+
+    @Override
+    public void nextVector(ColumnVector previousVector,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      if (bytesColVector == null) {
+        // Allocate column vector for file; cast column vector for reader.
+        bytesColVector = new BytesColumnVector();
+        timestampColVector = (TimestampColumnVector) previousVector;
+      }
+      // Read present/isNull stream
+      stringGroupTreeReader.nextVector(bytesColVector, isNull, batchSize);
+
+      convertVector(bytesColVector, timestampColVector, batchSize);
+    }
+  }
+
+  public static class TimestampFromDateTreeReader extends ConvertTreeReader {
+
+    private DateTreeReader dateTreeReader;
+
+    private LongColumnVector longColVector;
+    private TimestampColumnVector timestampColVector;
+
+    TimestampFromDateTreeReader(int columnId, TypeDescription fileType,
+        boolean skipCorrupt) throws IOException {
+      super(columnId);
+      dateTreeReader = new DateTreeReader(columnId);
+      setConvertTreeReader(dateTreeReader);
+    }
+
+    @Override
+    public void setConvertVectorElement(int elementNum) {
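+      // DateWritable.daysToMillis() expands days since the epoch into a
+      // millisecond value for midnight of that day in the local time zone.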
+      long millis =
+          DateWritable.daysToMillis((int) longColVector.vector[elementNum]);
+      timestampColVector.set(elementNum, new Timestamp(millis));
+    }
+
+    @Override
+    public void nextVector(ColumnVector previousVector,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      if (longColVector == null) {
+        // Allocate column vector for file; cast column vector for reader.
+        longColVector = new LongColumnVector();
+        timestampColVector = (TimestampColumnVector) previousVector;
+      }
+      // Read present/isNull stream
+      dateTreeReader.nextVector(longColVector, isNull, batchSize);
+
+      convertVector(longColVector, timestampColVector, batchSize);
+    }
+  }
+
+  public static class DateFromStringGroupTreeReader extends ConvertTreeReader {
+
+    private TreeReader stringGroupTreeReader;
+
+    private BytesColumnVector bytesColVector;
+    private LongColumnVector longColVector;
+
+    DateFromStringGroupTreeReader(int columnId, TypeDescription fileType)
+        throws IOException {
+      super(columnId);
+      stringGroupTreeReader = getStringGroupTreeReader(columnId, fileType);
+      setConvertTreeReader(stringGroupTreeReader);
+    }
+
+    @Override
+    public void setConvertVectorElement(int elementNum) throws IOException {
+      String stringValue =
+          stringFromBytesColumnVectorEntry(bytesColVector, elementNum);
+      Date dateValue = parseDateFromString(stringValue);
+      if (dateValue != null) {
+        longColVector.vector[elementNum] = DateWritable.dateToDays(dateValue);
+      } else {
+        longColVector.noNulls = false;
+        longColVector.isNull[elementNum] = true;
+      }
+    }
+
+    @Override
+    public void nextVector(ColumnVector previousVector,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      if (bytesColVector == null) {
+        // Allocate column vector for file; cast column vector for reader.
+        bytesColVector = new BytesColumnVector();
+        longColVector = (LongColumnVector) previousVector;
+      }
+      // Read present/isNull stream
+      stringGroupTreeReader.nextVector(bytesColVector, isNull, batchSize);
+
+      convertVector(bytesColVector, longColVector, batchSize);
+    }
+  }
+
+  public static class DateFromTimestampTreeReader extends ConvertTreeReader {
+
+    private TimestampTreeReader timestampTreeReader;
+
+    private TimestampColumnVector timestampColVector;
+    private LongColumnVector longColVector;
+
+    DateFromTimestampTreeReader(int columnId, boolean skipCorrupt) throws IOException {
+      super(columnId);
+      timestampTreeReader = new TimestampTreeReader(columnId, skipCorrupt);
+      setConvertTreeReader(timestampTreeReader);
+    }
+
+    @Override
+    public void setConvertVectorElement(int elementNum) throws IOException {
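+      // Drop the sub-second part, turn the remaining seconds into a Date, and
+      // store it back as days since the epoch.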
+      Date dateValue =
+          DateWritable.timeToDate(TimestampUtils.millisToSeconds(
+              timestampColVector.asScratchTimestamp(elementNum).getTime()));
+      longColVector.vector[elementNum] = DateWritable.dateToDays(dateValue);
+    }
+
+    @Override
+    public void nextVector(ColumnVector previousVector,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      if (timestampColVector == null) {
+        // Allocate column vector for file; cast column vector for reader.
+        timestampColVector = new TimestampColumnVector();
+        longColVector = (LongColumnVector) previousVector;
+      }
+      // Read present/isNull stream
+      timestampTreeReader.nextVector(timestampColVector, isNull, batchSize);
+
+      convertVector(timestampColVector, longColVector, batchSize);
+    }
+  }
+
+  public static class BinaryFromStringGroupTreeReader extends ConvertTreeReader {
+
+    private TreeReader stringGroupTreeReader;
+
+    BinaryFromStringGroupTreeReader(int columnId, TypeDescription fileType)
+        throws IOException {
+      super(columnId);
+      stringGroupTreeReader = getStringGroupTreeReader(columnId, fileType);
+      setConvertTreeReader(stringGroupTreeReader);
+    }
+
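+    // No per-element conversion is required: STRING, CHAR, VARCHAR and BINARY
+    // all use BytesColumnVector, so the bytes are passed through unchanged.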
+    @Override
+    public void nextVector(ColumnVector previousVector,
+                           boolean[] isNull,
+                           final int batchSize) throws IOException {
+      super.nextVector(previousVector, isNull, batchSize);
+    }
+  }
+
+  private static TreeReader createAnyIntegerConvertTreeReader(int columnId,
+                                                              TypeDescription fileType,
+                                                              TypeDescription readerType,
+                                                              SchemaEvolution evolution,
+                                                              boolean[] included,
+                                                              boolean skipCorrupt) throws IOException {
+
+    // CONVERT from (BOOLEAN, BYTE, SHORT, INT, LONG) to schema type.
+    //
+    switch (readerType.getCategory()) {
+
+    case BOOLEAN:
+    case BYTE:
+    case SHORT:
+    case INT:
+    case LONG:
+      if (fileType.getCategory() == readerType.getCategory()) {
+        throw new IllegalArgumentException("No conversion of type " +
+            readerType.getCategory() + " to self needed");
+      }
+      return new AnyIntegerFromAnyIntegerTreeReader(columnId, fileType, readerType,
+          skipCorrupt);
+
+    case FLOAT:
+      return new FloatFromAnyIntegerTreeReader(columnId, fileType,
+          skipCorrupt);
+
+    case DOUBLE:
+      return new DoubleFromAnyIntegerTreeReader(columnId, fileType,
+          skipCorrupt);
+
+    case DECIMAL:
+      return new DecimalFromAnyIntegerTreeReader(columnId, fileType, skipCorrupt);
+
+    case STRING:
+    case CHAR:
+    case VARCHAR:
+      return new StringGroupFromAnyIntegerTreeReader(columnId, fileType, readerType,
+          skipCorrupt);
+
+    case TIMESTAMP:
+      return new TimestampFromAnyIntegerTreeReader(columnId, fileType, skipCorrupt);
+
+    // Not currently supported conversion(s):
+    case BINARY:
+    case DATE:
+
+    case STRUCT:
+    case LIST:
+    case MAP:
+    case UNION:
+    default:
+      throw new IllegalArgumentException("Unsupported type " +
+          readerType.getCategory());
+    }
+  }
+
+  private static TreeReader createFloatConvertTreeReader(int columnId,
+                                                         TypeDescription fileType,
+                                                         TypeDescription readerType,
+                                                         SchemaEvolution evolution,
+                                                         boolean[] included,
+                                                         boolean skipCorrupt) throws IOException {
+
+    // CONVERT from FLOAT to schema type.
+    switch (readerType.getCategory()) {
+
+    case BOOLEAN:
+    case BYTE:
+    case SHORT:
+    case INT:
+    case LONG:
+      return new AnyIntegerFromFloatTreeReader(columnId, readerType);
+
+    case FLOAT:
+      throw new IllegalArgumentException("No conversion of type " +
+        readerType.getCategory() + " to self needed");
+
+    case DOUBLE:
+      return new DoubleFromFloatTreeReader(columnId);
+
+    case DECIMAL:
+      return new DecimalFromFloatTreeReader(columnId, readerType);
+
+    case STRING:
+    case CHAR:
+    case VARCHAR:
+      return new StringGroupFromFloatTreeReader(columnId, readerType, skipCorrupt);
+
+    case TIMESTAMP:
+      return new TimestampFromFloatTreeReader(columnId, readerType, skipCorrupt);
+
+    // Not currently supported conversion(s):
+    case BINARY:
+    case DATE:
+
+    case STRUCT:
+    case LIST:
+    case MAP:
+    case UNION:
+    default:
+      throw new IllegalArgumentException("Unsupported type " +
+          readerType.getCategory());
+    }
+  }
+
+  private static TreeReader createDoubleConvertTreeReader(int columnId,
+                                                          TypeDescription fileType,
+                                                          TypeDescription readerType,
+                                                          SchemaEvolution evolution,
+                                                          boolean[] included,
+                                                          boolean skipCorrupt) throws IOException {
+
+    // CONVERT from DOUBLE to schema type.
+    switch (readerType.getCategory()) {
+
+    case BOOLEAN:
+    case BYTE:
+    case SHORT:
+    case INT:
+    case LONG:
+      return new AnyIntegerFromDoubleTreeReader(columnId, readerType);
+
+    case FLOAT:
+      return new FloatFromDoubleTreeReader(columnId);
+
+    case DOUBLE:
+      throw new IllegalArgumentException("No conversion of type " +
+        readerType.getCategory() + " to self needed");
+
+    case DECIMAL:
+      return new DecimalFromDoubleTreeReader(columnId, readerType);
+
+    case STRING:
+    case CHAR:
+    case VARCHAR:
+      return new StringGroupFromDoubleTreeReader(columnId, readerType, skipCorrupt);
+
+    case TIMESTAMP:
+      return new TimestampFromDoubleTreeReader(columnId, readerType, skipCorrupt);
+
+    // Not currently supported conversion(s):
+    case BINARY:
+    case DATE:
+
+    case STRUCT:
+    case LIST:
+    case MAP:
+    case UNION:
+    default:
+      throw new IllegalArgumentException("Unsupported type " +
+          readerType.getCategory());
+    }
+  }
+
+  private static TreeReader createDecimalConvertTreeReader(int columnId,
+                                                           TypeDescription fileType,
+                                                           TypeDescription readerType,
+                                                           SchemaEvolution evolution,
+                                                           boolean[] included,
+                                                           boolean skipCorrupt) throws IOException {
+
+    // CONVERT from DECIMAL to schema type.
+    switch (readerType.getCategory()) {
+
+    case BOOLEAN:
+    case BYTE:
+    case SHORT:
+    case INT:
+    case LONG:
+      return new AnyIntegerFromDecimalTreeReader(columnId, fileType, readerType);
+
+    case FLOAT:
+      return new FloatFromDecimalTreeReader(columnId, fileType, readerType);
+
+    case DOUBLE:
+      return new DoubleFromDecimalTreeReader(columnId, fileType);
+
+    case STRING:
+    case CHAR:
+    case VARCHAR:
+      return new StringGroupFromDecimalTreeReader(columnId, fileType, readerType, skipCorrupt);
+
+    case TIMESTAMP:
+      return new TimestampFromDecimalTreeReader(columnId, fileType, skipCorrupt);
+
+    case DECIMAL:
+      return new DecimalFromDecimalTreeReader(columnId, fileType, readerType);
+
+    // Not currently supported conversion(s):
+    case BINARY:
+    case DATE:
+
+    case STRUCT:
+    case LIST:
+    case MAP:
+    case UNION:
+    default:
+      throw new IllegalArgumentException("Unsupported type " +
+          readerType.getCategory());
+    }
+  }
+
+  private static TreeReader createStringConvertTreeReader(int columnId,
+                                                          TypeDescription fileType,
+                                                          TypeDescription readerType,
+                                                          SchemaEvolution evolution,
+                                                          boolean[] included,
+                                                          boolean skipCorrupt) throws IOException {
+
+    // CONVERT from STRING to schema type.
+    switch (readerType.getCategory()) {
+
+    case BOOLEAN:
+    case BYTE:
+    case SHORT:
+    case INT:
+    case LONG:
+      return new AnyIntegerFromStringGroupTreeReader(columnId, fileType, readerType);
+
+    case FLOAT:
+      return new FloatFromStringGroupTreeReader(columnId, fileType);
+
+    case DOUBLE:
+      return new DoubleFromStringGroupTreeReader(columnId, fileType);
+
+    case DECIMAL:
+      return new DecimalFromStringGroupTreeReader(columnId, fileType, readerType);
+
+    case CHAR:
+      return new StringGroupFromStringGroupTreeReader(columnId, fileType, readerType);
+
+    case VARCHAR:
+      return new StringGroupFromStringGroupTreeReader(columnId, fileType, readerType);
+
+    case STRING:
+      throw new IllegalArgumentException("No conversion of type " +
+          readerType.getCategory() + " to self needed");
+
+    case BINARY:
+      return new BinaryFromStringGroupTreeReader(columnId, fileType);
+
+    case TIMESTAMP:
+      return new TimestampFromStringGroupTreeReader(columnId, fileType);
+
+    case DATE:
+      return new DateFromStringGroupTreeReader(columnId, fileType);
+
+    // Not currently supported conversion(s):
+
+    case STRUCT:
+    case LIST:
+    case MAP:
+    case UNION:
+    default:
+      throw new IllegalArgumentException("Unsupported type " +
+          readerType.getCategory());
+    }
+  }
+
+  private static TreeReader createCharConvertTreeReader(int columnId,
+                                                        TypeDescription fileType,
+                                                        TypeDescription readerType,
+                                                        SchemaEvolution evolution,
+                                                        boolean[] included,
+                                                        boolean skipCorrupt) throws IOException {
+
+    // CONVERT from CHAR to schema type.
+    switch (readerType.getCategory()) {
+
+    case BOOLEAN:
+    case BYTE:
+    case SHORT:
+    case INT:
+    case LONG:
+      return new AnyIntegerFromStringGroupTreeReader(columnId, fileType, readerType);
+
+    case FLOAT:
+      return new FloatFromStringGroupTreeReader(columnId, fileType);
+
+    case DOUBLE:
+      return new DoubleFromStringGroupTreeReader(columnId, fileType);
+
+    case DECIMAL:
+      return new DecimalFromStringGroupTreeReader(columnId, fileType, readerType);
+
+    case STRING:
+      return new StringGroupFromStringGroupTreeReader(columnId, fileType, readerType);
+
+    case VARCHAR:
+      return new StringGroupFromStringGroupTreeReader(columnId, fileType, readerType);
+
+    case CHAR:
+      return new StringGroupFromStringGroupTreeReader(columnId, fileType, readerType);
+
+    case BINARY:
+      return new BinaryFromStringGroupTreeReader(columnId, fileType);
+
+    case TIMESTAMP:
+      return new TimestampFromStringGroupTreeReader(columnId, fileType);
+
+    case DATE:
+      return new DateFromStringGroupTreeReader(columnId, fileTyp

<TRUNCATED>

[35/37] hive git commit: HIVE-17118. Move the hive-orc source files to make the package names unique.

Posted by om...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/gen/protobuf-java/org/apache/orc/OrcProto.java
----------------------------------------------------------------------
diff --git a/orc/src/gen/protobuf-java/org/apache/orc/OrcProto.java b/orc/src/gen/protobuf-java/org/apache/orc/OrcProto.java
deleted file mode 100644
index 32193ec..0000000
--- a/orc/src/gen/protobuf-java/org/apache/orc/OrcProto.java
+++ /dev/null
@@ -1,20179 +0,0 @@
-// Generated by the protocol buffer compiler.  DO NOT EDIT!
-// source: orc_proto.proto
-
-package org.apache.orc;
-
-public final class OrcProto {
-  private OrcProto() {}
-  public static void registerAllExtensions(
-      com.google.protobuf.ExtensionRegistry registry) {
-  }
-  /**
-   * Protobuf enum {@code orc.proto.CompressionKind}
-   */
-  public enum CompressionKind
-      implements com.google.protobuf.ProtocolMessageEnum {
-    /**
-     * <code>NONE = 0;</code>
-     */
-    NONE(0, 0),
-    /**
-     * <code>ZLIB = 1;</code>
-     */
-    ZLIB(1, 1),
-    /**
-     * <code>SNAPPY = 2;</code>
-     */
-    SNAPPY(2, 2),
-    /**
-     * <code>LZO = 3;</code>
-     */
-    LZO(3, 3),
-    ;
-
-    /**
-     * <code>NONE = 0;</code>
-     */
-    public static final int NONE_VALUE = 0;
-    /**
-     * <code>ZLIB = 1;</code>
-     */
-    public static final int ZLIB_VALUE = 1;
-    /**
-     * <code>SNAPPY = 2;</code>
-     */
-    public static final int SNAPPY_VALUE = 2;
-    /**
-     * <code>LZO = 3;</code>
-     */
-    public static final int LZO_VALUE = 3;
-
-
-    public final int getNumber() { return value; }
-
-    public static CompressionKind valueOf(int value) {
-      switch (value) {
-        case 0: return NONE;
-        case 1: return ZLIB;
-        case 2: return SNAPPY;
-        case 3: return LZO;
-        default: return null;
-      }
-    }
-
-    public static com.google.protobuf.Internal.EnumLiteMap<CompressionKind>
-        internalGetValueMap() {
-      return internalValueMap;
-    }
-    private static com.google.protobuf.Internal.EnumLiteMap<CompressionKind>
-        internalValueMap =
-          new com.google.protobuf.Internal.EnumLiteMap<CompressionKind>() {
-            public CompressionKind findValueByNumber(int number) {
-              return CompressionKind.valueOf(number);
-            }
-          };
-
-    public final com.google.protobuf.Descriptors.EnumValueDescriptor
-        getValueDescriptor() {
-      return getDescriptor().getValues().get(index);
-    }
-    public final com.google.protobuf.Descriptors.EnumDescriptor
-        getDescriptorForType() {
-      return getDescriptor();
-    }
-    public static final com.google.protobuf.Descriptors.EnumDescriptor
-        getDescriptor() {
-      return org.apache.orc.OrcProto.getDescriptor().getEnumTypes().get(0);
-    }
-
-    private static final CompressionKind[] VALUES = values();
-
-    public static CompressionKind valueOf(
-        com.google.protobuf.Descriptors.EnumValueDescriptor desc) {
-      if (desc.getType() != getDescriptor()) {
-        throw new java.lang.IllegalArgumentException(
-          "EnumValueDescriptor is not for this type.");
-      }
-      return VALUES[desc.getIndex()];
-    }
-
-    private final int index;
-    private final int value;
-
-    private CompressionKind(int index, int value) {
-      this.index = index;
-      this.value = value;
-    }
-
-    // @@protoc_insertion_point(enum_scope:orc.proto.CompressionKind)
-  }
-
-  public interface IntegerStatisticsOrBuilder
-      extends com.google.protobuf.MessageOrBuilder {
-
-    // optional sint64 minimum = 1;
-    /**
-     * <code>optional sint64 minimum = 1;</code>
-     */
-    boolean hasMinimum();
-    /**
-     * <code>optional sint64 minimum = 1;</code>
-     */
-    long getMinimum();
-
-    // optional sint64 maximum = 2;
-    /**
-     * <code>optional sint64 maximum = 2;</code>
-     */
-    boolean hasMaximum();
-    /**
-     * <code>optional sint64 maximum = 2;</code>
-     */
-    long getMaximum();
-
-    // optional sint64 sum = 3;
-    /**
-     * <code>optional sint64 sum = 3;</code>
-     */
-    boolean hasSum();
-    /**
-     * <code>optional sint64 sum = 3;</code>
-     */
-    long getSum();
-  }
-  /**
-   * Protobuf type {@code orc.proto.IntegerStatistics}
-   */
-  public static final class IntegerStatistics extends
-      com.google.protobuf.GeneratedMessage
-      implements IntegerStatisticsOrBuilder {
-    // Use IntegerStatistics.newBuilder() to construct.
-    private IntegerStatistics(com.google.protobuf.GeneratedMessage.Builder<?> builder) {
-      super(builder);
-      this.unknownFields = builder.getUnknownFields();
-    }
-    private IntegerStatistics(boolean noInit) { this.unknownFields = com.google.protobuf.UnknownFieldSet.getDefaultInstance(); }
-
-    private static final IntegerStatistics defaultInstance;
-    public static IntegerStatistics getDefaultInstance() {
-      return defaultInstance;
-    }
-
-    public IntegerStatistics getDefaultInstanceForType() {
-      return defaultInstance;
-    }
-
-    private final com.google.protobuf.UnknownFieldSet unknownFields;
-    @java.lang.Override
-    public final com.google.protobuf.UnknownFieldSet
-        getUnknownFields() {
-      return this.unknownFields;
-    }
-    private IntegerStatistics(
-        com.google.protobuf.CodedInputStream input,
-        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
-        throws com.google.protobuf.InvalidProtocolBufferException {
-      initFields();
-      int mutable_bitField0_ = 0;
-      com.google.protobuf.UnknownFieldSet.Builder unknownFields =
-          com.google.protobuf.UnknownFieldSet.newBuilder();
-      try {
-        boolean done = false;
-        while (!done) {
-          int tag = input.readTag();
-          switch (tag) {
-            case 0:
-              done = true;
-              break;
-            default: {
-              if (!parseUnknownField(input, unknownFields,
-                                     extensionRegistry, tag)) {
-                done = true;
-              }
-              break;
-            }
-            case 8: {
-              bitField0_ |= 0x00000001;
-              minimum_ = input.readSInt64();
-              break;
-            }
-            case 16: {
-              bitField0_ |= 0x00000002;
-              maximum_ = input.readSInt64();
-              break;
-            }
-            case 24: {
-              bitField0_ |= 0x00000004;
-              sum_ = input.readSInt64();
-              break;
-            }
-          }
-        }
-      } catch (com.google.protobuf.InvalidProtocolBufferException e) {
-        throw e.setUnfinishedMessage(this);
-      } catch (java.io.IOException e) {
-        throw new com.google.protobuf.InvalidProtocolBufferException(
-            e.getMessage()).setUnfinishedMessage(this);
-      } finally {
-        this.unknownFields = unknownFields.build();
-        makeExtensionsImmutable();
-      }
-    }
-    public static final com.google.protobuf.Descriptors.Descriptor
-        getDescriptor() {
-      return org.apache.orc.OrcProto.internal_static_orc_proto_IntegerStatistics_descriptor;
-    }
-
-    protected com.google.protobuf.GeneratedMessage.FieldAccessorTable
-        internalGetFieldAccessorTable() {
-      return org.apache.orc.OrcProto.internal_static_orc_proto_IntegerStatistics_fieldAccessorTable
-          .ensureFieldAccessorsInitialized(
-              org.apache.orc.OrcProto.IntegerStatistics.class, org.apache.orc.OrcProto.IntegerStatistics.Builder.class);
-    }
-
-    public static com.google.protobuf.Parser<IntegerStatistics> PARSER =
-        new com.google.protobuf.AbstractParser<IntegerStatistics>() {
-      public IntegerStatistics parsePartialFrom(
-          com.google.protobuf.CodedInputStream input,
-          com.google.protobuf.ExtensionRegistryLite extensionRegistry)
-          throws com.google.protobuf.InvalidProtocolBufferException {
-        return new IntegerStatistics(input, extensionRegistry);
-      }
-    };
-
-    @java.lang.Override
-    public com.google.protobuf.Parser<IntegerStatistics> getParserForType() {
-      return PARSER;
-    }
-
-    private int bitField0_;
-    // optional sint64 minimum = 1;
-    public static final int MINIMUM_FIELD_NUMBER = 1;
-    private long minimum_;
-    /**
-     * <code>optional sint64 minimum = 1;</code>
-     */
-    public boolean hasMinimum() {
-      return ((bitField0_ & 0x00000001) == 0x00000001);
-    }
-    /**
-     * <code>optional sint64 minimum = 1;</code>
-     */
-    public long getMinimum() {
-      return minimum_;
-    }
-
-    // optional sint64 maximum = 2;
-    public static final int MAXIMUM_FIELD_NUMBER = 2;
-    private long maximum_;
-    /**
-     * <code>optional sint64 maximum = 2;</code>
-     */
-    public boolean hasMaximum() {
-      return ((bitField0_ & 0x00000002) == 0x00000002);
-    }
-    /**
-     * <code>optional sint64 maximum = 2;</code>
-     */
-    public long getMaximum() {
-      return maximum_;
-    }
-
-    // optional sint64 sum = 3;
-    public static final int SUM_FIELD_NUMBER = 3;
-    private long sum_;
-    /**
-     * <code>optional sint64 sum = 3;</code>
-     */
-    public boolean hasSum() {
-      return ((bitField0_ & 0x00000004) == 0x00000004);
-    }
-    /**
-     * <code>optional sint64 sum = 3;</code>
-     */
-    public long getSum() {
-      return sum_;
-    }
-
-    private void initFields() {
-      minimum_ = 0L;
-      maximum_ = 0L;
-      sum_ = 0L;
-    }
-    private byte memoizedIsInitialized = -1;
-    public final boolean isInitialized() {
-      byte isInitialized = memoizedIsInitialized;
-      if (isInitialized != -1) return isInitialized == 1;
-
-      memoizedIsInitialized = 1;
-      return true;
-    }
-
-    public void writeTo(com.google.protobuf.CodedOutputStream output)
-                        throws java.io.IOException {
-      getSerializedSize();
-      if (((bitField0_ & 0x00000001) == 0x00000001)) {
-        output.writeSInt64(1, minimum_);
-      }
-      if (((bitField0_ & 0x00000002) == 0x00000002)) {
-        output.writeSInt64(2, maximum_);
-      }
-      if (((bitField0_ & 0x00000004) == 0x00000004)) {
-        output.writeSInt64(3, sum_);
-      }
-      getUnknownFields().writeTo(output);
-    }
-
-    private int memoizedSerializedSize = -1;
-    public int getSerializedSize() {
-      int size = memoizedSerializedSize;
-      if (size != -1) return size;
-
-      size = 0;
-      if (((bitField0_ & 0x00000001) == 0x00000001)) {
-        size += com.google.protobuf.CodedOutputStream
-          .computeSInt64Size(1, minimum_);
-      }
-      if (((bitField0_ & 0x00000002) == 0x00000002)) {
-        size += com.google.protobuf.CodedOutputStream
-          .computeSInt64Size(2, maximum_);
-      }
-      if (((bitField0_ & 0x00000004) == 0x00000004)) {
-        size += com.google.protobuf.CodedOutputStream
-          .computeSInt64Size(3, sum_);
-      }
-      size += getUnknownFields().getSerializedSize();
-      memoizedSerializedSize = size;
-      return size;
-    }
-
-    private static final long serialVersionUID = 0L;
-    @java.lang.Override
-    protected java.lang.Object writeReplace()
-        throws java.io.ObjectStreamException {
-      return super.writeReplace();
-    }
-
-    public static org.apache.orc.OrcProto.IntegerStatistics parseFrom(
-        com.google.protobuf.ByteString data)
-        throws com.google.protobuf.InvalidProtocolBufferException {
-      return PARSER.parseFrom(data);
-    }
-    public static org.apache.orc.OrcProto.IntegerStatistics parseFrom(
-        com.google.protobuf.ByteString data,
-        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
-        throws com.google.protobuf.InvalidProtocolBufferException {
-      return PARSER.parseFrom(data, extensionRegistry);
-    }
-    public static org.apache.orc.OrcProto.IntegerStatistics parseFrom(byte[] data)
-        throws com.google.protobuf.InvalidProtocolBufferException {
-      return PARSER.parseFrom(data);
-    }
-    public static org.apache.orc.OrcProto.IntegerStatistics parseFrom(
-        byte[] data,
-        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
-        throws com.google.protobuf.InvalidProtocolBufferException {
-      return PARSER.parseFrom(data, extensionRegistry);
-    }
-    public static org.apache.orc.OrcProto.IntegerStatistics parseFrom(java.io.InputStream input)
-        throws java.io.IOException {
-      return PARSER.parseFrom(input);
-    }
-    public static org.apache.orc.OrcProto.IntegerStatistics parseFrom(
-        java.io.InputStream input,
-        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
-        throws java.io.IOException {
-      return PARSER.parseFrom(input, extensionRegistry);
-    }
-    public static org.apache.orc.OrcProto.IntegerStatistics parseDelimitedFrom(java.io.InputStream input)
-        throws java.io.IOException {
-      return PARSER.parseDelimitedFrom(input);
-    }
-    public static org.apache.orc.OrcProto.IntegerStatistics parseDelimitedFrom(
-        java.io.InputStream input,
-        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
-        throws java.io.IOException {
-      return PARSER.parseDelimitedFrom(input, extensionRegistry);
-    }
-    public static org.apache.orc.OrcProto.IntegerStatistics parseFrom(
-        com.google.protobuf.CodedInputStream input)
-        throws java.io.IOException {
-      return PARSER.parseFrom(input);
-    }
-    public static org.apache.orc.OrcProto.IntegerStatistics parseFrom(
-        com.google.protobuf.CodedInputStream input,
-        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
-        throws java.io.IOException {
-      return PARSER.parseFrom(input, extensionRegistry);
-    }
-
-    public static Builder newBuilder() { return Builder.create(); }
-    public Builder newBuilderForType() { return newBuilder(); }
-    public static Builder newBuilder(org.apache.orc.OrcProto.IntegerStatistics prototype) {
-      return newBuilder().mergeFrom(prototype);
-    }
-    public Builder toBuilder() { return newBuilder(this); }
-
-    @java.lang.Override
-    protected Builder newBuilderForType(
-        com.google.protobuf.GeneratedMessage.BuilderParent parent) {
-      Builder builder = new Builder(parent);
-      return builder;
-    }
-    /**
-     * Protobuf type {@code orc.proto.IntegerStatistics}
-     */
-    public static final class Builder extends
-        com.google.protobuf.GeneratedMessage.Builder<Builder>
-       implements org.apache.orc.OrcProto.IntegerStatisticsOrBuilder {
-      public static final com.google.protobuf.Descriptors.Descriptor
-          getDescriptor() {
-        return org.apache.orc.OrcProto.internal_static_orc_proto_IntegerStatistics_descriptor;
-      }
-
-      protected com.google.protobuf.GeneratedMessage.FieldAccessorTable
-          internalGetFieldAccessorTable() {
-        return org.apache.orc.OrcProto.internal_static_orc_proto_IntegerStatistics_fieldAccessorTable
-            .ensureFieldAccessorsInitialized(
-                org.apache.orc.OrcProto.IntegerStatistics.class, org.apache.orc.OrcProto.IntegerStatistics.Builder.class);
-      }
-
-      // Construct using org.apache.orc.OrcProto.IntegerStatistics.newBuilder()
-      private Builder() {
-        maybeForceBuilderInitialization();
-      }
-
-      private Builder(
-          com.google.protobuf.GeneratedMessage.BuilderParent parent) {
-        super(parent);
-        maybeForceBuilderInitialization();
-      }
-      private void maybeForceBuilderInitialization() {
-        if (com.google.protobuf.GeneratedMessage.alwaysUseFieldBuilders) {
-        }
-      }
-      private static Builder create() {
-        return new Builder();
-      }
-
-      public Builder clear() {
-        super.clear();
-        minimum_ = 0L;
-        bitField0_ = (bitField0_ & ~0x00000001);
-        maximum_ = 0L;
-        bitField0_ = (bitField0_ & ~0x00000002);
-        sum_ = 0L;
-        bitField0_ = (bitField0_ & ~0x00000004);
-        return this;
-      }
-
-      public Builder clone() {
-        return create().mergeFrom(buildPartial());
-      }
-
-      public com.google.protobuf.Descriptors.Descriptor
-          getDescriptorForType() {
-        return org.apache.orc.OrcProto.internal_static_orc_proto_IntegerStatistics_descriptor;
-      }
-
-      public org.apache.orc.OrcProto.IntegerStatistics getDefaultInstanceForType() {
-        return org.apache.orc.OrcProto.IntegerStatistics.getDefaultInstance();
-      }
-
-      public org.apache.orc.OrcProto.IntegerStatistics build() {
-        org.apache.orc.OrcProto.IntegerStatistics result = buildPartial();
-        if (!result.isInitialized()) {
-          throw newUninitializedMessageException(result);
-        }
-        return result;
-      }
-
-      public org.apache.orc.OrcProto.IntegerStatistics buildPartial() {
-        org.apache.orc.OrcProto.IntegerStatistics result = new org.apache.orc.OrcProto.IntegerStatistics(this);
-        int from_bitField0_ = bitField0_;
-        int to_bitField0_ = 0;
-        if (((from_bitField0_ & 0x00000001) == 0x00000001)) {
-          to_bitField0_ |= 0x00000001;
-        }
-        result.minimum_ = minimum_;
-        if (((from_bitField0_ & 0x00000002) == 0x00000002)) {
-          to_bitField0_ |= 0x00000002;
-        }
-        result.maximum_ = maximum_;
-        if (((from_bitField0_ & 0x00000004) == 0x00000004)) {
-          to_bitField0_ |= 0x00000004;
-        }
-        result.sum_ = sum_;
-        result.bitField0_ = to_bitField0_;
-        onBuilt();
-        return result;
-      }
-
-      public Builder mergeFrom(com.google.protobuf.Message other) {
-        if (other instanceof org.apache.orc.OrcProto.IntegerStatistics) {
-          return mergeFrom((org.apache.orc.OrcProto.IntegerStatistics)other);
-        } else {
-          super.mergeFrom(other);
-          return this;
-        }
-      }
-
-      public Builder mergeFrom(org.apache.orc.OrcProto.IntegerStatistics other) {
-        if (other == org.apache.orc.OrcProto.IntegerStatistics.getDefaultInstance()) return this;
-        if (other.hasMinimum()) {
-          setMinimum(other.getMinimum());
-        }
-        if (other.hasMaximum()) {
-          setMaximum(other.getMaximum());
-        }
-        if (other.hasSum()) {
-          setSum(other.getSum());
-        }
-        this.mergeUnknownFields(other.getUnknownFields());
-        return this;
-      }
-
-      public final boolean isInitialized() {
-        return true;
-      }
-
-      public Builder mergeFrom(
-          com.google.protobuf.CodedInputStream input,
-          com.google.protobuf.ExtensionRegistryLite extensionRegistry)
-          throws java.io.IOException {
-        org.apache.orc.OrcProto.IntegerStatistics parsedMessage = null;
-        try {
-          parsedMessage = PARSER.parsePartialFrom(input, extensionRegistry);
-        } catch (com.google.protobuf.InvalidProtocolBufferException e) {
-          parsedMessage = (org.apache.orc.OrcProto.IntegerStatistics) e.getUnfinishedMessage();
-          throw e;
-        } finally {
-          if (parsedMessage != null) {
-            mergeFrom(parsedMessage);
-          }
-        }
-        return this;
-      }
-      private int bitField0_;
-
-      // optional sint64 minimum = 1;
-      private long minimum_ ;
-      /**
-       * <code>optional sint64 minimum = 1;</code>
-       */
-      public boolean hasMinimum() {
-        return ((bitField0_ & 0x00000001) == 0x00000001);
-      }
-      /**
-       * <code>optional sint64 minimum = 1;</code>
-       */
-      public long getMinimum() {
-        return minimum_;
-      }
-      /**
-       * <code>optional sint64 minimum = 1;</code>
-       */
-      public Builder setMinimum(long value) {
-        bitField0_ |= 0x00000001;
-        minimum_ = value;
-        onChanged();
-        return this;
-      }
-      /**
-       * <code>optional sint64 minimum = 1;</code>
-       */
-      public Builder clearMinimum() {
-        bitField0_ = (bitField0_ & ~0x00000001);
-        minimum_ = 0L;
-        onChanged();
-        return this;
-      }
-
-      // optional sint64 maximum = 2;
-      private long maximum_ ;
-      /**
-       * <code>optional sint64 maximum = 2;</code>
-       */
-      public boolean hasMaximum() {
-        return ((bitField0_ & 0x00000002) == 0x00000002);
-      }
-      /**
-       * <code>optional sint64 maximum = 2;</code>
-       */
-      public long getMaximum() {
-        return maximum_;
-      }
-      /**
-       * <code>optional sint64 maximum = 2;</code>
-       */
-      public Builder setMaximum(long value) {
-        bitField0_ |= 0x00000002;
-        maximum_ = value;
-        onChanged();
-        return this;
-      }
-      /**
-       * <code>optional sint64 maximum = 2;</code>
-       */
-      public Builder clearMaximum() {
-        bitField0_ = (bitField0_ & ~0x00000002);
-        maximum_ = 0L;
-        onChanged();
-        return this;
-      }
-
-      // optional sint64 sum = 3;
-      private long sum_ ;
-      /**
-       * <code>optional sint64 sum = 3;</code>
-       */
-      public boolean hasSum() {
-        return ((bitField0_ & 0x00000004) == 0x00000004);
-      }
-      /**
-       * <code>optional sint64 sum = 3;</code>
-       */
-      public long getSum() {
-        return sum_;
-      }
-      /**
-       * <code>optional sint64 sum = 3;</code>
-       */
-      public Builder setSum(long value) {
-        bitField0_ |= 0x00000004;
-        sum_ = value;
-        onChanged();
-        return this;
-      }
-      /**
-       * <code>optional sint64 sum = 3;</code>
-       */
-      public Builder clearSum() {
-        bitField0_ = (bitField0_ & ~0x00000004);
-        sum_ = 0L;
-        onChanged();
-        return this;
-      }
-
-      // @@protoc_insertion_point(builder_scope:orc.proto.IntegerStatistics)
-    }
-
-    static {
-      defaultInstance = new IntegerStatistics(true);
-      defaultInstance.initFields();
-    }
-
-    // @@protoc_insertion_point(class_scope:orc.proto.IntegerStatistics)
-  }
-
-  public interface DoubleStatisticsOrBuilder
-      extends com.google.protobuf.MessageOrBuilder {
-
-    // optional double minimum = 1;
-    /**
-     * <code>optional double minimum = 1;</code>
-     */
-    boolean hasMinimum();
-    /**
-     * <code>optional double minimum = 1;</code>
-     */
-    double getMinimum();
-
-    // optional double maximum = 2;
-    /**
-     * <code>optional double maximum = 2;</code>
-     */
-    boolean hasMaximum();
-    /**
-     * <code>optional double maximum = 2;</code>
-     */
-    double getMaximum();
-
-    // optional double sum = 3;
-    /**
-     * <code>optional double sum = 3;</code>
-     */
-    boolean hasSum();
-    /**
-     * <code>optional double sum = 3;</code>
-     */
-    double getSum();
-  }
-  /**
-   * Protobuf type {@code orc.proto.DoubleStatistics}
-   */
-  public static final class DoubleStatistics extends
-      com.google.protobuf.GeneratedMessage
-      implements DoubleStatisticsOrBuilder {
-    // Use DoubleStatistics.newBuilder() to construct.
-    private DoubleStatistics(com.google.protobuf.GeneratedMessage.Builder<?> builder) {
-      super(builder);
-      this.unknownFields = builder.getUnknownFields();
-    }
-    private DoubleStatistics(boolean noInit) { this.unknownFields = com.google.protobuf.UnknownFieldSet.getDefaultInstance(); }
-
-    private static final DoubleStatistics defaultInstance;
-    public static DoubleStatistics getDefaultInstance() {
-      return defaultInstance;
-    }
-
-    public DoubleStatistics getDefaultInstanceForType() {
-      return defaultInstance;
-    }
-
-    private final com.google.protobuf.UnknownFieldSet unknownFields;
-    @java.lang.Override
-    public final com.google.protobuf.UnknownFieldSet
-        getUnknownFields() {
-      return this.unknownFields;
-    }
-    private DoubleStatistics(
-        com.google.protobuf.CodedInputStream input,
-        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
-        throws com.google.protobuf.InvalidProtocolBufferException {
-      initFields();
-      int mutable_bitField0_ = 0;
-      com.google.protobuf.UnknownFieldSet.Builder unknownFields =
-          com.google.protobuf.UnknownFieldSet.newBuilder();
-      try {
-        boolean done = false;
-        while (!done) {
-          int tag = input.readTag();
-          switch (tag) {
-            case 0:
-              done = true;
-              break;
-            default: {
-              if (!parseUnknownField(input, unknownFields,
-                                     extensionRegistry, tag)) {
-                done = true;
-              }
-              break;
-            }
-            case 9: {
-              bitField0_ |= 0x00000001;
-              minimum_ = input.readDouble();
-              break;
-            }
-            case 17: {
-              bitField0_ |= 0x00000002;
-              maximum_ = input.readDouble();
-              break;
-            }
-            case 25: {
-              bitField0_ |= 0x00000004;
-              sum_ = input.readDouble();
-              break;
-            }
-          }
-        }
-      } catch (com.google.protobuf.InvalidProtocolBufferException e) {
-        throw e.setUnfinishedMessage(this);
-      } catch (java.io.IOException e) {
-        throw new com.google.protobuf.InvalidProtocolBufferException(
-            e.getMessage()).setUnfinishedMessage(this);
-      } finally {
-        this.unknownFields = unknownFields.build();
-        makeExtensionsImmutable();
-      }
-    }
-    public static final com.google.protobuf.Descriptors.Descriptor
-        getDescriptor() {
-      return org.apache.orc.OrcProto.internal_static_orc_proto_DoubleStatistics_descriptor;
-    }
-
-    protected com.google.protobuf.GeneratedMessage.FieldAccessorTable
-        internalGetFieldAccessorTable() {
-      return org.apache.orc.OrcProto.internal_static_orc_proto_DoubleStatistics_fieldAccessorTable
-          .ensureFieldAccessorsInitialized(
-              org.apache.orc.OrcProto.DoubleStatistics.class, org.apache.orc.OrcProto.DoubleStatistics.Builder.class);
-    }
-
-    public static com.google.protobuf.Parser<DoubleStatistics> PARSER =
-        new com.google.protobuf.AbstractParser<DoubleStatistics>() {
-      public DoubleStatistics parsePartialFrom(
-          com.google.protobuf.CodedInputStream input,
-          com.google.protobuf.ExtensionRegistryLite extensionRegistry)
-          throws com.google.protobuf.InvalidProtocolBufferException {
-        return new DoubleStatistics(input, extensionRegistry);
-      }
-    };
-
-    @java.lang.Override
-    public com.google.protobuf.Parser<DoubleStatistics> getParserForType() {
-      return PARSER;
-    }
-
-    private int bitField0_;
-    // optional double minimum = 1;
-    public static final int MINIMUM_FIELD_NUMBER = 1;
-    private double minimum_;
-    /**
-     * <code>optional double minimum = 1;</code>
-     */
-    public boolean hasMinimum() {
-      return ((bitField0_ & 0x00000001) == 0x00000001);
-    }
-    /**
-     * <code>optional double minimum = 1;</code>
-     */
-    public double getMinimum() {
-      return minimum_;
-    }
-
-    // optional double maximum = 2;
-    public static final int MAXIMUM_FIELD_NUMBER = 2;
-    private double maximum_;
-    /**
-     * <code>optional double maximum = 2;</code>
-     */
-    public boolean hasMaximum() {
-      return ((bitField0_ & 0x00000002) == 0x00000002);
-    }
-    /**
-     * <code>optional double maximum = 2;</code>
-     */
-    public double getMaximum() {
-      return maximum_;
-    }
-
-    // optional double sum = 3;
-    public static final int SUM_FIELD_NUMBER = 3;
-    private double sum_;
-    /**
-     * <code>optional double sum = 3;</code>
-     */
-    public boolean hasSum() {
-      return ((bitField0_ & 0x00000004) == 0x00000004);
-    }
-    /**
-     * <code>optional double sum = 3;</code>
-     */
-    public double getSum() {
-      return sum_;
-    }
-
-    private void initFields() {
-      minimum_ = 0D;
-      maximum_ = 0D;
-      sum_ = 0D;
-    }
-    private byte memoizedIsInitialized = -1;
-    public final boolean isInitialized() {
-      byte isInitialized = memoizedIsInitialized;
-      if (isInitialized != -1) return isInitialized == 1;
-
-      memoizedIsInitialized = 1;
-      return true;
-    }
-
-    public void writeTo(com.google.protobuf.CodedOutputStream output)
-                        throws java.io.IOException {
-      getSerializedSize();
-      if (((bitField0_ & 0x00000001) == 0x00000001)) {
-        output.writeDouble(1, minimum_);
-      }
-      if (((bitField0_ & 0x00000002) == 0x00000002)) {
-        output.writeDouble(2, maximum_);
-      }
-      if (((bitField0_ & 0x00000004) == 0x00000004)) {
-        output.writeDouble(3, sum_);
-      }
-      getUnknownFields().writeTo(output);
-    }
-
-    private int memoizedSerializedSize = -1;
-    public int getSerializedSize() {
-      int size = memoizedSerializedSize;
-      if (size != -1) return size;
-
-      size = 0;
-      if (((bitField0_ & 0x00000001) == 0x00000001)) {
-        size += com.google.protobuf.CodedOutputStream
-          .computeDoubleSize(1, minimum_);
-      }
-      if (((bitField0_ & 0x00000002) == 0x00000002)) {
-        size += com.google.protobuf.CodedOutputStream
-          .computeDoubleSize(2, maximum_);
-      }
-      if (((bitField0_ & 0x00000004) == 0x00000004)) {
-        size += com.google.protobuf.CodedOutputStream
-          .computeDoubleSize(3, sum_);
-      }
-      size += getUnknownFields().getSerializedSize();
-      memoizedSerializedSize = size;
-      return size;
-    }
-
-    private static final long serialVersionUID = 0L;
-    @java.lang.Override
-    protected java.lang.Object writeReplace()
-        throws java.io.ObjectStreamException {
-      return super.writeReplace();
-    }
-
-    public static org.apache.orc.OrcProto.DoubleStatistics parseFrom(
-        com.google.protobuf.ByteString data)
-        throws com.google.protobuf.InvalidProtocolBufferException {
-      return PARSER.parseFrom(data);
-    }
-    public static org.apache.orc.OrcProto.DoubleStatistics parseFrom(
-        com.google.protobuf.ByteString data,
-        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
-        throws com.google.protobuf.InvalidProtocolBufferException {
-      return PARSER.parseFrom(data, extensionRegistry);
-    }
-    public static org.apache.orc.OrcProto.DoubleStatistics parseFrom(byte[] data)
-        throws com.google.protobuf.InvalidProtocolBufferException {
-      return PARSER.parseFrom(data);
-    }
-    public static org.apache.orc.OrcProto.DoubleStatistics parseFrom(
-        byte[] data,
-        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
-        throws com.google.protobuf.InvalidProtocolBufferException {
-      return PARSER.parseFrom(data, extensionRegistry);
-    }
-    public static org.apache.orc.OrcProto.DoubleStatistics parseFrom(java.io.InputStream input)
-        throws java.io.IOException {
-      return PARSER.parseFrom(input);
-    }
-    public static org.apache.orc.OrcProto.DoubleStatistics parseFrom(
-        java.io.InputStream input,
-        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
-        throws java.io.IOException {
-      return PARSER.parseFrom(input, extensionRegistry);
-    }
-    public static org.apache.orc.OrcProto.DoubleStatistics parseDelimitedFrom(java.io.InputStream input)
-        throws java.io.IOException {
-      return PARSER.parseDelimitedFrom(input);
-    }
-    public static org.apache.orc.OrcProto.DoubleStatistics parseDelimitedFrom(
-        java.io.InputStream input,
-        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
-        throws java.io.IOException {
-      return PARSER.parseDelimitedFrom(input, extensionRegistry);
-    }
-    public static org.apache.orc.OrcProto.DoubleStatistics parseFrom(
-        com.google.protobuf.CodedInputStream input)
-        throws java.io.IOException {
-      return PARSER.parseFrom(input);
-    }
-    public static org.apache.orc.OrcProto.DoubleStatistics parseFrom(
-        com.google.protobuf.CodedInputStream input,
-        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
-        throws java.io.IOException {
-      return PARSER.parseFrom(input, extensionRegistry);
-    }
-
-    public static Builder newBuilder() { return Builder.create(); }
-    public Builder newBuilderForType() { return newBuilder(); }
-    public static Builder newBuilder(org.apache.orc.OrcProto.DoubleStatistics prototype) {
-      return newBuilder().mergeFrom(prototype);
-    }
-    public Builder toBuilder() { return newBuilder(this); }
-
-    @java.lang.Override
-    protected Builder newBuilderForType(
-        com.google.protobuf.GeneratedMessage.BuilderParent parent) {
-      Builder builder = new Builder(parent);
-      return builder;
-    }
-    /**
-     * Protobuf type {@code orc.proto.DoubleStatistics}
-     */
-    public static final class Builder extends
-        com.google.protobuf.GeneratedMessage.Builder<Builder>
-       implements org.apache.orc.OrcProto.DoubleStatisticsOrBuilder {
-      public static final com.google.protobuf.Descriptors.Descriptor
-          getDescriptor() {
-        return org.apache.orc.OrcProto.internal_static_orc_proto_DoubleStatistics_descriptor;
-      }
-
-      protected com.google.protobuf.GeneratedMessage.FieldAccessorTable
-          internalGetFieldAccessorTable() {
-        return org.apache.orc.OrcProto.internal_static_orc_proto_DoubleStatistics_fieldAccessorTable
-            .ensureFieldAccessorsInitialized(
-                org.apache.orc.OrcProto.DoubleStatistics.class, org.apache.orc.OrcProto.DoubleStatistics.Builder.class);
-      }
-
-      // Construct using org.apache.orc.OrcProto.DoubleStatistics.newBuilder()
-      private Builder() {
-        maybeForceBuilderInitialization();
-      }
-
-      private Builder(
-          com.google.protobuf.GeneratedMessage.BuilderParent parent) {
-        super(parent);
-        maybeForceBuilderInitialization();
-      }
-      private void maybeForceBuilderInitialization() {
-        if (com.google.protobuf.GeneratedMessage.alwaysUseFieldBuilders) {
-        }
-      }
-      private static Builder create() {
-        return new Builder();
-      }
-
-      public Builder clear() {
-        super.clear();
-        minimum_ = 0D;
-        bitField0_ = (bitField0_ & ~0x00000001);
-        maximum_ = 0D;
-        bitField0_ = (bitField0_ & ~0x00000002);
-        sum_ = 0D;
-        bitField0_ = (bitField0_ & ~0x00000004);
-        return this;
-      }
-
-      public Builder clone() {
-        return create().mergeFrom(buildPartial());
-      }
-
-      public com.google.protobuf.Descriptors.Descriptor
-          getDescriptorForType() {
-        return org.apache.orc.OrcProto.internal_static_orc_proto_DoubleStatistics_descriptor;
-      }
-
-      public org.apache.orc.OrcProto.DoubleStatistics getDefaultInstanceForType() {
-        return org.apache.orc.OrcProto.DoubleStatistics.getDefaultInstance();
-      }
-
-      public org.apache.orc.OrcProto.DoubleStatistics build() {
-        org.apache.orc.OrcProto.DoubleStatistics result = buildPartial();
-        if (!result.isInitialized()) {
-          throw newUninitializedMessageException(result);
-        }
-        return result;
-      }
-
-      public org.apache.orc.OrcProto.DoubleStatistics buildPartial() {
-        org.apache.orc.OrcProto.DoubleStatistics result = new org.apache.orc.OrcProto.DoubleStatistics(this);
-        int from_bitField0_ = bitField0_;
-        int to_bitField0_ = 0;
-        if (((from_bitField0_ & 0x00000001) == 0x00000001)) {
-          to_bitField0_ |= 0x00000001;
-        }
-        result.minimum_ = minimum_;
-        if (((from_bitField0_ & 0x00000002) == 0x00000002)) {
-          to_bitField0_ |= 0x00000002;
-        }
-        result.maximum_ = maximum_;
-        if (((from_bitField0_ & 0x00000004) == 0x00000004)) {
-          to_bitField0_ |= 0x00000004;
-        }
-        result.sum_ = sum_;
-        result.bitField0_ = to_bitField0_;
-        onBuilt();
-        return result;
-      }
-
-      public Builder mergeFrom(com.google.protobuf.Message other) {
-        if (other instanceof org.apache.orc.OrcProto.DoubleStatistics) {
-          return mergeFrom((org.apache.orc.OrcProto.DoubleStatistics)other);
-        } else {
-          super.mergeFrom(other);
-          return this;
-        }
-      }
-
-      public Builder mergeFrom(org.apache.orc.OrcProto.DoubleStatistics other) {
-        if (other == org.apache.orc.OrcProto.DoubleStatistics.getDefaultInstance()) return this;
-        if (other.hasMinimum()) {
-          setMinimum(other.getMinimum());
-        }
-        if (other.hasMaximum()) {
-          setMaximum(other.getMaximum());
-        }
-        if (other.hasSum()) {
-          setSum(other.getSum());
-        }
-        this.mergeUnknownFields(other.getUnknownFields());
-        return this;
-      }
-
-      public final boolean isInitialized() {
-        return true;
-      }
-
-      public Builder mergeFrom(
-          com.google.protobuf.CodedInputStream input,
-          com.google.protobuf.ExtensionRegistryLite extensionRegistry)
-          throws java.io.IOException {
-        org.apache.orc.OrcProto.DoubleStatistics parsedMessage = null;
-        try {
-          parsedMessage = PARSER.parsePartialFrom(input, extensionRegistry);
-        } catch (com.google.protobuf.InvalidProtocolBufferException e) {
-          parsedMessage = (org.apache.orc.OrcProto.DoubleStatistics) e.getUnfinishedMessage();
-          throw e;
-        } finally {
-          if (parsedMessage != null) {
-            mergeFrom(parsedMessage);
-          }
-        }
-        return this;
-      }
-      private int bitField0_;
-
-      // optional double minimum = 1;
-      private double minimum_ ;
-      /**
-       * <code>optional double minimum = 1;</code>
-       */
-      public boolean hasMinimum() {
-        return ((bitField0_ & 0x00000001) == 0x00000001);
-      }
-      /**
-       * <code>optional double minimum = 1;</code>
-       */
-      public double getMinimum() {
-        return minimum_;
-      }
-      /**
-       * <code>optional double minimum = 1;</code>
-       */
-      public Builder setMinimum(double value) {
-        bitField0_ |= 0x00000001;
-        minimum_ = value;
-        onChanged();
-        return this;
-      }
-      /**
-       * <code>optional double minimum = 1;</code>
-       */
-      public Builder clearMinimum() {
-        bitField0_ = (bitField0_ & ~0x00000001);
-        minimum_ = 0D;
-        onChanged();
-        return this;
-      }
-
-      // optional double maximum = 2;
-      private double maximum_ ;
-      /**
-       * <code>optional double maximum = 2;</code>
-       */
-      public boolean hasMaximum() {
-        return ((bitField0_ & 0x00000002) == 0x00000002);
-      }
-      /**
-       * <code>optional double maximum = 2;</code>
-       */
-      public double getMaximum() {
-        return maximum_;
-      }
-      /**
-       * <code>optional double maximum = 2;</code>
-       */
-      public Builder setMaximum(double value) {
-        bitField0_ |= 0x00000002;
-        maximum_ = value;
-        onChanged();
-        return this;
-      }
-      /**
-       * <code>optional double maximum = 2;</code>
-       */
-      public Builder clearMaximum() {
-        bitField0_ = (bitField0_ & ~0x00000002);
-        maximum_ = 0D;
-        onChanged();
-        return this;
-      }
-
-      // optional double sum = 3;
-      private double sum_ ;
-      /**
-       * <code>optional double sum = 3;</code>
-       */
-      public boolean hasSum() {
-        return ((bitField0_ & 0x00000004) == 0x00000004);
-      }
-      /**
-       * <code>optional double sum = 3;</code>
-       */
-      public double getSum() {
-        return sum_;
-      }
-      /**
-       * <code>optional double sum = 3;</code>
-       */
-      public Builder setSum(double value) {
-        bitField0_ |= 0x00000004;
-        sum_ = value;
-        onChanged();
-        return this;
-      }
-      /**
-       * <code>optional double sum = 3;</code>
-       */
-      public Builder clearSum() {
-        bitField0_ = (bitField0_ & ~0x00000004);
-        sum_ = 0D;
-        onChanged();
-        return this;
-      }
-
-      // @@protoc_insertion_point(builder_scope:orc.proto.DoubleStatistics)
-    }
-
-    static {
-      defaultInstance = new DoubleStatistics(true);
-      defaultInstance.initFields();
-    }
-
-    // @@protoc_insertion_point(class_scope:orc.proto.DoubleStatistics)
-  }
-
-  public interface StringStatisticsOrBuilder
-      extends com.google.protobuf.MessageOrBuilder {
-
-    // optional string minimum = 1;
-    /**
-     * <code>optional string minimum = 1;</code>
-     */
-    boolean hasMinimum();
-    /**
-     * <code>optional string minimum = 1;</code>
-     */
-    java.lang.String getMinimum();
-    /**
-     * <code>optional string minimum = 1;</code>
-     */
-    com.google.protobuf.ByteString
-        getMinimumBytes();
-
-    // optional string maximum = 2;
-    /**
-     * <code>optional string maximum = 2;</code>
-     */
-    boolean hasMaximum();
-    /**
-     * <code>optional string maximum = 2;</code>
-     */
-    java.lang.String getMaximum();
-    /**
-     * <code>optional string maximum = 2;</code>
-     */
-    com.google.protobuf.ByteString
-        getMaximumBytes();
-
-    // optional sint64 sum = 3;
-    /**
-     * <code>optional sint64 sum = 3;</code>
-     *
-     * <pre>
-     * sum will store the total length of all strings in a stripe
-     * </pre>
-     */
-    boolean hasSum();
-    /**
-     * <code>optional sint64 sum = 3;</code>
-     *
-     * <pre>
-     * sum will store the total length of all strings in a stripe
-     * </pre>
-     */
-    long getSum();
-  }
-  /**
-   * Protobuf type {@code orc.proto.StringStatistics}
-   */
-  public static final class StringStatistics extends
-      com.google.protobuf.GeneratedMessage
-      implements StringStatisticsOrBuilder {
-    // Use StringStatistics.newBuilder() to construct.
-    private StringStatistics(com.google.protobuf.GeneratedMessage.Builder<?> builder) {
-      super(builder);
-      this.unknownFields = builder.getUnknownFields();
-    }
-    private StringStatistics(boolean noInit) { this.unknownFields = com.google.protobuf.UnknownFieldSet.getDefaultInstance(); }
-
-    private static final StringStatistics defaultInstance;
-    public static StringStatistics getDefaultInstance() {
-      return defaultInstance;
-    }
-
-    public StringStatistics getDefaultInstanceForType() {
-      return defaultInstance;
-    }
-
-    private final com.google.protobuf.UnknownFieldSet unknownFields;
-    @java.lang.Override
-    public final com.google.protobuf.UnknownFieldSet
-        getUnknownFields() {
-      return this.unknownFields;
-    }
-    private StringStatistics(
-        com.google.protobuf.CodedInputStream input,
-        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
-        throws com.google.protobuf.InvalidProtocolBufferException {
-      initFields();
-      int mutable_bitField0_ = 0;
-      com.google.protobuf.UnknownFieldSet.Builder unknownFields =
-          com.google.protobuf.UnknownFieldSet.newBuilder();
-      try {
-        boolean done = false;
-        while (!done) {
-          int tag = input.readTag();
-          switch (tag) {
-            case 0:
-              done = true;
-              break;
-            default: {
-              if (!parseUnknownField(input, unknownFields,
-                                     extensionRegistry, tag)) {
-                done = true;
-              }
-              break;
-            }
-            case 10: {
-              bitField0_ |= 0x00000001;
-              minimum_ = input.readBytes();
-              break;
-            }
-            case 18: {
-              bitField0_ |= 0x00000002;
-              maximum_ = input.readBytes();
-              break;
-            }
-            case 24: {
-              bitField0_ |= 0x00000004;
-              sum_ = input.readSInt64();
-              break;
-            }
-          }
-        }
-      } catch (com.google.protobuf.InvalidProtocolBufferException e) {
-        throw e.setUnfinishedMessage(this);
-      } catch (java.io.IOException e) {
-        throw new com.google.protobuf.InvalidProtocolBufferException(
-            e.getMessage()).setUnfinishedMessage(this);
-      } finally {
-        this.unknownFields = unknownFields.build();
-        makeExtensionsImmutable();
-      }
-    }
-    public static final com.google.protobuf.Descriptors.Descriptor
-        getDescriptor() {
-      return org.apache.orc.OrcProto.internal_static_orc_proto_StringStatistics_descriptor;
-    }
-
-    protected com.google.protobuf.GeneratedMessage.FieldAccessorTable
-        internalGetFieldAccessorTable() {
-      return org.apache.orc.OrcProto.internal_static_orc_proto_StringStatistics_fieldAccessorTable
-          .ensureFieldAccessorsInitialized(
-              org.apache.orc.OrcProto.StringStatistics.class, org.apache.orc.OrcProto.StringStatistics.Builder.class);
-    }
-
-    public static com.google.protobuf.Parser<StringStatistics> PARSER =
-        new com.google.protobuf.AbstractParser<StringStatistics>() {
-      public StringStatistics parsePartialFrom(
-          com.google.protobuf.CodedInputStream input,
-          com.google.protobuf.ExtensionRegistryLite extensionRegistry)
-          throws com.google.protobuf.InvalidProtocolBufferException {
-        return new StringStatistics(input, extensionRegistry);
-      }
-    };
-
-    @java.lang.Override
-    public com.google.protobuf.Parser<StringStatistics> getParserForType() {
-      return PARSER;
-    }
-
-    private int bitField0_;
-    // optional string minimum = 1;
-    public static final int MINIMUM_FIELD_NUMBER = 1;
-    private java.lang.Object minimum_;
-    /**
-     * <code>optional string minimum = 1;</code>
-     */
-    public boolean hasMinimum() {
-      return ((bitField0_ & 0x00000001) == 0x00000001);
-    }
-    /**
-     * <code>optional string minimum = 1;</code>
-     */
-    public java.lang.String getMinimum() {
-      java.lang.Object ref = minimum_;
-      if (ref instanceof java.lang.String) {
-        return (java.lang.String) ref;
-      } else {
-        com.google.protobuf.ByteString bs = 
-            (com.google.protobuf.ByteString) ref;
-        java.lang.String s = bs.toStringUtf8();
-        if (bs.isValidUtf8()) {
-          minimum_ = s;
-        }
-        return s;
-      }
-    }
-    /**
-     * <code>optional string minimum = 1;</code>
-     */
-    public com.google.protobuf.ByteString
-        getMinimumBytes() {
-      java.lang.Object ref = minimum_;
-      if (ref instanceof java.lang.String) {
-        com.google.protobuf.ByteString b = 
-            com.google.protobuf.ByteString.copyFromUtf8(
-                (java.lang.String) ref);
-        minimum_ = b;
-        return b;
-      } else {
-        return (com.google.protobuf.ByteString) ref;
-      }
-    }
-
-    // optional string maximum = 2;
-    public static final int MAXIMUM_FIELD_NUMBER = 2;
-    private java.lang.Object maximum_;
-    /**
-     * <code>optional string maximum = 2;</code>
-     */
-    public boolean hasMaximum() {
-      return ((bitField0_ & 0x00000002) == 0x00000002);
-    }
-    /**
-     * <code>optional string maximum = 2;</code>
-     */
-    public java.lang.String getMaximum() {
-      java.lang.Object ref = maximum_;
-      if (ref instanceof java.lang.String) {
-        return (java.lang.String) ref;
-      } else {
-        com.google.protobuf.ByteString bs = 
-            (com.google.protobuf.ByteString) ref;
-        java.lang.String s = bs.toStringUtf8();
-        if (bs.isValidUtf8()) {
-          maximum_ = s;
-        }
-        return s;
-      }
-    }
-    /**
-     * <code>optional string maximum = 2;</code>
-     */
-    public com.google.protobuf.ByteString
-        getMaximumBytes() {
-      java.lang.Object ref = maximum_;
-      if (ref instanceof java.lang.String) {
-        com.google.protobuf.ByteString b = 
-            com.google.protobuf.ByteString.copyFromUtf8(
-                (java.lang.String) ref);
-        maximum_ = b;
-        return b;
-      } else {
-        return (com.google.protobuf.ByteString) ref;
-      }
-    }
-
-    // optional sint64 sum = 3;
-    public static final int SUM_FIELD_NUMBER = 3;
-    private long sum_;
-    /**
-     * <code>optional sint64 sum = 3;</code>
-     *
-     * <pre>
-     * sum will store the total length of all strings in a stripe
-     * </pre>
-     */
-    public boolean hasSum() {
-      return ((bitField0_ & 0x00000004) == 0x00000004);
-    }
-    /**
-     * <code>optional sint64 sum = 3;</code>
-     *
-     * <pre>
-     * sum will store the total length of all strings in a stripe
-     * </pre>
-     */
-    public long getSum() {
-      return sum_;
-    }
-
-    private void initFields() {
-      minimum_ = "";
-      maximum_ = "";
-      sum_ = 0L;
-    }
-    private byte memoizedIsInitialized = -1;
-    public final boolean isInitialized() {
-      byte isInitialized = memoizedIsInitialized;
-      if (isInitialized != -1) return isInitialized == 1;
-
-      memoizedIsInitialized = 1;
-      return true;
-    }
-
-    public void writeTo(com.google.protobuf.CodedOutputStream output)
-                        throws java.io.IOException {
-      getSerializedSize();
-      if (((bitField0_ & 0x00000001) == 0x00000001)) {
-        output.writeBytes(1, getMinimumBytes());
-      }
-      if (((bitField0_ & 0x00000002) == 0x00000002)) {
-        output.writeBytes(2, getMaximumBytes());
-      }
-      if (((bitField0_ & 0x00000004) == 0x00000004)) {
-        output.writeSInt64(3, sum_);
-      }
-      getUnknownFields().writeTo(output);
-    }
-
-    private int memoizedSerializedSize = -1;
-    public int getSerializedSize() {
-      int size = memoizedSerializedSize;
-      if (size != -1) return size;
-
-      size = 0;
-      if (((bitField0_ & 0x00000001) == 0x00000001)) {
-        size += com.google.protobuf.CodedOutputStream
-          .computeBytesSize(1, getMinimumBytes());
-      }
-      if (((bitField0_ & 0x00000002) == 0x00000002)) {
-        size += com.google.protobuf.CodedOutputStream
-          .computeBytesSize(2, getMaximumBytes());
-      }
-      if (((bitField0_ & 0x00000004) == 0x00000004)) {
-        size += com.google.protobuf.CodedOutputStream
-          .computeSInt64Size(3, sum_);
-      }
-      size += getUnknownFields().getSerializedSize();
-      memoizedSerializedSize = size;
-      return size;
-    }
-
-    private static final long serialVersionUID = 0L;
-    @java.lang.Override
-    protected java.lang.Object writeReplace()
-        throws java.io.ObjectStreamException {
-      return super.writeReplace();
-    }
-
-    public static org.apache.orc.OrcProto.StringStatistics parseFrom(
-        com.google.protobuf.ByteString data)
-        throws com.google.protobuf.InvalidProtocolBufferException {
-      return PARSER.parseFrom(data);
-    }
-    public static org.apache.orc.OrcProto.StringStatistics parseFrom(
-        com.google.protobuf.ByteString data,
-        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
-        throws com.google.protobuf.InvalidProtocolBufferException {
-      return PARSER.parseFrom(data, extensionRegistry);
-    }
-    public static org.apache.orc.OrcProto.StringStatistics parseFrom(byte[] data)
-        throws com.google.protobuf.InvalidProtocolBufferException {
-      return PARSER.parseFrom(data);
-    }
-    public static org.apache.orc.OrcProto.StringStatistics parseFrom(
-        byte[] data,
-        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
-        throws com.google.protobuf.InvalidProtocolBufferException {
-      return PARSER.parseFrom(data, extensionRegistry);
-    }
-    public static org.apache.orc.OrcProto.StringStatistics parseFrom(java.io.InputStream input)
-        throws java.io.IOException {
-      return PARSER.parseFrom(input);
-    }
-    public static org.apache.orc.OrcProto.StringStatistics parseFrom(
-        java.io.InputStream input,
-        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
-        throws java.io.IOException {
-      return PARSER.parseFrom(input, extensionRegistry);
-    }
-    public static org.apache.orc.OrcProto.StringStatistics parseDelimitedFrom(java.io.InputStream input)
-        throws java.io.IOException {
-      return PARSER.parseDelimitedFrom(input);
-    }
-    public static org.apache.orc.OrcProto.StringStatistics parseDelimitedFrom(
-        java.io.InputStream input,
-        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
-        throws java.io.IOException {
-      return PARSER.parseDelimitedFrom(input, extensionRegistry);
-    }
-    public static org.apache.orc.OrcProto.StringStatistics parseFrom(
-        com.google.protobuf.CodedInputStream input)
-        throws java.io.IOException {
-      return PARSER.parseFrom(input);
-    }
-    public static org.apache.orc.OrcProto.StringStatistics parseFrom(
-        com.google.protobuf.CodedInputStream input,
-        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
-        throws java.io.IOException {
-      return PARSER.parseFrom(input, extensionRegistry);
-    }
-
-    public static Builder newBuilder() { return Builder.create(); }
-    public Builder newBuilderForType() { return newBuilder(); }
-    public static Builder newBuilder(org.apache.orc.OrcProto.StringStatistics prototype) {
-      return newBuilder().mergeFrom(prototype);
-    }
-    public Builder toBuilder() { return newBuilder(this); }
-
-    @java.lang.Override
-    protected Builder newBuilderForType(
-        com.google.protobuf.GeneratedMessage.BuilderParent parent) {
-      Builder builder = new Builder(parent);
-      return builder;
-    }
-    /**
-     * Protobuf type {@code orc.proto.StringStatistics}
-     */
-    public static final class Builder extends
-        com.google.protobuf.GeneratedMessage.Builder<Builder>
-       implements org.apache.orc.OrcProto.StringStatisticsOrBuilder {
-      public static final com.google.protobuf.Descriptors.Descriptor
-          getDescriptor() {
-        return org.apache.orc.OrcProto.internal_static_orc_proto_StringStatistics_descriptor;
-      }
-
-      protected com.google.protobuf.GeneratedMessage.FieldAccessorTable
-          internalGetFieldAccessorTable() {
-        return org.apache.orc.OrcProto.internal_static_orc_proto_StringStatistics_fieldAccessorTable
-            .ensureFieldAccessorsInitialized(
-                org.apache.orc.OrcProto.StringStatistics.class, org.apache.orc.OrcProto.StringStatistics.Builder.class);
-      }
-
-      // Construct using org.apache.orc.OrcProto.StringStatistics.newBuilder()
-      private Builder() {
-        maybeForceBuilderInitialization();
-      }
-
-      private Builder(
-          com.google.protobuf.GeneratedMessage.BuilderParent parent) {
-        super(parent);
-        maybeForceBuilderInitialization();
-      }
-      private void maybeForceBuilderInitialization() {
-        if (com.google.protobuf.GeneratedMessage.alwaysUseFieldBuilders) {
-        }
-      }
-      private static Builder create() {
-        return new Builder();
-      }
-
-      public Builder clear() {
-        super.clear();
-        minimum_ = "";
-        bitField0_ = (bitField0_ & ~0x00000001);
-        maximum_ = "";
-        bitField0_ = (bitField0_ & ~0x00000002);
-        sum_ = 0L;
-        bitField0_ = (bitField0_ & ~0x00000004);
-        return this;
-      }
-
-      public Builder clone() {
-        return create().mergeFrom(buildPartial());
-      }
-
-      public com.google.protobuf.Descriptors.Descriptor
-          getDescriptorForType() {
-        return org.apache.orc.OrcProto.internal_static_orc_proto_StringStatistics_descriptor;
-      }
-
-      public org.apache.orc.OrcProto.StringStatistics getDefaultInstanceForType() {
-        return org.apache.orc.OrcProto.StringStatistics.getDefaultInstance();
-      }
-
-      public org.apache.orc.OrcProto.StringStatistics build() {
-        org.apache.orc.OrcProto.StringStatistics result = buildPartial();
-        if (!result.isInitialized()) {
-          throw newUninitializedMessageException(result);
-        }
-        return result;
-      }
-
-      public org.apache.orc.OrcProto.StringStatistics buildPartial() {
-        org.apache.orc.OrcProto.StringStatistics result = new org.apache.orc.OrcProto.StringStatistics(this);
-        int from_bitField0_ = bitField0_;
-        int to_bitField0_ = 0;
-        if (((from_bitField0_ & 0x00000001) == 0x00000001)) {
-          to_bitField0_ |= 0x00000001;
-        }
-        result.minimum_ = minimum_;
-        if (((from_bitField0_ & 0x00000002) == 0x00000002)) {
-          to_bitField0_ |= 0x00000002;
-        }
-        result.maximum_ = maximum_;
-        if (((from_bitField0_ & 0x00000004) == 0x00000004)) {
-          to_bitField0_ |= 0x00000004;
-        }
-        result.sum_ = sum_;
-        result.bitField0_ = to_bitField0_;
-        onBuilt();
-        return result;
-      }
-
-      public Builder mergeFrom(com.google.protobuf.Message other) {
-        if (other instanceof org.apache.orc.OrcProto.StringStatistics) {
-          return mergeFrom((org.apache.orc.OrcProto.StringStatistics)other);
-        } else {
-          super.mergeFrom(other);
-          return this;
-        }
-      }
-
-      public Builder mergeFrom(org.apache.orc.OrcProto.StringStatistics other) {
-        if (other == org.apache.orc.OrcProto.StringStatistics.getDefaultInstance()) return this;
-        if (other.hasMinimum()) {
-          bitField0_ |= 0x00000001;
-          minimum_ = other.minimum_;
-          onChanged();
-        }
-        if (other.hasMaximum()) {
-          bitField0_ |= 0x00000002;
-          maximum_ = other.maximum_;
-          onChanged();
-        }
-        if (other.hasSum()) {
-          setSum(other.getSum());
-        }
-        this.mergeUnknownFields(other.getUnknownFields());
-        return this;
-      }
-
-      public final boolean isInitialized() {
-        return true;
-      }
-
-      public Builder mergeFrom(
-          com.google.protobuf.CodedInputStream input,
-          com.google.protobuf.ExtensionRegistryLite extensionRegistry)
-          throws java.io.IOException {
-        org.apache.orc.OrcProto.StringStatistics parsedMessage = null;
-        try {
-          parsedMessage = PARSER.parsePartialFrom(input, extensionRegistry);
-        } catch (com.google.protobuf.InvalidProtocolBufferException e) {
-          parsedMessage = (org.apache.orc.OrcProto.StringStatistics) e.getUnfinishedMessage();
-          throw e;
-        } finally {
-          if (parsedMessage != null) {
-            mergeFrom(parsedMessage);
-          }
-        }
-        return this;
-      }
-      private int bitField0_;
-
-      // optional string minimum = 1;
-      private java.lang.Object minimum_ = "";
-      /**
-       * <code>optional string minimum = 1;</code>
-       */
-      public boolean hasMinimum() {
-        return ((bitField0_ & 0x00000001) == 0x00000001);
-      }
-      /**
-       * <code>optional string minimum = 1;</code>
-       */
-      public java.lang.String getMinimum() {
-        java.lang.Object ref = minimum_;
-        if (!(ref instanceof java.lang.String)) {
-          java.lang.String s = ((com.google.protobuf.ByteString) ref)
-              .toStringUtf8();
-          minimum_ = s;
-          return s;
-        } else {
-          return (java.lang.String) ref;
-        }
-      }
-      /**
-       * <code>optional string minimum = 1;</code>
-       */
-      public com.google.protobuf.ByteString
-          getMinimumBytes() {
-        java.lang.Object ref = minimum_;
-        if (ref instanceof String) {
-          com.google.protobuf.ByteString b = 
-              com.google.protobuf.ByteString.copyFromUtf8(
-                  (java.lang.String) ref);
-          minimum_ = b;
-          return b;
-        } else {
-          return (com.google.protobuf.ByteString) ref;
-        }
-      }
-      /**
-       * <code>optional string minimum = 1;</code>
-       */
-      public Builder setMinimum(
-          java.lang.String value) {
-        if (value == null) {
-    throw new NullPointerException();
-  }
-  bitField0_ |= 0x00000001;
-        minimum_ = value;
-        onChanged();
-        return this;
-      }
-      /**
-       * <code>optional string minimum = 1;</code>
-       */
-      public Builder clearMinimum() {
-        bitField0_ = (bitField0_ & ~0x00000001);
-        minimum_ = getDefaultInstance().getMinimum();
-        onChanged();
-        return this;
-      }
-      /**
-       * <code>optional string minimum = 1;</code>
-       */
-      public Builder setMinimumBytes(
-          com.google.protobuf.ByteString value) {
-        if (value == null) {
-    throw new NullPointerException();
-  }
-  bitField0_ |= 0x00000001;
-        minimum_ = value;
-        onChanged();
-        return this;
-      }
-
-      // optional string maximum = 2;
-      private java.lang.Object maximum_ = "";
-      /**
-       * <code>optional string maximum = 2;</code>
-       */
-      public boolean hasMaximum() {
-        return ((bitField0_ & 0x00000002) == 0x00000002);
-      }
-      /**
-       * <code>optional string maximum = 2;</code>
-       */
-      public java.lang.String getMaximum() {
-        java.lang.Object ref = maximum_;
-        if (!(ref instanceof java.lang.String)) {
-          java.lang.String s = ((com.google.protobuf.ByteString) ref)
-              .toStringUtf8();
-          maximum_ = s;
-          return s;
-        } else {
-          return (java.lang.String) ref;
-        }
-      }
-      /**
-       * <code>optional string maximum = 2;</code>
-       */
-      public com.google.protobuf.ByteString
-          getMaximumBytes() {
-        java.lang.Object ref = maximum_;
-        if (ref instanceof String) {
-          com.google.protobuf.ByteString b = 
-              com.google.protobuf.ByteString.copyFromUtf8(
-                  (java.lang.String) ref);
-          maximum_ = b;
-          return b;
-        } else {
-          return (com.google.protobuf.ByteString) ref;
-        }
-      }
-      /**
-       * <code>optional string maximum = 2;</code>
-       */
-      public Builder setMaximum(
-          java.lang.String value) {
-        if (value == null) {
-    throw new NullPointerException();
-  }
-  bitField0_ |= 0x00000002;
-        maximum_ = value;
-        onChanged();
-        return this;
-      }
-      /**
-       * <code>optional string maximum = 2;</code>
-       */
-      public Builder clearMaximum() {
-        bitField0_ = (bitField0_ & ~0x00000002);
-        maximum_ = getDefaultInstance().getMaximum();
-        onChanged();
-        return this;
-      }
-      /**
-       * <code>optional string maximum = 2;</code>
-       */
-      public Builder setMaximumBytes(
-          com.google.protobuf.ByteString value) {
-        if (value == null) {
-    throw new NullPointerException();
-  }
-  bitField0_ |= 0x00000002;
-        maximum_ = value;
-        onChanged();
-        return this;
-      }
-
-      // optional sint64 sum = 3;
-      private long sum_ ;
-      /**
-       * <code>optional sint64 sum = 3;</code>
-       *
-       * <pre>
-       * sum will store the total length of all strings in a stripe
-       * </pre>
-       */
-      public boolean hasSum() {
-        return ((bitField0_ & 0x00000004) == 0x00000004);
-      }
-      /**
-       * <code>optional sint64 sum = 3;</code>
-       *
-       * <pre>
-       * sum will store the total length of all strings in a stripe
-       * </pre>
-       */
-      public long getSum() {
-        return sum_;
-      }
-      /**
-       * <code>optional sint64 sum = 3;</code>
-       *
-       * <pre>
-       * sum will store the total length of all strings in a stripe
-       * </pre>
-       */
-      public Builder setSum(long value) {
-        bitField0_ |= 0x00000004;
-        sum_ = value;
-        onChanged();
-        return this;
-      }
-      /**
-       * <code>optional sint64 sum = 3;</code>
-       *
-       * <pre>
-       * sum will store the total length of all strings in a stripe
-       * </pre>
-       */
-      public Builder clearSum() {
-        bitField0_ = (bitField0_ & ~0x00000004);
-        sum_ = 0L;
-        onChanged();
-        return this;
-      }
-
-      // @@protoc_insertion_point(builder_scope:orc.proto.StringStatistics)
-    }
-
-    static {
-      defaultInstance = new StringStatistics(true);
-      defaultInstance.initFields();
-    }
-
-    // @@protoc_insertion_point(class_scope:orc.proto.StringStatistics)
-  }
-
-  public interface BucketStatisticsOrBuilder
-      extends com.google.protobuf.MessageOrBuilder {
-
-    // repeated uint64 count = 1 [packed = true];
-    /**
-     * <code>repeated uint64 count = 1 [packed = true];</code>
-     */
-    java.util.List<java.lang.Long> getCountList();
-    /**
-     * <code>repeated uint64 count = 1 [packed = true];</code>
-     */
-    int getCountCount();
-    /**
-     * <code>repeated uint64 count = 1 [packed = true];</code>
-     */
-    long getCount(int index);
-  }
-  /**
-   * Protobuf type {@code orc.proto.BucketStatistics}
-   */
-  public static final class BucketStatistics extends
-      com.google.protobuf.GeneratedMessage
-      implements BucketStatisticsOrBuilder {
-    // Use BucketStatistics.newBuilder() to construct.
-    private BucketStatistics(com.google.protobuf.GeneratedMessage.Builder<?> builder) {
-      super(builder);
-      this.unknownFields = builder.getUnknownFields();
-    }
-    private BucketStatistics(boolean noInit) { this.unknownFields = com.google.protobuf.UnknownFieldSet.getDefaultInstance(); }
-
-    private static final BucketStatistics defaultInstance;
-    public static BucketStatistics getDefaultInstance() {
-      return defaultInstance;
-    }
-
-    public BucketStatistics getDefaultInstanceForType() {
-      return defaultInstance;
-    }
-
-    private final com.google.protobuf.UnknownFieldSet unknownFields;
-    @java.lang.Override
-    public final com.google.protobuf.UnknownFieldSet
-        getUnknownFields() {
-      return this.unknownFields;
-    }
-    private BucketStatistics(
-        com.google.protobuf.CodedInputStream input,
-        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
-        throws com.google.protobuf.InvalidProtocolBufferException {
-      initFields();
-      int mutable_bitField0_ = 0;
-      com.google.protobuf.UnknownFieldSet.Builder unknownFields =
-          com.google.protobuf.UnknownFieldSet.newBuilder();
-      try {
-        boolean done = false;
-        while (!done) {
-          int tag = input.readTag();
-          switch (tag) {
-            case 0:
-              done = true;
-              break;
-            default: {
-              if (!parseUnknownField(input, unknownFields,
-                                     extensionRegistry, tag)) {
-                done = true;
-              }
-              break;
-            }
-            case 8: {
-              if (!((mutable_bitField0_ & 0x00000001) == 0x00000001)) {
-                count_ = new java.util.ArrayList<java.lang.Long>();
-                mutable_bitField0_ |= 0x00000001;
-              }
-              count_.add(input.readUInt64());
-              break;
-            }
-            case 10: {
-              int length = input.readRawVarint32();
-              int limit = input.pushLimit(length);
-              if (!((mutable_bitField0_ & 0x00000001) == 0x00000001) && input.getBytesUntilLimit() > 0) {
-                count_ = new java.util.ArrayList<java.lang.Long>();
-                mutable_bitField0_ |= 0x00000001;
-              }
-              while (input.getBytesUntilLimit() > 0) {
-                count_.add(input.readUInt64());
-              }
-              input.popLimit(limit);
-              break;
-            }
-          }
-        }
-      } catch (com.google.protobuf.InvalidProtocolBufferException e) {
-        throw e.setUnfinishedMessage(this);
-      } catch (java.io.IOException e) {
-        throw new com.google.protobuf.InvalidProtocolBufferException(
-            e.getMessage()).setUnfinishedMessage(this);
-      } finally {
-        if (((mutable_bitField0_ & 0x00000001) == 0x00000001)) {
-          count_ = java.util.Collections.unmodifiableList(count_);
-        }
-        this.unknownFields = unknownFields.build();
-        makeExtensionsImmutable();
-      }
-    }
-    public static final com.google.protobuf.Descriptors.Descriptor
-        getDescriptor() {
-      return org.apache.orc.OrcProto.internal_static_orc_proto_BucketStatistics_descriptor;
-    }
-
-    protected com.google.protobuf.GeneratedMessage.FieldAccessorTable
-        internalGetFieldAccessorTable() {
-      return org.apache.orc.OrcProto.internal_static_orc_proto_BucketStatistics_fieldAccessorTable
-          .ensureFieldAccessorsInitialized(
-              org.apache.orc.OrcProto.BucketStatistics.class, org.apache.orc.OrcProto.BucketStatistics.Builder.class);
-    }
-
-    public static com.google.protobuf.Parser<BucketStatistics> PARSER =
-        new com.google.protobuf.AbstractParser<BucketStatistics>() {
-      public BucketStatistics parsePartialFrom(
-          com.google.protobuf.CodedInputStream input,
-          com.google.protobuf.ExtensionRegistryLite extensionRegistry)
-          throws com.google.protobuf.InvalidProtocolBufferException {
-        return new BucketStatistics(input, extensionRegistry);
-      }
-    };
-
-    @java.lang.Override
-    public com.google.protobuf.Parser<BucketStatistics> getParserForType() {
-      return PARSER;
-    }
-
-    // repeated uint64 count = 1 [packed = true];
-    public static final int COUNT_FIELD_NUMBER = 1;
-    private java.util.List<java.lang.Long> count_;
-    /**
-     * <code>repeated uint64 count = 1 [packed = true];</code>
-     */
-    public java.util.List<java.lang.Long>
-        getCountList() {
-      return count_;
-    }
-    /**
-     * <code>repeated uint64 count = 1 [packed = true];</code>
-     */
-    public int getCountCount() {
-      return count_.size();
-    }
-    /**
-     * <code>repeated uint64 count = 1 [packed = true];</code>
-     */
-    public long getCount(int index) {
-      return count_.get(index);
-    }
-    private int countMemoizedSerializedSize = -1;
-
-    private void initFields() {
-      count_ = java.util.Collections.emptyList();
-    }
-    private byte memoizedIsInitialized = -1;
-    public final boolean isInitialized() {
-      byte isInitialized = memoizedIsInitialized;
-      if (isInitialized != -1) return isInitialized == 1;
-
-      memoizedIsInitialized = 1;
-      return true;
-    }
-
-    public void writeTo(com.google.protobuf.CodedOutputStream output)
-                        throws java.io.IOException {
-      getSerializedSize();
-      if (getCountList().size() > 0) {
-        output.writeRawVarint32(10);
-        output.writeRawVarint32(countMemoizedSerializedSize);
-      }
-      for (int i = 0; i < count_.size(); i++) {
-        output.writeUInt64NoTag(count_.get(i));
-      }
-      getUnknownFields().writeTo(output);
-    }
-
-    private int memoizedSerializedSize = -1;
-    public int getSerializedSize() {
-      int size = memoizedSerializedSize;
-      if (size != -1) return size;
-
-      size = 0;
-      {
-        int dataSize = 0;
-        for (int i = 0; i < count_.size(); i++) {
-          dataSize += com.google.protobuf.CodedOutputStream
-            .computeUInt64SizeNoTag(count_.get(i));
-        }
-        size += dataSize;
-        if (!getCountList().isEmpty()) {
-          size += 1;
-          size += com.google.protobuf.CodedOutputStream
-              .computeInt32SizeNoTag(dataSize);
-        }
-        countMemoizedSerializedSize = dataSize;
-      }
-      size += getUnknownFields().getSerializedSize();
-      memoizedSerializedSize = size;
-      return size;
-    }
-
-    private static final long serialVersionUID = 0L;
-    @java.lang.Override
-    protected java.lang.Object writeReplace()
-        throws java.io.ObjectStreamException {
-      return super.writeReplace();
-    }
-
-    public static org.apache.orc.OrcProto.BucketStatistics parseFrom(
-        com.google.protobuf.ByteString data)
-        throws com.google.protobuf.InvalidProtocolBufferException {
-      return PARSER.parseFrom(data);
-    }
-    public static org.apache.orc.OrcProto.BucketStatistics parseFrom(
-        com.google.protobuf.ByteString data,
-        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
-        throws com.google.protobuf.InvalidProtocolBufferException {
-      return PARSER.parseFrom(data, extensionRegistry);
-    }
-    public static org.apache.orc.OrcProto.BucketStatistics parseFrom(byte[] data)
-        throws com.google.protobuf.InvalidProtocolBufferException {
-      return PARSER.parseFrom(data);
-    }
-    public static org.apache.orc.OrcProto.BucketStatistics parseFrom(
-        byte[] data,
-        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
-        throws com.google.protobuf.InvalidProtocolBufferException {
-      return PARSER.parseFrom(data, extensionRegistry);
-    }
-    public static org.apache.orc.OrcProto.BucketStatistics parseFrom(java.io.InputStream input)
-        throws java.io.IOException {
-      return PARSER.parseFrom(input);
-    }
-    public static org.apache.orc.OrcProto.BucketStatistics parseFrom(
-        java.io.InputStream input,
-        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
-        throws java.io.IOException {
-      return PARSER.parseFrom(input, extensionRegistry);
-    }
-    public static org.apache.orc.OrcProto.BucketStatistics parseDelimitedFrom(java.io.InputStream input)
-        throws java.io.IOException {
-      return PARSER.parseDelimitedFrom(input);
-    }
-    public static org.apache.orc.OrcProto.BucketStatistics parseDelimitedFrom(
-        java.io.InputStream input,
-        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
-        throws java.io.IOException {
-      return PARSER.parseDelimitedFrom(input, extensionRegistry);
-    }
-    public static org.apache.orc.OrcProto.BucketStatistics parseFrom(
-        com.google.protobuf.CodedInputStream input)
-        throws java.io.IOException {
-      return PARSER.parseFrom(input);
-    }
-    public static org.apache.orc.OrcProto.BucketStatistics parseFrom(
-        com.google.protobuf.CodedInputStream input,
-        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
-        throws java.io.IOException {
-      return PARSER.parseFrom(input, extensionRegistry);
-    }
-
-    public static Builder newBuilder() { return Builder.create(); }
-    public Builder newBuilderForType() { return newBuilder(); }
-    public static Builder newBuilder(org.apache.orc.OrcProto.BucketStatistics prototype) {
-      return newBuilder().mergeFrom(prototype);
-    }
-    public Builder toBuilder() { return newBuilder(this); }
-
-    @java.lang.Override
-    protected Builder newBuilderForType(
-        com.google.protobuf.GeneratedMessage.BuilderParent parent) {
-      Builder builder = new Builder(parent);
-      return builder;
-    }
-    /**
-     * Protobuf type {@code orc.proto.BucketStatistics}
-     */
-    public static final class Builder extends
-        com.google.protobuf.GeneratedMessage.Builder<Builder>
-       implements org.apache.orc.OrcProto.BucketStatisticsOrBuilder {
-      public static final com.google.protobuf.Descriptors.Descriptor
-          getDescriptor() {
-        return org.apache.orc.OrcProto.internal_static_orc_proto_BucketStatistics_descriptor;
-      }
-
-      protected com.google.protobuf.GeneratedMessage.FieldAccessorTable
-          internalGetFieldAccessorTable() {
-        return org.apache.orc.OrcProto.internal_static_orc_proto_BucketStatistics_fieldAccessorTable
-            .ensureFieldAccessorsInitialized(
-                org.apache.orc.OrcProto.BucketStatistics.class, org.apache.orc.OrcProto.BucketStatistics.Builder.class);
-      }
-
-      // Construct using org.apache.orc.OrcProto.BucketStatistics.newBuilder()
-      private Builder() {
-        maybeForceBuilderInitialization();
-      }
-
-      private Builder(
-          com.google.protobuf.GeneratedMessage.BuilderParent parent) {
-        super(parent);
-        maybeForceBuilderInitialization();
-      }
-      private void maybeForceBuilderInitialization() {
-        if (com.google.protobuf.GeneratedMessage.alwaysUseFieldBuilders) {
-        }
-      }
-      private static Builder create() {
-        return new Builder();
-      }
-
-      public Builder clear() {
-        super.clear();
-        count_ = java.util.Collections.emptyList();
-        bitField0_ = (bitField0_ & ~0x00000001);
-        return this;
-      }
-
-      public Builder clone() {
-        return create().mergeFrom(buildPartial());
-      }
-
-      public com.google.protobuf.Descriptors.Descriptor
-          getDescriptorForType() {
-        return org.apache.orc.OrcProto.internal_static_orc_proto_BucketStatistics_descriptor;
-      }
-
-      public org.apache.orc.OrcProto.BucketStatistics getDefaultInstanceForType() {
-        return org.apache.orc.OrcProto.BucketStatistics.getDefaultInstance();
-      }
-
-      public org.apache.orc.OrcProto.BucketStatistics build() {
-        org.apache.orc.OrcProto.BucketStatistics result = buildPartial();
-        if (!result.isInitialized()) {
-          throw newUninitializedMessageException(result);
-        }
-        return result;
-      }
-
-      public org.apache.orc.OrcProto.BucketStatistics buildPartial() {
-        org.apache.orc.OrcProto.BucketStatistics result = new org.apache.orc.OrcProto.BucketStatistics(this);
-        int from_bitField0_ = bitField0_;
-        if (((bitField0_ & 0x00000001) == 0x00000001)) {
-          count_ = java.util.Collections.unmodifiableList(count_);
-          bitField0_ = (bitField0_ & ~0x00000001);
-        }
-        result.count_ = count_;
-        onBuilt();
-        return result;
-      }
-
-      public Builder mergeFrom(com.google.protobuf.Message other) {
-        if (other instanceof org.apache.orc.OrcProto.BucketStatistics) {
-          return mergeFrom((org.apache.orc.OrcProto.BucketStatistics)other);
-        } else {
-          super.mergeFrom(other);
-          return this;
-        }
-      }
-
-      public Builder mergeFrom(org.apache.orc.OrcProto.BucketStatistics other) {
-        if (other == org.apache.orc.OrcProto.BucketStatistics.getDefaultInstance()) return this;
-        if (!other.count_.isEmpty()) {
-          if (count_.isEmpty()) {
-            count_ = other.count_;
-            bitField0_ = (bitField0_ & ~0x00000001);
-          } else {
-            ensureCountIsMutable();
-            count_.addAll(other.count_);
-          }
-          onChanged();
-        }
-        this.mergeUnknownFields(other.getUnknownFields());
-        return this;
-      }
-
-      public final boolean isInitialized() {
-        return true;
-      }
-
-      public Builder mergeFrom(
-          com.google.protobuf.CodedInputStream input,
-          com.google.protobuf.ExtensionRegistryLite extensionRegistry)
-          throws java.io.IOException {
-        org.apache.orc.OrcProto.BucketStatistics parsedMessage = null;
-        try {
-          parsedMessage = PARSER.parsePartialFrom(input, extensionRegistry);
-        } catch (com.google.protobuf.InvalidProtocolBufferException e) {
-          parsedMessage = (org.apache.orc.OrcProto.BucketStatistics) e.getUnfinishedMessage();
-          throw e;
-        } finally {
-          if (parsedMessage != null) {
-            mergeFrom(parsedMessage);
-          }
-        }
-        return this;
-      }
-      private int bitField0_;
-
-      // repeated uint64 count = 1 [packed = true];
-      private java.util.List<java.lang.Long> count_ = java.util.Collections.emptyList();
-      private void ensureCountIsMutable() {
-        if (!((bitField0_ & 0x00000001) == 0x00000001)) {
-          count_ = new java.util.ArrayList<java.lang.Long>(count_);
-          bitField0_ |= 0x00000001;
-         }
-      }
-      /**
-       * <code>repeated uint64 count = 1 [packed = true];</code>
-       */
-      public java.util.List<java.lang.Long>
-          getCountList() {
-        return java.util.Collections.unmodifiableList(count_);
-      }
-      /**
-       * <code>repeated uint64 count = 1 [packed = true];</code>
-       */
-      public int getCountCount() {
-        return count_.size();
-      }
-      /**
-       * <code>repeated uint64 count = 1 [packed = true];</code>
-       */
-      public long getCount(int index) {
-        return count_.get(index);
-      }
-      /**
-       * <code>repeated uint64 count = 1 [packed = true];</code>
-       */
-      public Builder setCount(
-          int index, long value) {
-        ensureCountIsMutable();
-        count_.set(index, value);
-        onChanged();
-        return this;
-      }
-      /**
-       * <code>repeated uint64 count = 1 [packed = true];</code>
-       */
-      public Builder addCount(long value) {
-        ensureCountIsMutable();
-        count_.add(value);
-        onChanged();
-        return this;
-      }
-      /**
-       * <code>repeated uint64 count = 1 [packed = true];</code>
-       */
-      public Builder addAllCount(
-          java.lang.Iterable<? extends java.lang.Long> values) {
-        ensureCountIsMutable();
-        super.addAll(values, count_);
-        onChanged();
-        return this;
-      }
-      /**
-       * <code>repeated uint64 count = 1 [packed = true];</code>
-       */
-      public Builder clearCount() {
-        count_ = java.util.Collections.emptyList();
-        bitField0_ = (bitField0_ & ~0x00000001);
-        onChanged();
-        return this;
-      }
-
-      // @@protoc_insertion_point(builder_scope:orc.proto.BucketStatistics)
-    }
-
-    static {
-      defaultInstance = new BucketStatistics(true);
-      defaultInstance.initFields();
-    }
-
-    // @@protoc_insertion_point(class_scope:orc.proto.BucketStatistics)
-  }
-
-  public interface DecimalStatisticsOrBuilder
-      extends com.google.protobuf.MessageOrBuilder {
-
-    // optional string minimum = 1;
-    /**
-     * <code>optional string minimum = 1;</code>
-     */
-    boolean hasMinimum();
-    /**
-     * <code>optional string minimum = 1;</code>
-     */
-    java.lang.String getMinimum();
-    /**
-     * <code>optional string minimum = 1;</code>
-     */
-    com.google.protobuf.ByteString
-        getMinimumBytes();
-
-    // optional string maximum = 2;
-    /**
-     * <code>optional string maximum = 2;</code>
-     */
-    boolean hasMaximum();
-    /**
-     * <code>optional string maximum = 2;</code>
-     */
-    java.lang.String getMaximum();
-    /**
-     * <code>optional string maximum = 2;</code>
-     */
-    com.google.protobuf.ByteString
-        getMaximumBytes();
-
-    // optional string sum = 3;
-    /**
-     * <code>optional string sum = 3;</code>
-     */
-    boolean hasSum();
-    /**
-     * <code>optional string sum = 3;</code>
-     */
-    java.lang.String getSum();
-    /**
-     * <code>optional string sum = 3;</code>
-     */
-    com.google.protobuf.ByteString
-        getSumBytes();
-  }
-  /**
-   * Protobuf type {@code orc.proto.DecimalStatistics}
-   */
-  public static final class DecimalStatistics extends
-      com.google.protobuf.GeneratedMessage
-      implements DecimalStatisticsOrBuilder {
-    // Use DecimalStatistics.newBuilder() to construct.
-    private DecimalStatistics(com.google.protobuf.GeneratedMessage.Builder<?> builder) {
-      super(builder);
-      this.unknownFields = builder.getUnknownFields();
-    }
-    private DecimalStatistics(boolean noInit) { this.unknownFields = com.google.protobuf.UnknownFieldSet.getDefaultInstance(); }
-
-    private static final DecimalStatistics defaultInstance;
-    public static DecimalStatistics getDefaultInstance() {
-      return defaultInstance;
-    }
-
-    public DecimalStatistics getDefaultInstanceForType() {
-      return defaultInstance;
-    }
-
-    private final com.google.protobuf.UnknownFieldSet unknownFields;
-    @java.lang.Override
-    public final com.google.protobuf.UnknownFieldSet
-        getUnknownFields() {
-      return this.unknownFields;
-    }
-    private DecimalStatistics(
-        com.google.protobuf.CodedInputStream input,
-        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
-        throws com.google.protobuf.InvalidProtocolBufferException {
-      initFields();
-      int mutable_bitField0_ = 0;
-      com.google.protobuf.UnknownFieldSet.Builder unknownFields =
-          com.google.protobuf.UnknownFieldSet.newBuilder();
-      try {
-        boolean done = false;
-        while (!done) {
-          int tag = input.readTag();
-          switch (tag) {
-            case 0:
-              done = true;
-              break;
-            default: {
-              if (!parseUnknownField(input, unknownFields,
-                                     extensionRegistry, tag)) {
-                done = true;
-              }
-              break;
-            }
-            case 10: {
-              bitField0_ |= 0x00000001;
-              minimum_ = input.readBytes();
-              break;
-            }
-            case 18: {
-              bitField0_ |= 0x00000002;
-              maximum_ = input.readBytes();
-              break;
-            }
-            case 26: {
-              bitField0_ |= 0x00000004;
-              sum_ = input.readBytes();
-              break;
-            }
-          }
-        }
-      } catch (com.google.protobuf.InvalidProtocolBufferException e) {
-        throw e.setUnfinishedMessage(this);
-      } catch (java.io.IOException e) {
-        throw new com.google.protobuf.InvalidProtocolBufferException(
-            e.getMessage()).setUnfinishedMessage(this);
-      } finally {
-        this.unknownFields = unknownFields.build();
-        makeExtensionsImmutable();
-      }
-    }
-    public static final com.google.protobuf.Descriptors.Descriptor
-        getDescriptor() {
-      return org.apache.orc.OrcProto.internal_static_orc_proto_DecimalStatistics_descriptor;
-    }
-
-    protected com.google.protobuf.GeneratedMessage.FieldAccessorTable
-        internalGetFieldAccessorTable() {
-      return org.apache.orc.OrcProto.internal_static_orc_proto_DecimalStatistics_fieldAccessorTable
-          .ensureFieldAccessorsInitialized(
-              org.apache.orc.OrcProto.DecimalStatistics.class, org.apache.orc.OrcProto.DecimalStatistics.Builder.class);
-    }
-
-    public static com.google.protobuf.Parser<DecimalStatistics> PARSER =
-        new com.google.protobuf.AbstractParser<DecimalStatistics>() {
-      public DecimalStatistics parsePartialFrom(
-          com.google.protobuf.CodedInputStream input,
-          com.google.protobuf.ExtensionRegistryLite extensionRegistry)
-          throws com.google.protobuf.InvalidProtocolBufferException {
-        return new DecimalStatistics(input, extensionRegistry);
-      }
-    };
-
-    @java.lang.Override
-    public com.google.protobuf.Parser<DecimalStatistics> getParserForType() {
-      return PARSER;
-    }
-
-    private int bitField0_;
-    // optional string minimum = 1;
-    public static final int MINIMUM_FIELD_NUMBER = 1;
-    private java.lang.Object minimum_;
-    /**
-     * <code>optional string minimum = 1;</code>
-     */
-    public boolean hasMinimum() {
-      return ((bitField0_ & 0x00000001) == 0x00000001);
-    }
-    /**
-     * <code>optional string minimum = 1;</code>
-     */
-    public java.lang.String getMinimum() {
-      java.lang.Object ref = minimum_;
-      if (ref instanceof java.lang.String) {
-        return (java.lang.String) ref;
-      } else {
-        com.google.protobuf.ByteString bs = 
-            (com.google.protobuf.ByteString) ref;
-        java.lang.String s = bs.toStringUtf8();
-        if (bs.isValidUtf8()) {
-          minimum_ = s;
-        }
-        return s;
-      }
-    }
-    /**
-     * <code>optional string minimum = 1;</code>
-     */
-    public com.google.protobuf.ByteString
-        getMinimumBytes() {
-      java.lang.Object ref = minimum_;
-      if (ref instanceof java.lang.String) {
-        com.google.protobuf.ByteString b = 
-            com.google.protobuf.ByteString.copyFromUtf8(
-                (java.lang.String) ref);
-        minimum_ = b;


<TRUNCATED>
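
A side note for readers skimming the generated DecimalStatistics parser above: the case labels follow the protobuf wire format, where a tag is (field_number << 3) | wire_type. The three length-delimited fields (wire type 2) minimum = 1, maximum = 2, and sum = 3 therefore arrive as tags 10, 18, and 26. A minimal sketch of that arithmetic (plain Java, not part of this patch; the class name is illustrative):

public class ProtoTagDemo {
  // Wire type 2 marks length-delimited values (strings, bytes, nested messages).
  private static final int LENGTH_DELIMITED = 2;

  static int tagFor(int fieldNumber, int wireType) {
    return (fieldNumber << 3) | wireType;
  }

  public static void main(String[] args) {
    System.out.println(tagFor(1, LENGTH_DELIMITED)); // 10 -> minimum
    System.out.println(tagFor(2, LENGTH_DELIMITED)); // 18 -> maximum
    System.out.println(tagFor(3, LENGTH_DELIMITED)); // 26 -> sum
  }
}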

[23/37] hive git commit: HIVE-17118. Move the hive-orc source files to make the package names unique.

Posted by om...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/IntegerColumnStatistics.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/IntegerColumnStatistics.java b/orc/src/java/org/apache/orc/IntegerColumnStatistics.java
deleted file mode 100644
index 1a162ff..0000000
--- a/orc/src/java/org/apache/orc/IntegerColumnStatistics.java
+++ /dev/null
@@ -1,52 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc;
-
-import org.apache.orc.ColumnStatistics;
-
-/**
- * Statistics for all of the integer columns, such as byte, short, int, and
- * long.
- */
-public interface IntegerColumnStatistics extends ColumnStatistics {
-  /**
-   * Get the smallest value in the column. Only defined if getNumberOfValues
-   * is non-zero.
-   * @return the minimum
-   */
-  long getMinimum();
-
-  /**
-   * Get the largest value in the column. Only defined if getNumberOfValues
-   * is non-zero.
-   * @return the maximum
-   */
-  long getMaximum();
-
-  /**
-   * Is the sum defined? If the sum overflowed the counter this will be false.
-   * @return is the sum available
-   */
-  boolean isSumDefined();
-
-  /**
-   * Get the sum of the column. Only valid if isSumDefined returns true.
-   * @return the sum of the column
-   */
-  long getSum();
-}
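
The interface removed above is read-only; its values come from the column statistics stored in the file footer. A minimal consumption sketch, assuming an already-opened org.apache.orc.Reader (the class, method, and variable names here are illustrative, not part of the patch):

import org.apache.orc.ColumnStatistics;
import org.apache.orc.IntegerColumnStatistics;
import org.apache.orc.Reader;

public class IntegerStatsDemo {
  // Prints min/max/sum for every integer-typed column in the file statistics.
  static void printIntegerStats(Reader reader) {
    ColumnStatistics[] stats = reader.getStatistics();
    for (int col = 0; col < stats.length; ++col) {
      if (stats[col] instanceof IntegerColumnStatistics) {
        IntegerColumnStatistics s = (IntegerColumnStatistics) stats[col];
        if (s.getNumberOfValues() > 0) {   // min/max are only defined for non-empty columns
          System.out.println("column " + col + ": min=" + s.getMinimum()
              + " max=" + s.getMaximum()
              + (s.isSumDefined() ? " sum=" + s.getSum() : " (sum overflowed)"));
        }
      }
    }
  }
}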

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/OrcConf.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/OrcConf.java b/orc/src/java/org/apache/orc/OrcConf.java
deleted file mode 100644
index 357318d..0000000
--- a/orc/src/java/org/apache/orc/OrcConf.java
+++ /dev/null
@@ -1,193 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc;
-
-import org.apache.hadoop.conf.Configuration;
-
-import java.util.Properties;
-
-/**
- * Define the configuration properties that Orc understands.
- */
-public enum OrcConf {
-  STRIPE_SIZE("orc.stripe.size", "hive.exec.orc.default.stripe.size",
-      64L * 1024 * 1024,
-      "Define the default ORC stripe size, in bytes."),
-  BLOCK_SIZE("orc.block.size", "hive.exec.orc.default.block.size",
-      256L * 1024 * 1024,
-      "Define the default file system block size for ORC files."),
-  ENABLE_INDEXES("orc.create.index", "orc.create.index", true,
-      "Should the ORC writer create indexes as part of the file."),
-  ROW_INDEX_STRIDE("orc.row.index.stride",
-      "hive.exec.orc.default.row.index.stride", 10000,
-      "Define the default ORC index stride in number of rows. (Stride is the\n"+
-          " number of rows n index entry represents.)"),
-  BUFFER_SIZE("orc.compress.size", "hive.exec.orc.default.buffer.size",
-      256 * 1024, "Define the default ORC buffer size, in bytes."),
-  BASE_DELTA_RATIO("orc.base.delta.ratio", "hive.exec.orc.base.delta.ratio", 8,
-      "The ratio of base writer and delta writer in terms of STRIPE_SIZE and BUFFER_SIZE."),
-  BLOCK_PADDING("orc.block.padding", "hive.exec.orc.default.block.padding",
-      true,
-      "Define whether stripes should be padded to the HDFS block boundaries."),
-  COMPRESS("orc.compress", "hive.exec.orc.default.compress", "ZLIB",
-      "Define the default compression codec for ORC file"),
-  WRITE_FORMAT("orc.write.format", "hive.exec.orc.write.format", "0.12",
-      "Define the version of the file to write. Possible values are 0.11 and\n"+
-          " 0.12. If this parameter is not defined, ORC will use the run\n" +
-          " length encoding (RLE) introduced in Hive 0.12."),
-  ENCODING_STRATEGY("orc.encoding.strategy", "hive.exec.orc.encoding.strategy",
-      "SPEED",
-      "Define the encoding strategy to use while writing data. Changing this\n"+
-          "will only affect the light weight encoding for integers. This\n" +
-          "flag will not change the compression level of higher level\n" +
-          "compression codec (like ZLIB)."),
-  COMPRESSION_STRATEGY("orc.compression.strategy",
-      "hive.exec.orc.compression.strategy", "SPEED",
-      "Define the compression strategy to use while writing data.\n" +
-          "This changes the compression level of higher level compression\n" +
-          "codec (like ZLIB)."),
-  BLOCK_PADDING_TOLERANCE("orc.block.padding.tolerance",
-      "hive.exec.orc.block.padding.tolerance", 0.05,
-      "Define the tolerance for block padding as a decimal fraction of\n" +
-          "stripe size (for example, the default value 0.05 is 5% of the\n" +
-          "stripe size). For the defaults of 64Mb ORC stripe and 256Mb HDFS\n" +
-          "blocks, the default block padding tolerance of 5% will\n" +
-          "reserve a maximum of 3.2Mb for padding within the 256Mb block.\n" +
-          "In that case, if the available size within the block is more than\n"+
-          "3.2Mb, a new smaller stripe will be inserted to fit within that\n" +
-          "space. This will make sure that no stripe written will block\n" +
-          " boundaries and cause remote reads within a node local task."),
-  BLOOM_FILTER_FPP("orc.bloom.filter.fpp", "orc.default.bloom.fpp", 0.05,
-      "Define the default false positive probability for bloom filters."),
-  USE_ZEROCOPY("orc.use.zerocopy", "hive.exec.orc.zerocopy", false,
-      "Use zerocopy reads with ORC. (This requires Hadoop 2.3 or later.)"),
-  SKIP_CORRUPT_DATA("orc.skip.corrupt.data", "hive.exec.orc.skip.corrupt.data",
-      false,
-      "If ORC reader encounters corrupt data, this value will be used to\n" +
-          "determine whether to skip the corrupt data or throw exception.\n" +
-          "The default behavior is to throw exception."),
-  MEMORY_POOL("orc.memory.pool", "hive.exec.orc.memory.pool", 0.5,
-      "Maximum fraction of heap that can be used by ORC file writers"),
-  DICTIONARY_KEY_SIZE_THRESHOLD("orc.dictionary.key.threshold",
-      "hive.exec.orc.dictionary.key.size.threshold",
-      0.8,
-      "If the number of distinct keys in a dictionary is greater than this\n" +
-          "fraction of the total number of non-null rows, turn off \n" +
-          "dictionary encoding.  Use 1 to always use dictionary encoding."),
-  ROW_INDEX_STRIDE_DICTIONARY_CHECK("orc.dictionary.early.check",
-      "hive.orc.row.index.stride.dictionary.check",
-      true,
-      "If enabled dictionary check will happen after first row index stride\n" +
-          "(default 10000 rows) else dictionary check will happen before\n" +
-          "writing first stripe. In both cases, the decision to use\n" +
-          "dictionary or not will be retained thereafter."),
-  BLOOM_FILTER_COLUMNS("orc.bloom.filter.columns", "orc.bloom.filter.columns",
-      "", "List of columns to create bloom filters for when writing.")
-  ;
-
-  private final String attribute;
-  private final String hiveConfName;
-  private final Object defaultValue;
-  private final String description;
-
-  OrcConf(String attribute,
-          String hiveConfName,
-          Object defaultValue,
-          String description) {
-    this.attribute = attribute;
-    this.hiveConfName = hiveConfName;
-    this.defaultValue = defaultValue;
-    this.description = description;
-  }
-
-  public String getAttribute() {
-    return attribute;
-  }
-
-  public String getHiveConfName() {
-    return hiveConfName;
-  }
-
-  public Object getDefaultValue() {
-    return defaultValue;
-  }
-
-  public String getDescription() {
-    return description;
-  }
-
-  private String lookupValue(Properties tbl, Configuration conf) {
-    String result = null;
-    if (tbl != null) {
-      result = tbl.getProperty(attribute);
-    }
-    if (result == null && conf != null) {
-      result = conf.get(attribute);
-      if (result == null) {
-        result = conf.get(hiveConfName);
-      }
-    }
-    return result;
-  }
-
-  public long getLong(Properties tbl, Configuration conf) {
-    String value = lookupValue(tbl, conf);
-    if (value != null) {
-      return Long.parseLong(value);
-    }
-    return ((Number) defaultValue).longValue();
-  }
-
-  public long getLong(Configuration conf) {
-    return getLong(null, conf);
-  }
-
-  public String getString(Properties tbl, Configuration conf) {
-    String value = lookupValue(tbl, conf);
-    return value == null ? (String) defaultValue : value;
-  }
-
-  public String getString(Configuration conf) {
-    return getString(null, conf);
-  }
-
-  public boolean getBoolean(Properties tbl, Configuration conf) {
-    String value = lookupValue(tbl, conf);
-    if (value != null) {
-      return Boolean.parseBoolean(value);
-    }
-    return (Boolean) defaultValue;
-  }
-
-  public boolean getBoolean(Configuration conf) {
-    return getBoolean(null, conf);
-  }
-
-  public double getDouble(Properties tbl, Configuration conf) {
-    String value = lookupValue(tbl, conf);
-    if (value != null) {
-      return Double.parseDouble(value);
-    }
-    return ((Number) defaultValue).doubleValue();
-  }
-
-  public double getDouble(Configuration conf) {
-    return getDouble(null, conf);
-  }
-}
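
As lookupValue() above shows, each OrcConf setting is resolved in order: table properties first, then the orc.* key, then the legacy hive.* key, and finally the compiled-in default. A hedged usage sketch (the property values below are purely illustrative):

import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.orc.OrcConf;

public class OrcConfDemo {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // The legacy Hive key is still honored when the orc.* key is absent.
    conf.setLong(OrcConf.STRIPE_SIZE.getHiveConfName(), 128L * 1024 * 1024);

    Properties tableProps = new Properties();
    // Table properties take precedence over both configuration keys.
    tableProps.setProperty(OrcConf.COMPRESS.getAttribute(), "SNAPPY");

    long stripeSize = OrcConf.STRIPE_SIZE.getLong(tableProps, conf);  // 134217728
    String codec = OrcConf.COMPRESS.getString(tableProps, conf);      // "SNAPPY"
    boolean padding = OrcConf.BLOCK_PADDING.getBoolean(conf);         // default: true
    System.out.println(stripeSize + " " + codec + " " + padding);
  }
}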

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/OrcFile.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/OrcFile.java b/orc/src/java/org/apache/orc/OrcFile.java
deleted file mode 100644
index 06fb666..0000000
--- a/orc/src/java/org/apache/orc/OrcFile.java
+++ /dev/null
@@ -1,574 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc;
-
-import java.io.IOException;
-import java.util.Properties;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.orc.impl.MemoryManager;
-import org.apache.orc.impl.OrcTail;
-import org.apache.orc.impl.ReaderImpl;
-import org.apache.orc.impl.WriterImpl;
-
-/**
- * Contains factory methods to read or write ORC files.
- */
-public class OrcFile {
-  public static final String MAGIC = "ORC";
-
-  /**
-   * Create a version number for the ORC file format, so that we can add
-   * non-forward compatible changes in the future. To make it easier for users
-   * to understand the version numbers, we use the Hive release number that
-   * first wrote that version of ORC files.
-   *
-   * Thus, if you add new encodings or other non-forward compatible changes
-   * to ORC files, which prevent the old reader from reading the new format,
-   * you should change these variables to reflect the next Hive release number.
-   * Non-forward compatible changes should never be added in patch releases.
-   *
-   * Do not make any changes that break backwards compatibility, which would
-   * prevent the new reader from reading ORC files generated by any released
-   * version of Hive.
-   */
-  public enum Version {
-    V_0_11("0.11", 0, 11),
-    V_0_12("0.12", 0, 12);
-
-    public static final Version CURRENT = V_0_12;
-
-    private final String name;
-    private final int major;
-    private final int minor;
-
-    Version(String name, int major, int minor) {
-      this.name = name;
-      this.major = major;
-      this.minor = minor;
-    }
-
-    public static Version byName(String name) {
-      for(Version version: values()) {
-        if (version.name.equals(name)) {
-          return version;
-        }
-      }
-      throw new IllegalArgumentException("Unknown ORC version " + name);
-    }
-
-    /**
-     * Get the human readable name for the version.
-     */
-    public String getName() {
-      return name;
-    }
-
-    /**
-     * Get the major version number.
-     */
-    public int getMajor() {
-      return major;
-    }
-
-    /**
-     * Get the minor version number.
-     */
-    public int getMinor() {
-      return minor;
-    }
-  }
-
-  /**
-   * Records the version of the writer in terms of which bugs have been fixed.
-   * For bugs in the writer where the old readers can still read the new data
-   * correctly, bump this version instead of the file format Version.
-   */
-  public enum WriterVersion {
-    ORIGINAL(0),
-    HIVE_8732(1), // corrupted stripe/file maximum column statistics
-    HIVE_4243(2), // use real column names from Hive tables
-    HIVE_12055(3), // vectorized writer
-    HIVE_13083(4), // decimal writer updating present stream wrongly
-
-    // Don't use any magic numbers here except for the below:
-    FUTURE(Integer.MAX_VALUE); // a version from a future writer
-
-    private final int id;
-
-    public int getId() {
-      return id;
-    }
-
-    WriterVersion(int id) {
-      this.id = id;
-    }
-
-    private static final WriterVersion[] values;
-    static {
-      // Assumes few non-negative values close to zero.
-      int max = Integer.MIN_VALUE;
-      for (WriterVersion v : WriterVersion.values()) {
-        if (v.id < 0) throw new AssertionError();
-        if (v.id > max && FUTURE.id != v.id) {
-          max = v.id;
-        }
-      }
-      values = new WriterVersion[max + 1];
-      for (WriterVersion v : WriterVersion.values()) {
-        if (v.id < values.length) {
-          values[v.id] = v;
-        }
-      }
-    }
-
-    /**
-     * Convert the integer from OrcProto.PostScript.writerVersion
-     * to the enumeration with unknown versions being mapped to FUTURE.
-     * @param val the serialized writer version
-     * @return the corresponding enumeration value
-     */
-    public static WriterVersion from(int val) {
-      if (val >= values.length) {
-        return FUTURE;
-      }
-      return values[val];
-    }
-  }
-  public static final WriterVersion CURRENT_WRITER = WriterVersion.HIVE_13083;
-
-  public enum EncodingStrategy {
-    SPEED, COMPRESSION
-  }
-
-  public enum CompressionStrategy {
-    SPEED, COMPRESSION
-  }
-
-  // unused
-  protected OrcFile() {}
-
-  public static class ReaderOptions {
-    private final Configuration conf;
-    private FileSystem filesystem;
-    private long maxLength = Long.MAX_VALUE;
-    private OrcTail orcTail;
-    // TODO: We can generalize the FileMetadata interface. Make OrcTail implement FileMetadata
-    // and remove this class altogether. Both footer caching and LLAP caching just need OrcTail.
-    // For now keeping this around to avoid complex surgery
-    private FileMetadata fileMetadata;
-
-    public ReaderOptions(Configuration conf) {
-      this.conf = conf;
-    }
-
-    public ReaderOptions filesystem(FileSystem fs) {
-      this.filesystem = fs;
-      return this;
-    }
-
-    public ReaderOptions maxLength(long val) {
-      maxLength = val;
-      return this;
-    }
-
-    public ReaderOptions orcTail(OrcTail tail) {
-      this.orcTail = tail;
-      return this;
-    }
-
-    public Configuration getConfiguration() {
-      return conf;
-    }
-
-    public FileSystem getFilesystem() {
-      return filesystem;
-    }
-
-    public long getMaxLength() {
-      return maxLength;
-    }
-
-    public OrcTail getOrcTail() {
-      return orcTail;
-    }
-
-    public ReaderOptions fileMetadata(final FileMetadata metadata) {
-      fileMetadata = metadata;
-      return this;
-    }
-
-    public FileMetadata getFileMetadata() {
-      return fileMetadata;
-    }
-  }
-
-  public static ReaderOptions readerOptions(Configuration conf) {
-    return new ReaderOptions(conf);
-  }
-
-  public static Reader createReader(Path path,
-                                    ReaderOptions options) throws IOException {
-    return new ReaderImpl(path, options);
-  }
-
-  public interface WriterContext {
-    Writer getWriter();
-  }
-
-  public interface WriterCallback {
-    void preStripeWrite(WriterContext context) throws IOException;
-    void preFooterWrite(WriterContext context) throws IOException;
-  }
-
-  /**
-   * Options for creating ORC file writers.
-   */
-  public static class WriterOptions {
-    private final Configuration configuration;
-    private FileSystem fileSystemValue = null;
-    private TypeDescription schema = null;
-    private long stripeSizeValue;
-    private long blockSizeValue;
-    private int rowIndexStrideValue;
-    private int bufferSizeValue;
-    private boolean enforceBufferSize = false;
-    private boolean blockPaddingValue;
-    private CompressionKind compressValue;
-    private MemoryManager memoryManagerValue;
-    private Version versionValue;
-    private WriterCallback callback;
-    private EncodingStrategy encodingStrategy;
-    private CompressionStrategy compressionStrategy;
-    private double paddingTolerance;
-    private String bloomFilterColumns;
-    private double bloomFilterFpp;
-
-    protected WriterOptions(Properties tableProperties, Configuration conf) {
-      configuration = conf;
-      memoryManagerValue = getStaticMemoryManager(conf);
-      stripeSizeValue = OrcConf.STRIPE_SIZE.getLong(tableProperties, conf);
-      blockSizeValue = OrcConf.BLOCK_SIZE.getLong(tableProperties, conf);
-      rowIndexStrideValue =
-          (int) OrcConf.ROW_INDEX_STRIDE.getLong(tableProperties, conf);
-      bufferSizeValue = (int) OrcConf.BUFFER_SIZE.getLong(tableProperties,
-          conf);
-      blockPaddingValue =
-          OrcConf.BLOCK_PADDING.getBoolean(tableProperties, conf);
-      compressValue =
-          CompressionKind.valueOf(OrcConf.COMPRESS.getString(tableProperties,
-              conf).toUpperCase());
-      String versionName = OrcConf.WRITE_FORMAT.getString(tableProperties,
-          conf);
-      versionValue = Version.byName(versionName);
-      String enString = OrcConf.ENCODING_STRATEGY.getString(tableProperties,
-          conf);
-      encodingStrategy = EncodingStrategy.valueOf(enString);
-
-      String compString =
-          OrcConf.COMPRESSION_STRATEGY.getString(tableProperties, conf);
-      compressionStrategy = CompressionStrategy.valueOf(compString);
-
-      paddingTolerance =
-          OrcConf.BLOCK_PADDING_TOLERANCE.getDouble(tableProperties, conf);
-
-      bloomFilterColumns = OrcConf.BLOOM_FILTER_COLUMNS.getString(tableProperties,
-          conf);
-      bloomFilterFpp = OrcConf.BLOOM_FILTER_FPP.getDouble(tableProperties,
-          conf);
-    }
-
-    /**
-     * Provide the filesystem for the path, if the client has it available.
-     * If it is not provided, it will be found from the path.
-     */
-    public WriterOptions fileSystem(FileSystem value) {
-      fileSystemValue = value;
-      return this;
-    }
-
-    /**
-     * Set the stripe size for the file. The writer stores the contents of the
-     * stripe in memory until this memory limit is reached and the stripe
-     * is flushed to the HDFS file and the next stripe started.
-     */
-    public WriterOptions stripeSize(long value) {
-      stripeSizeValue = value;
-      return this;
-    }
-
-    /**
-     * Set the file system block size for the file. For optimal performance,
-     * set the block size to be a multiple of the stripe size.
-     */
-    public WriterOptions blockSize(long value) {
-      blockSizeValue = value;
-      return this;
-    }
-
-    /**
-     * Set the distance between entries in the row index. The minimum value is
-     * 1000 to prevent the index from overwhelming the data. If the stride is
-     * set to 0, no indexes will be included in the file.
-     */
-    public WriterOptions rowIndexStride(int value) {
-      rowIndexStrideValue = value;
-      return this;
-    }
-
-    /**
-     * The size of the memory buffers used for compressing and storing the
-     * stripe in memory. NOTE: ORC writer may choose to use smaller buffer
-     * size based on stripe size and number of columns for efficient stripe
-     * writing and memory utilization. To force the writer to use the requested
-     * buffer size, use enforceBufferSize().
-     */
-    public WriterOptions bufferSize(int value) {
-      bufferSizeValue = value;
-      return this;
-    }
-
-    /**
-     * Force the writer to use the requested buffer size instead of estimating
-     * buffer size based on stripe size and number of columns.
-     * See bufferSize() method for more info.
-     * Default: false
-     */
-    public WriterOptions enforceBufferSize() {
-      enforceBufferSize = true;
-      return this;
-    }
-
-    /**
-     * Sets whether the HDFS blocks are padded to prevent stripes from
-     * straddling blocks. Padding improves locality and thus the speed of
-     * reading, but costs space.
-     */
-    public WriterOptions blockPadding(boolean value) {
-      blockPaddingValue = value;
-      return this;
-    }
-
-    /**
-     * Sets the encoding strategy that is used to encode the data.
-     */
-    public WriterOptions encodingStrategy(EncodingStrategy strategy) {
-      encodingStrategy = strategy;
-      return this;
-    }
-
-    /**
-     * Sets the tolerance for block padding as a decimal fraction of the stripe size.
-     */
-    public WriterOptions paddingTolerance(double value) {
-      paddingTolerance = value;
-      return this;
-    }
-
-    /**
-     * Comma-separated list of column names for which bloom filters are to be created.
-     */
-    public WriterOptions bloomFilterColumns(String columns) {
-      bloomFilterColumns = columns;
-      return this;
-    }
-
-    /**
-     * Specify the false positive probability for bloom filter.
-     * @param fpp - false positive probability
-     * @return this
-     */
-    public WriterOptions bloomFilterFpp(double fpp) {
-      bloomFilterFpp = fpp;
-      return this;
-    }
-
-    /**
-     * Sets the generic compression that is used to compress the data.
-     */
-    public WriterOptions compress(CompressionKind value) {
-      compressValue = value;
-      return this;
-    }
-
-    /**
-     * Set the schema for the file. This is a required parameter.
-     * @param schema the schema for the file.
-     * @return this
-     */
-    public WriterOptions setSchema(TypeDescription schema) {
-      this.schema = schema;
-      return this;
-    }
-
-    /**
-     * Sets the version of the file that will be written.
-     */
-    public WriterOptions version(Version value) {
-      versionValue = value;
-      return this;
-    }
-
-    /**
-     * Add a listener for when the stripe and file are about to be closed.
-     * @param callback the object to be called when the stripe is closed
-     * @return this
-     */
-    public WriterOptions callback(WriterCallback callback) {
-      this.callback = callback;
-      return this;
-    }
-
-    /**
-     * A package local option to set the memory manager.
-     */
-    protected WriterOptions memory(MemoryManager value) {
-      memoryManagerValue = value;
-      return this;
-    }
-
-    public boolean getBlockPadding() {
-      return blockPaddingValue;
-    }
-
-    public long getBlockSize() {
-      return blockSizeValue;
-    }
-
-    public String getBloomFilterColumns() {
-      return bloomFilterColumns;
-    }
-
-    public FileSystem getFileSystem() {
-      return fileSystemValue;
-    }
-
-    public Configuration getConfiguration() {
-      return configuration;
-    }
-
-    public TypeDescription getSchema() {
-      return schema;
-    }
-
-    public long getStripeSize() {
-      return stripeSizeValue;
-    }
-
-    public CompressionKind getCompress() {
-      return compressValue;
-    }
-
-    public WriterCallback getCallback() {
-      return callback;
-    }
-
-    public Version getVersion() {
-      return versionValue;
-    }
-
-    public MemoryManager getMemoryManager() {
-      return memoryManagerValue;
-    }
-
-    public int getBufferSize() {
-      return bufferSizeValue;
-    }
-
-    public boolean isEnforceBufferSize() {
-      return enforceBufferSize;
-    }
-
-    public int getRowIndexStride() {
-      return rowIndexStrideValue;
-    }
-
-    public CompressionStrategy getCompressionStrategy() {
-      return compressionStrategy;
-    }
-
-    public EncodingStrategy getEncodingStrategy() {
-      return encodingStrategy;
-    }
-
-    public double getPaddingTolerance() {
-      return paddingTolerance;
-    }
-
-    public double getBloomFilterFpp() {
-      return bloomFilterFpp;
-    }
-  }
-
-  /**
-   * Create a set of writer options based on a configuration.
-   * @param conf the configuration to use for values
-   * @return A WriterOptions object that can be modified
-   */
-  public static WriterOptions writerOptions(Configuration conf) {
-    return new WriterOptions(null, conf);
-  }
-
-  /**
-   * Create a set of write options based on a set of table properties and
-   * configuration.
-   * @param tableProperties the properties of the table
-   * @param conf the configuration of the query
-   * @return a WriterOptions object that can be modified
-   */
-  public static WriterOptions writerOptions(Properties tableProperties,
-                                            Configuration conf) {
-    return new WriterOptions(tableProperties, conf);
-  }
-
-  private static ThreadLocal<MemoryManager> memoryManager = null;
-
-  private static synchronized MemoryManager getStaticMemoryManager(
-      final Configuration conf) {
-    if (memoryManager == null) {
-      memoryManager = new ThreadLocal<MemoryManager>() {
-        @Override
-        protected MemoryManager initialValue() {
-          return new MemoryManager(conf);
-        }
-      };
-    }
-    return memoryManager.get();
-  }
-
-  /**
-   * Create an ORC file writer. This is the public interface for creating
-   * writers going forward and new options will only be added to this method.
-   * @param path filename to write to
-   * @param opts the options
-   * @return a new ORC file writer
-   * @throws IOException
-   */
-  public static Writer createWriter(Path path,
-                                    WriterOptions opts
-                                    ) throws IOException {
-    FileSystem fs = opts.getFileSystem() == null ?
-        path.getFileSystem(opts.getConfiguration()) : opts.getFileSystem();
-
-    return new WriterImpl(fs, path, opts);
-  }
-
-}
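
Since OrcFile is the factory entry point shown above, a short end-to-end sketch may help; it assumes a writable path /tmp/demo.orc and elides adding rows (the class name, path, and values are illustrative, not part of the patch):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.orc.CompressionKind;
import org.apache.orc.OrcFile;
import org.apache.orc.Reader;
import org.apache.orc.TypeDescription;
import org.apache.orc.Writer;

public class OrcFileDemo {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    TypeDescription schema = TypeDescription.createStruct()
        .addField("id", TypeDescription.createLong())
        .addField("name", TypeDescription.createString());

    // Anything not set explicitly falls back to the OrcConf defaults.
    Writer writer = OrcFile.createWriter(new Path("/tmp/demo.orc"),
        OrcFile.writerOptions(conf)
            .setSchema(schema)
            .compress(CompressionKind.ZLIB)
            .stripeSize(64L * 1024 * 1024));
    writer.close();  // rows would normally be appended before closing

    Reader reader = OrcFile.createReader(new Path("/tmp/demo.orc"),
        OrcFile.readerOptions(conf));
    System.out.println(reader.getSchema() + ", rows=" + reader.getNumberOfRows());
  }
}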

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/OrcUtils.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/OrcUtils.java b/orc/src/java/org/apache/orc/OrcUtils.java
deleted file mode 100644
index 4f02926..0000000
--- a/orc/src/java/org/apache/orc/OrcUtils.java
+++ /dev/null
@@ -1,624 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-
-import org.apache.orc.OrcProto.Type.Builder;
-import org.apache.orc.impl.ReaderImpl;
-
-import com.google.common.collect.Lists;
-
-public class OrcUtils {
-
-  /**
-   * Returns the selected columns as a boolean array with true values set for the specified column names.
-   * The result contains a number of elements equal to the flattened number of columns.
-   * For example:
-   * selectedColumns - a,b,c
-   * allColumns - a,b,c,d
-   * If column c is a complex type, say list<string>, and the other types are primitives, then the
-   * result will be [false, true, true, true, true, false].
-   * Index 0 is the root element of the struct, which is set to false by default; indexes 1 and 2
-   * correspond to columns a and b. Indexes 3 and 4 correspond to column c, which is list<string>,
-   * and index 5 corresponds to column d. After flattening, list<string> occupies 2 columns.
-   *
-   * @param selectedColumns - comma separated list of selected column names
-   * @param schema       - object schema
-   * @return - boolean array with true value set for the specified column names
-   */
-  public static boolean[] includeColumns(String selectedColumns,
-                                         TypeDescription schema) {
-    int numFlattenedCols = schema.getMaximumId();
-    boolean[] results = new boolean[numFlattenedCols + 1];
-    if ("*".equals(selectedColumns)) {
-      Arrays.fill(results, true);
-      return results;
-    }
-    if (selectedColumns != null &&
-        schema.getCategory() == TypeDescription.Category.STRUCT) {
-      List<String> fieldNames = schema.getFieldNames();
-      List<TypeDescription> fields = schema.getChildren();
-      for (String column: selectedColumns.split((","))) {
-        TypeDescription col = findColumn(column, fieldNames, fields);
-        if (col != null) {
-          for(int i=col.getId(); i <= col.getMaximumId(); ++i) {
-            results[i] = true;
-          }
-        }
-      }
-    }
-    return results;
-  }
-
-  private static TypeDescription findColumn(String columnName,
-                                            List<String> fieldNames,
-                                            List<TypeDescription> fields) {
-    int i = 0;
-    for(String fieldName: fieldNames) {
-      if (fieldName.equalsIgnoreCase(columnName)) {
-        return fields.get(i);
-      } else {
-        i += 1;
-      }
-    }
-    return null;
-  }
-
-  public static List<OrcProto.Type> getOrcTypes(TypeDescription typeDescr) {
-    List<OrcProto.Type> result = Lists.newArrayList();
-    appendOrcTypes(result, typeDescr);
-    return result;
-  }
-
-  private static void appendOrcTypes(List<OrcProto.Type> result, TypeDescription typeDescr) {
-    OrcProto.Type.Builder type = OrcProto.Type.newBuilder();
-    List<TypeDescription> children = typeDescr.getChildren();
-    switch (typeDescr.getCategory()) {
-    case BOOLEAN:
-      type.setKind(OrcProto.Type.Kind.BOOLEAN);
-      break;
-    case BYTE:
-      type.setKind(OrcProto.Type.Kind.BYTE);
-      break;
-    case SHORT:
-      type.setKind(OrcProto.Type.Kind.SHORT);
-      break;
-    case INT:
-      type.setKind(OrcProto.Type.Kind.INT);
-      break;
-    case LONG:
-      type.setKind(OrcProto.Type.Kind.LONG);
-      break;
-    case FLOAT:
-      type.setKind(OrcProto.Type.Kind.FLOAT);
-      break;
-    case DOUBLE:
-      type.setKind(OrcProto.Type.Kind.DOUBLE);
-      break;
-    case STRING:
-      type.setKind(OrcProto.Type.Kind.STRING);
-      break;
-    case CHAR:
-      type.setKind(OrcProto.Type.Kind.CHAR);
-      type.setMaximumLength(typeDescr.getMaxLength());
-      break;
-    case VARCHAR:
-      type.setKind(OrcProto.Type.Kind.VARCHAR);
-      type.setMaximumLength(typeDescr.getMaxLength());
-      break;
-    case BINARY:
-      type.setKind(OrcProto.Type.Kind.BINARY);
-      break;
-    case TIMESTAMP:
-      type.setKind(OrcProto.Type.Kind.TIMESTAMP);
-      break;
-    case DATE:
-      type.setKind(OrcProto.Type.Kind.DATE);
-      break;
-    case DECIMAL:
-      type.setKind(OrcProto.Type.Kind.DECIMAL);
-      type.setPrecision(typeDescr.getPrecision());
-      type.setScale(typeDescr.getScale());
-      break;
-    case LIST:
-      type.setKind(OrcProto.Type.Kind.LIST);
-      type.addSubtypes(children.get(0).getId());
-      break;
-    case MAP:
-      type.setKind(OrcProto.Type.Kind.MAP);
-      for(TypeDescription t: children) {
-        type.addSubtypes(t.getId());
-      }
-      break;
-    case STRUCT:
-      type.setKind(OrcProto.Type.Kind.STRUCT);
-      for(TypeDescription t: children) {
-        type.addSubtypes(t.getId());
-      }
-      for(String field: typeDescr.getFieldNames()) {
-        type.addFieldNames(field);
-      }
-      break;
-    case UNION:
-      type.setKind(OrcProto.Type.Kind.UNION);
-      for(TypeDescription t: children) {
-        type.addSubtypes(t.getId());
-      }
-      break;
-    default:
-      throw new IllegalArgumentException("Unknown category: " +
-          typeDescr.getCategory());
-    }
-    result.add(type.build());
-    if (children != null) {
-      for(TypeDescription child: children) {
-        appendOrcTypes(result, child);
-      }
-    }
-  }
-
-  /**
-   * NOTE: This method ignores the subtype numbers in the TypeDescription and rebuilds the subtype
-   * numbers based on the length of the result list being appended to.
-   *
-   * @param result
-   * @param typeDescr
-   */
-  public static void appendOrcTypesRebuildSubtypes(List<OrcProto.Type> result,
-      TypeDescription typeDescr) {
-
-    int subtype = result.size();
-    OrcProto.Type.Builder type = OrcProto.Type.newBuilder();
-    boolean needsAdd = true;
-    List<TypeDescription> children = typeDescr.getChildren();
-    switch (typeDescr.getCategory()) {
-    case BOOLEAN:
-      type.setKind(OrcProto.Type.Kind.BOOLEAN);
-      break;
-    case BYTE:
-      type.setKind(OrcProto.Type.Kind.BYTE);
-      break;
-    case SHORT:
-      type.setKind(OrcProto.Type.Kind.SHORT);
-      break;
-    case INT:
-      type.setKind(OrcProto.Type.Kind.INT);
-      break;
-    case LONG:
-      type.setKind(OrcProto.Type.Kind.LONG);
-      break;
-    case FLOAT:
-      type.setKind(OrcProto.Type.Kind.FLOAT);
-      break;
-    case DOUBLE:
-      type.setKind(OrcProto.Type.Kind.DOUBLE);
-      break;
-    case STRING:
-      type.setKind(OrcProto.Type.Kind.STRING);
-      break;
-    case CHAR:
-      type.setKind(OrcProto.Type.Kind.CHAR);
-      type.setMaximumLength(typeDescr.getMaxLength());
-      break;
-    case VARCHAR:
-      type.setKind(OrcProto.Type.Kind.VARCHAR);
-      type.setMaximumLength(typeDescr.getMaxLength());
-      break;
-    case BINARY:
-      type.setKind(OrcProto.Type.Kind.BINARY);
-      break;
-    case TIMESTAMP:
-      type.setKind(OrcProto.Type.Kind.TIMESTAMP);
-      break;
-    case DATE:
-      type.setKind(OrcProto.Type.Kind.DATE);
-      break;
-    case DECIMAL:
-      type.setKind(OrcProto.Type.Kind.DECIMAL);
-      type.setPrecision(typeDescr.getPrecision());
-      type.setScale(typeDescr.getScale());
-      break;
-    case LIST:
-      type.setKind(OrcProto.Type.Kind.LIST);
-      type.addSubtypes(++subtype);
-      result.add(type.build());
-      needsAdd = false;
-      appendOrcTypesRebuildSubtypes(result, children.get(0));
-      break;
-    case MAP:
-      {
-        // Make room for MAP type.
-        result.add(null);
-  
-        // Add MAP type pair in order to determine their subtype values.
-        appendOrcTypesRebuildSubtypes(result, children.get(0));
-        int subtype2 = result.size();
-        appendOrcTypesRebuildSubtypes(result, children.get(1));
-        type.setKind(OrcProto.Type.Kind.MAP);
-        type.addSubtypes(subtype + 1);
-        type.addSubtypes(subtype2);
-        result.set(subtype, type.build());
-        needsAdd = false;
-      }
-      break;
-    case STRUCT:
-      {
-        List<String> fieldNames = typeDescr.getFieldNames();
-
-        // Make room for STRUCT type.
-        result.add(null);
-
-        List<Integer> fieldSubtypes = new ArrayList<Integer>(fieldNames.size());
-        for(TypeDescription child: children) {
-          int fieldSubtype = result.size();
-          fieldSubtypes.add(fieldSubtype);
-          appendOrcTypesRebuildSubtypes(result, child);
-        }
-
-        type.setKind(OrcProto.Type.Kind.STRUCT);
-
-        for (int i = 0 ; i < fieldNames.size(); i++) {
-          type.addSubtypes(fieldSubtypes.get(i));
-          type.addFieldNames(fieldNames.get(i));
-        }
-        result.set(subtype, type.build());
-        needsAdd = false;
-      }
-      break;
-    case UNION:
-      {
-        // Make room for UNION type.
-        result.add(null);
-
-        List<Integer> unionSubtypes = new ArrayList<Integer>(children.size());
-        for(TypeDescription child: children) {
-          int unionSubtype = result.size();
-          unionSubtypes.add(unionSubtype);
-          appendOrcTypesRebuildSubtypes(result, child);
-        }
-
-        type.setKind(OrcProto.Type.Kind.UNION);
-        for (int i = 0 ; i < children.size(); i++) {
-          type.addSubtypes(unionSubtypes.get(i));
-        }
-        result.set(subtype, type.build());
-        needsAdd = false;
-      }
-      break;
-    default:
-      throw new IllegalArgumentException("Unknown category: " + typeDescr.getCategory());
-    }
-    if (needsAdd) {
-      result.add(type.build());
-    }
-  }
-
-  /**
-   * NOTE: This method ignores the subtype numbers in the OrcProto.Type and rebuilds the subtype
-   * numbers based on the length of the result list being appended to.
-   *
-   * @param result
-   * @param types
-   * @param columnId
-   */
-  public static int appendOrcTypesRebuildSubtypes(List<OrcProto.Type> result,
-      List<OrcProto.Type> types, int columnId) {
-
-    OrcProto.Type oldType = types.get(columnId++);
-
-    int subtype = result.size();
-    OrcProto.Type.Builder builder = OrcProto.Type.newBuilder();
-    boolean needsAdd = true;
-    switch (oldType.getKind()) {
-    case BOOLEAN:
-      builder.setKind(OrcProto.Type.Kind.BOOLEAN);
-      break;
-    case BYTE:
-      builder.setKind(OrcProto.Type.Kind.BYTE);
-      break;
-    case SHORT:
-      builder.setKind(OrcProto.Type.Kind.SHORT);
-      break;
-    case INT:
-      builder.setKind(OrcProto.Type.Kind.INT);
-      break;
-    case LONG:
-      builder.setKind(OrcProto.Type.Kind.LONG);
-      break;
-    case FLOAT:
-      builder.setKind(OrcProto.Type.Kind.FLOAT);
-      break;
-    case DOUBLE:
-      builder.setKind(OrcProto.Type.Kind.DOUBLE);
-      break;
-    case STRING:
-      builder.setKind(OrcProto.Type.Kind.STRING);
-      break;
-    case CHAR:
-      builder.setKind(OrcProto.Type.Kind.CHAR);
-      builder.setMaximumLength(oldType.getMaximumLength());
-      break;
-    case VARCHAR:
-      builder.setKind(OrcProto.Type.Kind.VARCHAR);
-      builder.setMaximumLength(oldType.getMaximumLength());
-      break;
-    case BINARY:
-      builder.setKind(OrcProto.Type.Kind.BINARY);
-      break;
-    case TIMESTAMP:
-      builder.setKind(OrcProto.Type.Kind.TIMESTAMP);
-      break;
-    case DATE:
-      builder.setKind(OrcProto.Type.Kind.DATE);
-      break;
-    case DECIMAL:
-      builder.setKind(OrcProto.Type.Kind.DECIMAL);
-      builder.setPrecision(oldType.getPrecision());
-      builder.setScale(oldType.getScale());
-      break;
-    case LIST:
-      builder.setKind(OrcProto.Type.Kind.LIST);
-      builder.addSubtypes(++subtype);
-      result.add(builder.build());
-      needsAdd = false;
-      columnId = appendOrcTypesRebuildSubtypes(result, types, columnId);
-      break;
-    case MAP:
-      {
-        // Make room for MAP type.
-        result.add(null);
-  
-        // Add MAP type pair in order to determine their subtype values.
-        columnId = appendOrcTypesRebuildSubtypes(result, types, columnId);
-        int subtype2 = result.size();
-        columnId = appendOrcTypesRebuildSubtypes(result, types, columnId);
-        builder.setKind(OrcProto.Type.Kind.MAP);
-        builder.addSubtypes(subtype + 1);
-        builder.addSubtypes(subtype2);
-        result.set(subtype, builder.build());
-        needsAdd = false;
-      }
-      break;
-    case STRUCT:
-      {
-        List<String> fieldNames = oldType.getFieldNamesList();
-
-        // Make room for STRUCT type.
-        result.add(null);
-
-        List<Integer> fieldSubtypes = new ArrayList<Integer>(fieldNames.size());
-        for(int i = 0 ; i < fieldNames.size(); i++) {
-          int fieldSubtype = result.size();
-          fieldSubtypes.add(fieldSubtype);
-          columnId = appendOrcTypesRebuildSubtypes(result, types, columnId);
-        }
-
-        builder.setKind(OrcProto.Type.Kind.STRUCT);
-
-        for (int i = 0 ; i < fieldNames.size(); i++) {
-          builder.addSubtypes(fieldSubtypes.get(i));
-          builder.addFieldNames(fieldNames.get(i));
-        }
-        result.set(subtype, builder.build());
-        needsAdd = false;
-      }
-      break;
-    case UNION:
-      {
-        int subtypeCount = oldType.getSubtypesCount();
-
-        // Make room for UNION type.
-        result.add(null);
-
-        List<Integer> unionSubtypes = new ArrayList<Integer>(subtypeCount);
-        for(int i = 0 ; i < subtypeCount; i++) {
-          int unionSubtype = result.size();
-          unionSubtypes.add(unionSubtype);
-          columnId = appendOrcTypesRebuildSubtypes(result, types, columnId);
-        }
-
-        builder.setKind(OrcProto.Type.Kind.UNION);
-        for (int i = 0 ; i < subtypeCount; i++) {
-          builder.addSubtypes(unionSubtypes.get(i));
-        }
-        result.set(subtype, builder.build());
-        needsAdd = false;
-      }
-      break;
-    default:
-      throw new IllegalArgumentException("Unknown category: " + oldType.getKind());
-    }
-    if (needsAdd) {
-      result.add(builder.build());
-    }
-    return columnId;
-  }
-
-  /**
-   * Translate the given rootColumn from the list of types to a TypeDescription.
-   * @param types all of the types
-   * @param rootColumn translate this type
-   * @return a new TypeDescription that matches the given rootColumn
-   */
-  public static
-        TypeDescription convertTypeFromProtobuf(List<OrcProto.Type> types,
-                                                int rootColumn) {
-    OrcProto.Type type = types.get(rootColumn);
-    switch (type.getKind()) {
-      case BOOLEAN:
-        return TypeDescription.createBoolean();
-      case BYTE:
-        return TypeDescription.createByte();
-      case SHORT:
-        return TypeDescription.createShort();
-      case INT:
-        return TypeDescription.createInt();
-      case LONG:
-        return TypeDescription.createLong();
-      case FLOAT:
-        return TypeDescription.createFloat();
-      case DOUBLE:
-        return TypeDescription.createDouble();
-      case STRING:
-        return TypeDescription.createString();
-      case CHAR:
-      case VARCHAR: {
-        TypeDescription result = type.getKind() == OrcProto.Type.Kind.CHAR ?
-            TypeDescription.createChar() : TypeDescription.createVarchar();
-        if (type.hasMaximumLength()) {
-          result.withMaxLength(type.getMaximumLength());
-        }
-        return result;
-      }
-      case BINARY:
-        return TypeDescription.createBinary();
-      case TIMESTAMP:
-        return TypeDescription.createTimestamp();
-      case DATE:
-        return TypeDescription.createDate();
-      case DECIMAL: {
-        TypeDescription result = TypeDescription.createDecimal();
-        if (type.hasScale()) {
-          result.withScale(type.getScale());
-        }
-        if (type.hasPrecision()) {
-          result.withPrecision(type.getPrecision());
-        }
-        return result;
-      }
-      case LIST:
-        return TypeDescription.createList(
-            convertTypeFromProtobuf(types, type.getSubtypes(0)));
-      case MAP:
-        return TypeDescription.createMap(
-            convertTypeFromProtobuf(types, type.getSubtypes(0)),
-            convertTypeFromProtobuf(types, type.getSubtypes(1)));
-      case STRUCT: {
-        TypeDescription result = TypeDescription.createStruct();
-        for(int f=0; f < type.getSubtypesCount(); ++f) {
-          result.addField(type.getFieldNames(f),
-              convertTypeFromProtobuf(types, type.getSubtypes(f)));
-        }
-        return result;
-      }
-      case UNION: {
-        TypeDescription result = TypeDescription.createUnion();
-        for(int f=0; f < type.getSubtypesCount(); ++f) {
-          result.addUnionChild(
-              convertTypeFromProtobuf(types, type.getSubtypes(f)));
-        }
-        return result;
-      }
-    }
-    throw new IllegalArgumentException("Unknown ORC type " + type.getKind());
-  }
-
-  public static List<StripeInformation> convertProtoStripesToStripes(
-      List<OrcProto.StripeInformation> stripes) {
-    List<StripeInformation> result = new ArrayList<StripeInformation>(stripes.size());
-    for (OrcProto.StripeInformation info : stripes) {
-      result.add(new ReaderImpl.StripeInformationImpl(info));
-    }
-    return result;
-  }
-
-  public static List<TypeDescription> setTypeBuilderFromSchema(
-      OrcProto.Type.Builder type, TypeDescription schema) {
-    List<TypeDescription> children = schema.getChildren();
-    switch (schema.getCategory()) {
-      case BOOLEAN:
-        type.setKind(OrcProto.Type.Kind.BOOLEAN);
-        break;
-      case BYTE:
-        type.setKind(OrcProto.Type.Kind.BYTE);
-        break;
-      case SHORT:
-        type.setKind(OrcProto.Type.Kind.SHORT);
-        break;
-      case INT:
-        type.setKind(OrcProto.Type.Kind.INT);
-        break;
-      case LONG:
-        type.setKind(OrcProto.Type.Kind.LONG);
-        break;
-      case FLOAT:
-        type.setKind(OrcProto.Type.Kind.FLOAT);
-        break;
-      case DOUBLE:
-        type.setKind(OrcProto.Type.Kind.DOUBLE);
-        break;
-      case STRING:
-        type.setKind(OrcProto.Type.Kind.STRING);
-        break;
-      case CHAR:
-        type.setKind(OrcProto.Type.Kind.CHAR);
-        type.setMaximumLength(schema.getMaxLength());
-        break;
-      case VARCHAR:
-        type.setKind(OrcProto.Type.Kind.VARCHAR);
-        type.setMaximumLength(schema.getMaxLength());
-        break;
-      case BINARY:
-        type.setKind(OrcProto.Type.Kind.BINARY);
-        break;
-      case TIMESTAMP:
-        type.setKind(OrcProto.Type.Kind.TIMESTAMP);
-        break;
-      case DATE:
-        type.setKind(OrcProto.Type.Kind.DATE);
-        break;
-      case DECIMAL:
-        type.setKind(OrcProto.Type.Kind.DECIMAL);
-        type.setPrecision(schema.getPrecision());
-        type.setScale(schema.getScale());
-        break;
-      case LIST:
-        type.setKind(OrcProto.Type.Kind.LIST);
-        type.addSubtypes(children.get(0).getId());
-        break;
-      case MAP:
-        type.setKind(OrcProto.Type.Kind.MAP);
-        for(TypeDescription t: children) {
-          type.addSubtypes(t.getId());
-        }
-        break;
-      case STRUCT:
-        type.setKind(OrcProto.Type.Kind.STRUCT);
-        for(TypeDescription t: children) {
-          type.addSubtypes(t.getId());
-        }
-        for(String field: schema.getFieldNames()) {
-          type.addFieldNames(field);
-        }
-        break;
-      case UNION:
-        type.setKind(OrcProto.Type.Kind.UNION);
-        for(TypeDescription t: children) {
-          type.addSubtypes(t.getId());
-        }
-        break;
-      default:
-        throw new IllegalArgumentException("Unknown category: " +
-          schema.getCategory());
-    }
-    return children;
-  }
-}
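
A quick sketch of how includeColumns() flattens the schema, mirroring the javadoc example above (the demo class is illustrative only):

import org.apache.orc.OrcUtils;
import org.apache.orc.TypeDescription;

public class IncludeColumnsDemo {
  public static void main(String[] args) {
    // a, b, d are primitives; c is list<string>, which flattens to two ids.
    TypeDescription schema = TypeDescription.createStruct()
        .addField("a", TypeDescription.createInt())
        .addField("b", TypeDescription.createInt())
        .addField("c", TypeDescription.createList(TypeDescription.createString()))
        .addField("d", TypeDescription.createInt());

    // Root struct is id 0; a=1, b=2, c=3, c.element=4, d=5.
    boolean[] include = OrcUtils.includeColumns("a,c", schema);
    for (int i = 0; i < include.length; ++i) {
      System.out.println(i + " -> " + include[i]);
    }
  }
}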

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/Reader.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/Reader.java b/orc/src/java/org/apache/orc/Reader.java
deleted file mode 100644
index c2d5235..0000000
--- a/orc/src/java/org/apache/orc/Reader.java
+++ /dev/null
@@ -1,375 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.List;
-
-import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
-
-/**
- * The interface for reading ORC files.
- *
- * One Reader can support multiple concurrent RecordReaders.
- */
-public interface Reader {
-
-  /**
-   * Get the number of rows in the file.
-   * @return the number of rows
-   */
-  long getNumberOfRows();
-
-  /**
-   * Get the deserialized data size of the file
-   * @return raw data size
-   */
-  long getRawDataSize();
-
-  /**
-   * Get the deserialized data size of the specified columns
-   * @param colNames
-   * @return raw data size of columns
-   */
-  long getRawDataSizeOfColumns(List<String> colNames);
-
-  /**
-   * Get the deserialized data size of the specified column ids
-   * @param colIds - internal column id (check orcfiledump for column ids)
-   * @return raw data size of columns
-   */
-  long getRawDataSizeFromColIndices(List<Integer> colIds);
-
-  /**
-   * Get the user metadata keys.
-   * @return the set of metadata keys
-   */
-  List<String> getMetadataKeys();
-
-  /**
-   * Get a user metadata value.
-   * @param key a key given by the user
-   * @return the bytes associated with the given key
-   */
-  ByteBuffer getMetadataValue(String key);
-
-  /**
-   * Did the user set the given metadata value?
-   * @param key the key to check
-   * @return true if the metadata value was set
-   */
-  boolean hasMetadataValue(String key);
-
-  /**
-   * Get the compression kind.
-   * @return the kind of compression in the file
-   */
-  CompressionKind getCompressionKind();
-
-  /**
-   * Get the buffer size for the compression.
-   * @return number of bytes to buffer for the compression codec.
-   */
-  int getCompressionSize();
-
-  /**
-   * Get the number of rows per entry in the row index.
-   * @return the number of rows per entry in the row index, or 0 if there
-   * is no row index.
-   */
-  int getRowIndexStride();
-
-  /**
-   * Get the list of stripes.
-   * @return the information about the stripes in order
-   */
-  List<StripeInformation> getStripes();
-
-  /**
-   * Get the length of the file.
-   * @return the number of bytes in the file
-   */
-  long getContentLength();
-
-  /**
-   * Get the statistics about the columns in the file.
-   * @return the information about the column
-   */
-  ColumnStatistics[] getStatistics();
-
-  /**
-   * Get the type of rows in this ORC file.
-   */
-  TypeDescription getSchema();
-
-  /**
-   * Get the list of types contained in the file. The root type is the first
-   * type in the list.
-   * @return the list of flattened types
-   * @deprecated use getSchema instead
-   */
-  List<OrcProto.Type> getTypes();
-
-  /**
-   * Get the file format version.
-   */
-  OrcFile.Version getFileVersion();
-
-  /**
-   * Get the version of the writer of this file.
-   */
-  OrcFile.WriterVersion getWriterVersion();
-
-  /**
-   * Get the file tail (footer + postscript)
-   *
-   * @return - file tail
-   */
-  OrcProto.FileTail getFileTail();
-
-  /**
-   * Options for creating a RecordReader.
-   */
-  public static class Options {
-    private boolean[] include;
-    private long offset = 0;
-    private long length = Long.MAX_VALUE;
-    private SearchArgument sarg = null;
-    private String[] columnNames = null;
-    private Boolean useZeroCopy = null;
-    private Boolean skipCorruptRecords = null;
-    private TypeDescription schema = null;
-    private DataReader dataReader = null;
-
-    /**
-     * Set the list of columns to read.
-     * @param include a list of columns to read
-     * @return this
-     */
-    public Options include(boolean[] include) {
-      this.include = include;
-      return this;
-    }
-
-    /**
-     * Set the range of bytes to read
-     * @param offset the starting byte offset
-     * @param length the number of bytes to read
-     * @return this
-     */
-    public Options range(long offset, long length) {
-      this.offset = offset;
-      this.length = length;
-      return this;
-    }
-
-    /**
-     * Set the schema-on-read type description.
-     */
-    public Options schema(TypeDescription schema) {
-      this.schema = schema;
-      return this;
-    }
-
-    /**
-     * Set search argument for predicate push down.
-     * @param sarg the search argument
-     * @param columnNames the column names for the search argument
-     * @return this
-     */
-    public Options searchArgument(SearchArgument sarg, String[] columnNames) {
-      this.sarg = sarg;
-      this.columnNames = columnNames;
-      return this;
-    }
-
-    /**
-     * Set whether to use zero copy from HDFS.
-     * @param value the new zero copy flag
-     * @return this
-     */
-    public Options useZeroCopy(boolean value) {
-      this.useZeroCopy = value;
-      return this;
-    }
-
-    public Options dataReader(DataReader value) {
-      this.dataReader = value;
-      return this;
-    }
-
-    /**
-     * Set whether to skip corrupt records.
-     * @param value the new skip corrupt records flag
-     * @return this
-     */
-    public Options skipCorruptRecords(boolean value) {
-      this.skipCorruptRecords = value;
-      return this;
-    }
-
-    public boolean[] getInclude() {
-      return include;
-    }
-
-    public long getOffset() {
-      return offset;
-    }
-
-    public long getLength() {
-      return length;
-    }
-
-    public TypeDescription getSchema() {
-      return schema;
-    }
-
-    public SearchArgument getSearchArgument() {
-      return sarg;
-    }
-
-    public String[] getColumnNames() {
-      return columnNames;
-    }
-
-    public long getMaxOffset() {
-      long result = offset + length;
-      if (result < 0) {
-        result = Long.MAX_VALUE;
-      }
-      return result;
-    }
-
-    public Boolean getUseZeroCopy() {
-      return useZeroCopy;
-    }
-
-    public Boolean getSkipCorruptRecords() {
-      return skipCorruptRecords;
-    }
-
-    public DataReader getDataReader() {
-      return dataReader;
-    }
-
-    public Options clone() {
-      Options result = new Options();
-      result.include = include;
-      result.offset = offset;
-      result.length = length;
-      result.sarg = sarg;
-      result.schema = schema;
-      result.columnNames = columnNames;
-      result.useZeroCopy = useZeroCopy;
-      result.skipCorruptRecords = skipCorruptRecords;
-      result.dataReader = dataReader == null ? null : dataReader.clone();
-      return result;
-    }
-
-    @Override
-    public String toString() {
-      StringBuilder buffer = new StringBuilder();
-      buffer.append("{include: ");
-      if (include == null) {
-        buffer.append("null");
-      } else {
-        buffer.append("[");
-        for(int i=0; i < include.length; ++i) {
-          if (i != 0) {
-            buffer.append(", ");
-          }
-          buffer.append(include[i]);
-        }
-        buffer.append("]");
-      }
-      buffer.append(", offset: ");
-      buffer.append(offset);
-      buffer.append(", length: ");
-      buffer.append(length);
-      if (sarg != null) {
-        buffer.append(", sarg: ");
-        buffer.append(sarg.toString());
-        buffer.append(", columns: [");
-        for(int i=0; i < columnNames.length; ++i) {
-          if (i != 0) {
-            buffer.append(", ");
-          }
-          buffer.append("'");
-          buffer.append(columnNames[i]);
-          buffer.append("'");
-        }
-        buffer.append("]");
-      }
-      if (schema != null) {
-        buffer.append(", schema: ");
-        schema.printToBuffer(buffer);
-      }
-      buffer.append("}");
-      return buffer.toString();
-    }
-  }
-
-  /**
-   * Create a RecordReader that reads everything with the default options.
-   * @return a new RecordReader
-   * @throws IOException
-   */
-  RecordReader rows() throws IOException;
-
-  /**
-   * Create a RecordReader that uses the options given.
-   * This method can't be named rows, because many callers used rows(null)
-   * before the rows() method was introduced.
-   * @param options the options to read with
-   * @return a new RecordReader
-   * @throws IOException
-   */
-  RecordReader rows(Options options) throws IOException;
-
-  /**
-   * @return List of integers representing version of the file, in order from major to minor.
-   */
-  List<Integer> getVersionList();
-
-  /**
-   * @return Gets the size of metadata, in bytes.
-   */
-  int getMetadataSize();
-
-  /**
-   * @return Stripe statistics, in original protobuf form.
-   */
-  List<OrcProto.StripeStatistics> getOrcProtoStripeStatistics();
-
-  /**
-   * @return Stripe statistics.
-   */
-  List<StripeStatistics> getStripeStatistics() throws IOException;
-
-  /**
-   * @return File statistics, in original protobuf form.
-   */
-  List<OrcProto.ColumnStatistics> getOrcProtoFileStatistics();
-
-  /**
-   * @return Serialized file metadata read from disk for the purposes of caching, etc.
-   */
-  ByteBuffer getSerializedFileFooter();
-}
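
Not part of the patch itself: a minimal sketch of how the Reader.Options builder
removed above is typically exercised (the same API reappears under
org.apache.hive.orc elsewhere in this commit). The file path, the Configuration,
and the OrcFile.createReader/readerOptions entry points are assumptions for
illustration, not something this hunk shows.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.orc.OrcFile;
import org.apache.orc.Reader;
import org.apache.orc.RecordReader;
import org.apache.orc.TypeDescription;

public class ReaderOptionsSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Reader reader = OrcFile.createReader(new Path("/tmp/example.orc"),  // hypothetical path
        OrcFile.readerOptions(conf));
    TypeDescription schema = reader.getSchema();

    boolean[] include = new boolean[schema.getMaximumId() + 1];
    java.util.Arrays.fill(include, true);            // project every column
    Reader.Options options = new Reader.Options()
        .include(include)
        .range(0, Long.MAX_VALUE)                    // read the whole file
        .skipCorruptRecords(false);

    RecordReader rows = reader.rows(options);
    VectorizedRowBatch batch = schema.createRowBatch();
    while (rows.nextBatch(batch)) {
      // batch.size rows are now populated in batch.cols
    }
    rows.close();
  }
}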

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/RecordReader.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/RecordReader.java b/orc/src/java/org/apache/orc/RecordReader.java
deleted file mode 100644
index 09ba0f0..0000000
--- a/orc/src/java/org/apache/orc/RecordReader.java
+++ /dev/null
@@ -1,64 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc;
-
-import java.io.IOException;
-
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-
-/**
- * A row-by-row iterator for ORC files.
- */
-public interface RecordReader {
-  /**
-   * Read the next row batch. The size of the batch to read cannot be
-   * controlled by the callers. Callers need to look at
-   * VectorizedRowBatch.size of the returned object to know the batch
-   * size read.
-   * @param batch a row batch object to read into
-   * @return whether more rows were available to read
-   * @throws java.io.IOException
-   */
-  boolean nextBatch(VectorizedRowBatch batch) throws IOException;
-
-  /**
-   * Get the row number of the row that will be returned by the following
-   * call to next().
-   * @return the row number from 0 to the number of rows in the file
-   * @throws java.io.IOException
-   */
-  long getRowNumber() throws IOException;
-
-  /**
-   * Get the progress of the reader through the rows.
-   * @return a fraction between 0.0 and 1.0 of rows read
-   * @throws java.io.IOException
-   */
-  float getProgress() throws IOException;
-
-  /**
-   * Release the resources associated with the given reader.
-   * @throws java.io.IOException
-   */
-  void close() throws IOException;
-
-  /**
-   * Seek to a particular row number.
-   */
-  void seekToRow(long rowCount) throws IOException;
-}
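
Continuing the sketch above (again not part of the patch): the contract in
nextBatch() is that the caller only supplies capacity and must read the actual
row count back from VectorizedRowBatch.size. The rows and schema variables are
assumed from the previous sketch, and column 0 is assumed to be a long column
(LongColumnVector comes from the same vectorization package as VectorizedRowBatch).

    VectorizedRowBatch batch = schema.createRowBatch(1024); // 1024 is only a capacity hint
    rows.seekToRow(1000);                                   // position before reading
    long sum = 0;
    while (rows.nextBatch(batch)) {
      LongColumnVector col0 = (LongColumnVector) batch.cols[0];
      for (int r = 0; r < batch.size; ++r) {                // batch.size, not the requested 1024
        sum += col0.vector[r];
      }
    }
    System.out.println("sum=" + sum + ", progress=" + rows.getProgress());
    rows.close();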

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/StringColumnStatistics.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/StringColumnStatistics.java b/orc/src/java/org/apache/orc/StringColumnStatistics.java
deleted file mode 100644
index 5a868d0..0000000
--- a/orc/src/java/org/apache/orc/StringColumnStatistics.java
+++ /dev/null
@@ -1,43 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc;
-
-import org.apache.orc.ColumnStatistics;
-
-/**
- * Statistics for string columns.
- */
-public interface StringColumnStatistics extends ColumnStatistics {
-  /**
-   * Get the minimum string.
-   * @return the minimum
-   */
-  String getMinimum();
-
-  /**
-   * Get the maximum string.
-   * @return the maximum
-   */
-  String getMaximum();
-
-  /**
-   * Get the total length of all strings
-   * @return the sum (total length)
-   */
-  long getSum();
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/StripeInformation.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/StripeInformation.java b/orc/src/java/org/apache/orc/StripeInformation.java
deleted file mode 100644
index 38f7eba..0000000
--- a/orc/src/java/org/apache/orc/StripeInformation.java
+++ /dev/null
@@ -1,59 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc;
-
-/**
- * Information about the stripes in an ORC file that is provided by the Reader.
- */
-public interface StripeInformation {
-  /**
-   * Get the byte offset of the start of the stripe.
-   * @return the bytes from the start of the file
-   */
-  long getOffset();
-
-  /**
-   * Get the total length of the stripe in bytes.
-   * @return the number of bytes in the stripe
-   */
-  long getLength();
-
-  /**
-   * Get the length of the stripe's indexes.
-   * @return the number of bytes in the index
-   */
-  long getIndexLength();
-
-  /**
-   * Get the length of the stripe's data.
-   * @return the number of bytes in the stripe
-   */
-  long getDataLength();
-
-  /**
-   * Get the length of the stripe's tail section, which contains its index.
-   * @return the number of bytes in the tail
-   */
-  long getFooterLength();
-
-  /**
-   * Get the number of rows in the stripe.
-   * @return a count of the number of rows
-   */
-  long getNumberOfRows();
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/StripeStatistics.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/StripeStatistics.java b/orc/src/java/org/apache/orc/StripeStatistics.java
deleted file mode 100644
index 8fc91cb..0000000
--- a/orc/src/java/org/apache/orc/StripeStatistics.java
+++ /dev/null
@@ -1,44 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc;
-
-import org.apache.orc.impl.ColumnStatisticsImpl;
-
-import java.util.List;
-
-public class StripeStatistics {
-  private final List<OrcProto.ColumnStatistics> cs;
-
-  public StripeStatistics(List<OrcProto.ColumnStatistics> list) {
-    this.cs = list;
-  }
-
-  /**
-   * Return list of column statistics
-   *
-   * @return column stats
-   */
-  public ColumnStatistics[] getColumnStatistics() {
-    ColumnStatistics[] result = new ColumnStatistics[cs.size()];
-    for (int i = 0; i < result.length; ++i) {
-      result[i] = ColumnStatisticsImpl.deserialize(cs.get(i));
-    }
-    return result;
-  }
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/TimestampColumnStatistics.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/TimestampColumnStatistics.java b/orc/src/java/org/apache/orc/TimestampColumnStatistics.java
deleted file mode 100644
index 27dc49f..0000000
--- a/orc/src/java/org/apache/orc/TimestampColumnStatistics.java
+++ /dev/null
@@ -1,38 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc;
-
-import java.sql.Timestamp;
-
-/**
- * Statistics for Timestamp columns.
- */
-public interface TimestampColumnStatistics extends ColumnStatistics {
-  /**
-   * Get the minimum value for the column.
-   * @return minimum value
-   */
-  Timestamp getMinimum();
-
-  /**
-   * Get the maximum value for the column.
-   * @return maximum value
-   */
-  Timestamp getMaximum();
-}


[28/37] hive git commit: HIVE-17118. Move the hive-orc source files to make the package names unique.

Posted by om...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/impl/RunLengthByteWriter.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/impl/RunLengthByteWriter.java b/orc/src/java/org/apache/hive/orc/impl/RunLengthByteWriter.java
new file mode 100644
index 0000000..b94b6a9
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/impl/RunLengthByteWriter.java
@@ -0,0 +1,106 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc.impl;
+
+import java.io.IOException;
+
+/**
+ * A streamFactory that writes a sequence of bytes. A control byte is written before
+ * each run: positive values 0 to 127 mean 3 to 130 repetitions of a single byte. If
+ * the control byte is -1 to -128, 1 to 128 literal byte values follow.
+ */
+public class RunLengthByteWriter {
+  static final int MIN_REPEAT_SIZE = 3;
+  static final int MAX_LITERAL_SIZE = 128;
+  static final int MAX_REPEAT_SIZE = 127 + MIN_REPEAT_SIZE;
+  private final PositionedOutputStream output;
+  private final byte[] literals = new byte[MAX_LITERAL_SIZE];
+  private int numLiterals = 0;
+  private boolean repeat = false;
+  private int tailRunLength = 0;
+
+  public RunLengthByteWriter(PositionedOutputStream output) {
+    this.output = output;
+  }
+
+  private void writeValues() throws IOException {
+    if (numLiterals != 0) {
+      if (repeat) {
+        output.write(numLiterals - MIN_REPEAT_SIZE);
+        output.write(literals, 0, 1);
+      } else {
+        output.write(-numLiterals);
+        output.write(literals, 0, numLiterals);
+      }
+      repeat = false;
+      tailRunLength = 0;
+      numLiterals = 0;
+    }
+  }
+
+  public void flush() throws IOException {
+    writeValues();
+    output.flush();
+  }
+
+  public void write(byte value) throws IOException {
+    if (numLiterals == 0) {
+      literals[numLiterals++] = value;
+      tailRunLength = 1;
+    } else if (repeat) {
+      if (value == literals[0]) {
+        numLiterals += 1;
+        if (numLiterals == MAX_REPEAT_SIZE) {
+          writeValues();
+        }
+      } else {
+        writeValues();
+        literals[numLiterals++] = value;
+        tailRunLength = 1;
+      }
+    } else {
+      if (value == literals[numLiterals - 1]) {
+        tailRunLength += 1;
+      } else {
+        tailRunLength = 1;
+      }
+      if (tailRunLength == MIN_REPEAT_SIZE) {
+        if (numLiterals + 1 == MIN_REPEAT_SIZE) {
+          repeat = true;
+          numLiterals += 1;
+        } else {
+          numLiterals -= MIN_REPEAT_SIZE - 1;
+          writeValues();
+          literals[0] = value;
+          repeat = true;
+          numLiterals = MIN_REPEAT_SIZE;
+        }
+      } else {
+        literals[numLiterals++] = value;
+        if (numLiterals == MAX_LITERAL_SIZE) {
+          writeValues();
+        }
+      }
+    }
+  }
+
+  public void getPosition(PositionRecorder recorder) throws IOException {
+    output.getPosition(recorder);
+    recorder.addPosition(numLiterals);
+  }
+}
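
The control-byte scheme documented above is small enough to decode by hand. Below
is an illustrative decoder sketch only (the project's real counterpart is
RunLengthByteReader); the sample bytes are worked out from writeValues(), not
captured from an actual stream.

  static byte[] decodeByteRle(java.io.InputStream in) throws java.io.IOException {
    java.io.ByteArrayOutputStream out = new java.io.ByteArrayOutputStream();
    int control;
    while ((control = in.read()) != -1) {
      if (control < 0x80) {              // run: control 0..127 means 3..130 repetitions
        int repeats = control + 3;       // MIN_REPEAT_SIZE
        int value = in.read();
        for (int i = 0; i < repeats; ++i) {
          out.write(value);
        }
      } else {                           // literals: control -1..-128 means 1..128 values
        int count = 0x100 - control;     // undo the two's-complement control byte
        for (int i = 0; i < count; ++i) {
          out.write(in.read());
        }
      }
    }
    return out.toByteArray();
  }

  // For example, writing 0x41 five times followed by 0x01, 0x02, 0x03 and then
  // calling flush() produces {0x02, 0x41, 0xFD, 0x01, 0x02, 0x03}: control 0x02 =
  // MIN_REPEAT_SIZE + 2 = 5 repetitions of 0x41, then control 0xFD (-3) followed
  // by three literal bytes.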

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/impl/RunLengthIntegerReader.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/impl/RunLengthIntegerReader.java b/orc/src/java/org/apache/hive/orc/impl/RunLengthIntegerReader.java
new file mode 100644
index 0000000..5b613f6
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/impl/RunLengthIntegerReader.java
@@ -0,0 +1,173 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc.impl;
+
+import java.io.EOFException;
+import java.io.IOException;
+
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+
+/**
+ * A reader that reads a sequence of integers.
+ */
+public class RunLengthIntegerReader implements IntegerReader {
+  private InStream input;
+  private final boolean signed;
+  private final long[] literals =
+    new long[RunLengthIntegerWriter.MAX_LITERAL_SIZE];
+  private int numLiterals = 0;
+  private int delta = 0;
+  private int used = 0;
+  private boolean repeat = false;
+  private SerializationUtils utils;
+
+  public RunLengthIntegerReader(InStream input, boolean signed) throws IOException {
+    this.input = input;
+    this.signed = signed;
+    this.utils = new SerializationUtils();
+  }
+
+  private void readValues(boolean ignoreEof) throws IOException {
+    int control = input.read();
+    if (control == -1) {
+      if (!ignoreEof) {
+        throw new EOFException("Read past end of RLE integer from " + input);
+      }
+      used = numLiterals = 0;
+      return;
+    } else if (control < 0x80) {
+      numLiterals = control + RunLengthIntegerWriter.MIN_REPEAT_SIZE;
+      used = 0;
+      repeat = true;
+      delta = input.read();
+      if (delta == -1) {
+        throw new EOFException("End of stream in RLE Integer from " + input);
+      }
+      // convert from 0 to 255 to -128 to 127 by converting to a signed byte
+      delta = (byte) (0 + delta);
+      if (signed) {
+        literals[0] = utils.readVslong(input);
+      } else {
+        literals[0] = utils.readVulong(input);
+      }
+    } else {
+      repeat = false;
+      numLiterals = 0x100 - control;
+      used = 0;
+      for(int i=0; i < numLiterals; ++i) {
+        if (signed) {
+          literals[i] = utils.readVslong(input);
+        } else {
+          literals[i] = utils.readVulong(input);
+        }
+      }
+    }
+  }
+
+  @Override
+  public boolean hasNext() throws IOException {
+    return used != numLiterals || input.available() > 0;
+  }
+
+  @Override
+  public long next() throws IOException {
+    long result;
+    if (used == numLiterals) {
+      readValues(false);
+    }
+    if (repeat) {
+      result = literals[0] + (used++) * delta;
+    } else {
+      result = literals[used++];
+    }
+    return result;
+  }
+
+  @Override
+  public void nextVector(ColumnVector previous,
+                         long[] data,
+                         int previousLen) throws IOException {
+    previous.isRepeating = true;
+    for (int i = 0; i < previousLen; i++) {
+      if (!previous.isNull[i]) {
+        data[i] = next();
+      } else {
+        // The default value of null for int type in vectorized
+        // processing is 1, so set that if the value is null
+        data[i] = 1;
+      }
+
+      // The default value for nulls in Vectorization for int types is 1
+      // and given that non null value can also be 1, we need to check for isNull also
+      // when determining the isRepeating flag.
+      if (previous.isRepeating
+          && i > 0
+          && (data[0] != data[i] || previous.isNull[0] != previous.isNull[i])) {
+        previous.isRepeating = false;
+      }
+    }
+  }
+
+  @Override
+  public void nextVector(ColumnVector vector,
+                         int[] data,
+                         int size) throws IOException {
+    if (vector.noNulls) {
+      for(int r=0; r < data.length && r < size; ++r) {
+        data[r] = (int) next();
+      }
+    } else if (!(vector.isRepeating && vector.isNull[0])) {
+      for(int r=0; r < data.length && r < size; ++r) {
+        if (!vector.isNull[r]) {
+          data[r] = (int) next();
+        } else {
+          data[r] = 1;
+        }
+      }
+    }
+  }
+
+  @Override
+  public void seek(PositionProvider index) throws IOException {
+    input.seek(index);
+    int consumed = (int) index.getNext();
+    if (consumed != 0) {
+      // a loop is required for cases where we break the run into two parts
+      while (consumed > 0) {
+        readValues(false);
+        used = consumed;
+        consumed -= numLiterals;
+      }
+    } else {
+      used = 0;
+      numLiterals = 0;
+    }
+  }
+
+  @Override
+  public void skip(long numValues) throws IOException {
+    while (numValues > 0) {
+      if (used == numLiterals) {
+        readValues(false);
+      }
+      long consume = Math.min(numValues, numLiterals - used);
+      used += consume;
+      numValues -= consume;
+    }
+  }
+}
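
For reference while reading readValues() above, two hand-built v1 runs and what
they decode to on a signed stream; the bytes follow the writer's format in this
patch and are illustrative, not captured from a real file.

  //   {0x02, 0x00, 0x0E}  control 0x02 -> MIN_REPEAT_SIZE + 2 = 5 values,
  //                       delta byte 0x00, base vint 0x0E = zigzag(7)
  //                       => 7, 7, 7, 7, 7
  //   {0xFE, 0x02, 0x15}  control 0xFE (-2) -> 2 literal signed vints:
  //                       zigzag decode 2 -> 1, zigzag decode 21 -> -11
  //                       => 1, -11
  // The signed vints use the standard zigzag mapping, equivalent to:
  static long zigzagDecode(long v) {
    return (v >>> 1) ^ -(v & 1);
  }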

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/impl/RunLengthIntegerReaderV2.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/impl/RunLengthIntegerReaderV2.java b/orc/src/java/org/apache/hive/orc/impl/RunLengthIntegerReaderV2.java
new file mode 100644
index 0000000..d0c2b54
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/impl/RunLengthIntegerReaderV2.java
@@ -0,0 +1,406 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc.impl;
+
+import java.io.EOFException;
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * A reader that reads a sequence of lightweight compressed integers. Refer to
+ * {@link RunLengthIntegerWriterV2} for a description of the various lightweight
+ * compression techniques.
+ */
+public class RunLengthIntegerReaderV2 implements IntegerReader {
+  public static final Logger LOG = LoggerFactory.getLogger(RunLengthIntegerReaderV2.class);
+
+  private InStream input;
+  private final boolean signed;
+  private final long[] literals = new long[RunLengthIntegerWriterV2.MAX_SCOPE];
+  private boolean isRepeating = false;
+  private int numLiterals = 0;
+  private int used = 0;
+  private final boolean skipCorrupt;
+  private final SerializationUtils utils;
+  private RunLengthIntegerWriterV2.EncodingType currentEncoding;
+
+  public RunLengthIntegerReaderV2(InStream input, boolean signed,
+      boolean skipCorrupt) throws IOException {
+    this.input = input;
+    this.signed = signed;
+    this.skipCorrupt = skipCorrupt;
+    this.utils = new SerializationUtils();
+  }
+
+  private final static RunLengthIntegerWriterV2.EncodingType[] encodings = RunLengthIntegerWriterV2.EncodingType.values();
+  private void readValues(boolean ignoreEof) throws IOException {
+    // read the first 2 bits and determine the encoding type
+    isRepeating = false;
+    int firstByte = input.read();
+    if (firstByte < 0) {
+      if (!ignoreEof) {
+        throw new EOFException("Read past end of RLE integer from " + input);
+      }
+      used = numLiterals = 0;
+      return;
+    }
+    currentEncoding = encodings[(firstByte >>> 6) & 0x03];
+    switch (currentEncoding) {
+    case SHORT_REPEAT: readShortRepeatValues(firstByte); break;
+    case DIRECT: readDirectValues(firstByte); break;
+    case PATCHED_BASE: readPatchedBaseValues(firstByte); break;
+    case DELTA: readDeltaValues(firstByte); break;
+    default: throw new IOException("Unknown encoding " + currentEncoding);
+    }
+  }
+
+  private void readDeltaValues(int firstByte) throws IOException {
+
+    // extract the number of fixed bits
+    int fb = (firstByte >>> 1) & 0x1f;
+    if (fb != 0) {
+      fb = utils.decodeBitWidth(fb);
+    }
+
+    // extract the blob run length
+    int len = (firstByte & 0x01) << 8;
+    len |= input.read();
+
+    // read the first value stored as vint
+    long firstVal = 0;
+    if (signed) {
+      firstVal = utils.readVslong(input);
+    } else {
+      firstVal = utils.readVulong(input);
+    }
+
+    // store first value to result buffer
+    long prevVal = firstVal;
+    literals[numLiterals++] = firstVal;
+
+    // if fixed bits is 0 then all values have fixed delta
+    if (fb == 0) {
+      // read the fixed delta value stored as vint (deltas can be negative even
+      // if all number are positive)
+      long fd = utils.readVslong(input);
+      if (fd == 0) {
+        isRepeating = true;
+        assert numLiterals == 1;
+        Arrays.fill(literals, numLiterals, numLiterals + len, literals[0]);
+        numLiterals += len;
+      } else {
+        // add fixed deltas to adjacent values
+        for(int i = 0; i < len; i++) {
+          literals[numLiterals++] = literals[numLiterals - 2] + fd;
+        }
+      }
+    } else {
+      long deltaBase = utils.readVslong(input);
+      // add delta base and first value
+      literals[numLiterals++] = firstVal + deltaBase;
+      prevVal = literals[numLiterals - 1];
+      len -= 1;
+
+      // write the unpacked values, add it to previous value and store final
+      // value to result buffer. if the delta base value is negative then it
+      // is a decreasing sequence else an increasing sequence
+      utils.readInts(literals, numLiterals, len, fb, input);
+      while (len > 0) {
+        if (deltaBase < 0) {
+          literals[numLiterals] = prevVal - literals[numLiterals];
+        } else {
+          literals[numLiterals] = prevVal + literals[numLiterals];
+        }
+        prevVal = literals[numLiterals];
+        len--;
+        numLiterals++;
+      }
+    }
+  }
+
+  private void readPatchedBaseValues(int firstByte) throws IOException {
+
+    // extract the number of fixed bits
+    int fbo = (firstByte >>> 1) & 0x1f;
+    int fb = utils.decodeBitWidth(fbo);
+
+    // extract the run length of data blob
+    int len = (firstByte & 0x01) << 8;
+    len |= input.read();
+    // runs are always one off
+    len += 1;
+
+    // extract the number of bytes occupied by base
+    int thirdByte = input.read();
+    int bw = (thirdByte >>> 5) & 0x07;
+    // base width is one off
+    bw += 1;
+
+    // extract patch width
+    int pwo = thirdByte & 0x1f;
+    int pw = utils.decodeBitWidth(pwo);
+
+    // read fourth byte and extract patch gap width
+    int fourthByte = input.read();
+    int pgw = (fourthByte >>> 5) & 0x07;
+    // patch gap width is one off
+    pgw += 1;
+
+    // extract the length of the patch list
+    int pl = fourthByte & 0x1f;
+
+    // read the next base width number of bytes to extract base value
+    long base = utils.bytesToLongBE(input, bw);
+    long mask = (1L << ((bw * 8) - 1));
+    // if MSB of base value is 1 then base is negative value else positive
+    if ((base & mask) != 0) {
+      base = base & ~mask;
+      base = -base;
+    }
+
+    // unpack the data blob
+    long[] unpacked = new long[len];
+    utils.readInts(unpacked, 0, len, fb, input);
+
+    // unpack the patch blob
+    long[] unpackedPatch = new long[pl];
+
+    if ((pw + pgw) > 64 && !skipCorrupt) {
+      throw new IOException("Corruption in ORC data encountered. To skip" +
+          " reading corrupted data, set hive.exec.orc.skip.corrupt.data to" +
+          " true");
+    }
+    int bitSize = utils.getClosestFixedBits(pw + pgw);
+    utils.readInts(unpackedPatch, 0, pl, bitSize, input);
+
+    // apply the patch directly when decoding the packed data
+    int patchIdx = 0;
+    long currGap = 0;
+    long currPatch = 0;
+    long patchMask = ((1L << pw) - 1);
+    currGap = unpackedPatch[patchIdx] >>> pw;
+    currPatch = unpackedPatch[patchIdx] & patchMask;
+    long actualGap = 0;
+
+    // special case: gap is >255 then patch value will be 0.
+    // if gap is <=255 then patch value cannot be 0
+    while (currGap == 255 && currPatch == 0) {
+      actualGap += 255;
+      patchIdx++;
+      currGap = unpackedPatch[patchIdx] >>> pw;
+      currPatch = unpackedPatch[patchIdx] & patchMask;
+    }
+    // add the left over gap
+    actualGap += currGap;
+
+    // unpack data blob, patch it (if required), add base to get final result
+    for(int i = 0; i < unpacked.length; i++) {
+      if (i == actualGap) {
+        // extract the patch value
+        long patchedVal = unpacked[i] | (currPatch << fb);
+
+        // add base to patched value
+        literals[numLiterals++] = base + patchedVal;
+
+        // increment the patch to point to next entry in patch list
+        patchIdx++;
+
+        if (patchIdx < pl) {
+          // read the next gap and patch
+          currGap = unpackedPatch[patchIdx] >>> pw;
+          currPatch = unpackedPatch[patchIdx] & patchMask;
+          actualGap = 0;
+
+          // special case: gap is >255 then patch will be 0. if gap is
+          // <=255 then patch cannot be 0
+          while (currGap == 255 && currPatch == 0) {
+            actualGap += 255;
+            patchIdx++;
+            currGap = unpackedPatch[patchIdx] >>> pw;
+            currPatch = unpackedPatch[patchIdx] & patchMask;
+          }
+          // add the left over gap
+          actualGap += currGap;
+
+          // next gap is relative to the current gap
+          actualGap += i;
+        }
+      } else {
+        // no patching required. add base to unpacked value to get final value
+        literals[numLiterals++] = base + unpacked[i];
+      }
+    }
+
+  }
+
+  private void readDirectValues(int firstByte) throws IOException {
+
+    // extract the number of fixed bits
+    int fbo = (firstByte >>> 1) & 0x1f;
+    int fb = utils.decodeBitWidth(fbo);
+
+    // extract the run length
+    int len = (firstByte & 0x01) << 8;
+    len |= input.read();
+    // runs are one off
+    len += 1;
+
+    // write the unpacked values and zigzag decode to result buffer
+    utils.readInts(literals, numLiterals, len, fb, input);
+    if (signed) {
+      for(int i = 0; i < len; i++) {
+        literals[numLiterals] = utils.zigzagDecode(literals[numLiterals]);
+        numLiterals++;
+      }
+    } else {
+      numLiterals += len;
+    }
+  }
+
+  private void readShortRepeatValues(int firstByte) throws IOException {
+
+    // read the number of bytes occupied by the value
+    int size = (firstByte >>> 3) & 0x07;
+    // #bytes are one off
+    size += 1;
+
+    // read the run length
+    int len = firstByte & 0x07;
+    // run lengths are stored only after the MIN_REPEAT value is met
+    len += RunLengthIntegerWriterV2.MIN_REPEAT;
+
+    // read the repeated value which is stored using fixed bytes
+    long val = utils.bytesToLongBE(input, size);
+
+    if (signed) {
+      val = utils.zigzagDecode(val);
+    }
+
+    if (numLiterals != 0) {
+      // Currently this always holds, which makes peekNextAvailLength simpler.
+      // If this changes, peekNextAvailLength should be adjusted accordingly.
+      throw new AssertionError("readValues called with existing values present");
+    }
+    // repeat the value for length times
+    isRepeating = true;
+    // TODO: this is not so useful and V1 reader doesn't do that. Fix? Same if delta == 0
+    for(int i = 0; i < len; i++) {
+      literals[i] = val;
+    }
+    numLiterals = len;
+  }
+
+  @Override
+  public boolean hasNext() throws IOException {
+    return used != numLiterals || input.available() > 0;
+  }
+
+  @Override
+  public long next() throws IOException {
+    long result;
+    if (used == numLiterals) {
+      numLiterals = 0;
+      used = 0;
+      readValues(false);
+    }
+    result = literals[used++];
+    return result;
+  }
+
+  @Override
+  public void seek(PositionProvider index) throws IOException {
+    input.seek(index);
+    int consumed = (int) index.getNext();
+    if (consumed != 0) {
+      // a loop is required for cases where we break the run into two
+      // parts
+      while (consumed > 0) {
+        numLiterals = 0;
+        readValues(false);
+        used = consumed;
+        consumed -= numLiterals;
+      }
+    } else {
+      used = 0;
+      numLiterals = 0;
+    }
+  }
+
+  @Override
+  public void skip(long numValues) throws IOException {
+    while (numValues > 0) {
+      if (used == numLiterals) {
+        numLiterals = 0;
+        used = 0;
+        readValues(false);
+      }
+      long consume = Math.min(numValues, numLiterals - used);
+      used += consume;
+      numValues -= consume;
+    }
+  }
+
+  @Override
+  public void nextVector(ColumnVector previous,
+                         long[] data,
+                         int previousLen) throws IOException {
+    previous.isRepeating = true;
+    for (int i = 0; i < previousLen; i++) {
+      if (!previous.isNull[i]) {
+        data[i] = next();
+      } else {
+        // The default value of null for int type in vectorized
+        // processing is 1, so set that if the value is null
+        data[i] = 1;
+      }
+
+      // The default value for nulls in Vectorization for int types is 1
+      // and given that non null value can also be 1, we need to check for isNull also
+      // when determining the isRepeating flag.
+      if (previous.isRepeating
+          && i > 0
+          && (data[0] != data[i] ||
+          previous.isNull[0] != previous.isNull[i])) {
+        previous.isRepeating = false;
+      }
+    }
+  }
+
+  @Override
+  public void nextVector(ColumnVector vector,
+                         int[] data,
+                         int size) throws IOException {
+    if (vector.noNulls) {
+      for(int r=0; r < data.length && r < size; ++r) {
+        data[r] = (int) next();
+      }
+    } else if (!(vector.isRepeating && vector.isNull[0])) {
+      for(int r=0; r < data.length && r < size; ++r) {
+        if (!vector.isNull[r]) {
+          data[r] = (int) next();
+        } else {
+          data[r] = 1;
+        }
+      }
+    }
+  }
+}
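
A sketch only, mirroring the bit arithmetic in readShortRepeatValues() above; the
two sample bytes are hand-built from the SHORT_REPEAT layout, not read from a
real file.

  static long[] decodeShortRepeat(byte[] buf, boolean signed) {
    int firstByte = buf[0] & 0xff;
    assert (firstByte >>> 6) == 0 : "top two bits 00 == SHORT_REPEAT";
    int valueBytes = ((firstByte >>> 3) & 0x07) + 1;   // byte count is stored one off
    int runLength  = (firstByte & 0x07) + 3;           // MIN_REPEAT is always added back
    long value = 0;
    for (int i = 0; i < valueBytes; ++i) {             // value is stored big endian
      value = (value << 8) | (buf[1 + i] & 0xff);
    }
    if (signed) {
      value = (value >>> 1) ^ -(value & 1);            // zigzag decode
    }
    long[] out = new long[runLength];
    java.util.Arrays.fill(out, value);
    return out;
  }

  // decodeShortRepeat(new byte[]{0x02, 0x14}, true) => {10, 10, 10, 10, 10}:
  // header 0x02 = one value byte, run length 2 + MIN_REPEAT = 5, value zigzag(10) = 20.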

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/impl/RunLengthIntegerWriter.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/impl/RunLengthIntegerWriter.java b/orc/src/java/org/apache/hive/orc/impl/RunLengthIntegerWriter.java
new file mode 100644
index 0000000..2153001
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/impl/RunLengthIntegerWriter.java
@@ -0,0 +1,143 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc.impl;
+
+import java.io.IOException;
+
+/**
+ * A streamFactory that writes a sequence of integers. A control byte is written before
+ * each run: positive values 0 to 127 mean 3 to 130 repetitions, with each
+ * repetition offset by a delta. If the control byte is -1 to -128, 1 to 128
+ * literal vint values follow.
+ */
+public class RunLengthIntegerWriter implements IntegerWriter {
+  static final int MIN_REPEAT_SIZE = 3;
+  static final int MAX_DELTA = 127;
+  static final int MIN_DELTA = -128;
+  static final int MAX_LITERAL_SIZE = 128;
+  private static final int MAX_REPEAT_SIZE = 127 + MIN_REPEAT_SIZE;
+  private final PositionedOutputStream output;
+  private final boolean signed;
+  private final long[] literals = new long[MAX_LITERAL_SIZE];
+  private int numLiterals = 0;
+  private long delta = 0;
+  private boolean repeat = false;
+  private int tailRunLength = 0;
+  private SerializationUtils utils;
+
+  public RunLengthIntegerWriter(PositionedOutputStream output,
+                         boolean signed) {
+    this.output = output;
+    this.signed = signed;
+    this.utils = new SerializationUtils();
+  }
+
+  private void writeValues() throws IOException {
+    if (numLiterals != 0) {
+      if (repeat) {
+        output.write(numLiterals - MIN_REPEAT_SIZE);
+        output.write((byte) delta);
+        if (signed) {
+          utils.writeVslong(output, literals[0]);
+        } else {
+          utils.writeVulong(output, literals[0]);
+        }
+      } else {
+        output.write(-numLiterals);
+        for(int i=0; i < numLiterals; ++i) {
+          if (signed) {
+            utils.writeVslong(output, literals[i]);
+          } else {
+            utils.writeVulong(output, literals[i]);
+          }
+        }
+      }
+      repeat = false;
+      numLiterals = 0;
+      tailRunLength = 0;
+    }
+  }
+
+  @Override
+  public void flush() throws IOException {
+    writeValues();
+    output.flush();
+  }
+
+  @Override
+  public void write(long value) throws IOException {
+    if (numLiterals == 0) {
+      literals[numLiterals++] = value;
+      tailRunLength = 1;
+    } else if (repeat) {
+      if (value == literals[0] + delta * numLiterals) {
+        numLiterals += 1;
+        if (numLiterals == MAX_REPEAT_SIZE) {
+          writeValues();
+        }
+      } else {
+        writeValues();
+        literals[numLiterals++] = value;
+        tailRunLength = 1;
+      }
+    } else {
+      if (tailRunLength == 1) {
+        delta = value - literals[numLiterals - 1];
+        if (delta < MIN_DELTA || delta > MAX_DELTA) {
+          tailRunLength = 1;
+        } else {
+          tailRunLength = 2;
+        }
+      } else if (value == literals[numLiterals - 1] + delta) {
+        tailRunLength += 1;
+      } else {
+        delta = value - literals[numLiterals - 1];
+        if (delta < MIN_DELTA || delta > MAX_DELTA) {
+          tailRunLength = 1;
+        } else {
+          tailRunLength = 2;
+        }
+      }
+      if (tailRunLength == MIN_REPEAT_SIZE) {
+        if (numLiterals + 1 == MIN_REPEAT_SIZE) {
+          repeat = true;
+          numLiterals += 1;
+        } else {
+          numLiterals -= MIN_REPEAT_SIZE - 1;
+          long base = literals[numLiterals];
+          writeValues();
+          literals[0] = base;
+          repeat = true;
+          numLiterals = MIN_REPEAT_SIZE;
+        }
+      } else {
+        literals[numLiterals++] = value;
+        if (numLiterals == MAX_LITERAL_SIZE) {
+          writeValues();
+        }
+      }
+    }
+  }
+
+  @Override
+  public void getPosition(PositionRecorder recorder) throws IOException {
+    output.getPosition(recorder);
+    recorder.addPosition(numLiterals);
+  }
+
+}
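
To make the run detection in write() above concrete, two hand-worked encodings on
a signed stream; the byte values follow writeValues() in this class and are
illustrative only.

  //   write 4, 6, 8, 10, 12 then flush():
  //     one delta run of 5 values  -> {0x02, 0x02, 0x08}
  //     (control 5 - MIN_REPEAT_SIZE = 2, delta byte 2, base vint zigzag(4) = 8)
  //
  //   write 2, 3, 7, 8, 30 then flush():
  //     no 3-value arithmetic run  -> {0xFB, 0x04, 0x06, 0x0E, 0x10, 0x3C}
  //     (control -5, then the five values as signed vints zigzag(2), zigzag(3),
  //      zigzag(7), zigzag(8), zigzag(30))
  // where zigzag is the mapping applied by SerializationUtils.writeVslong:
  static long zigzagEncode(long n) {
    return (n << 1) ^ (n >> 63);
  }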

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/impl/RunLengthIntegerWriterV2.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/impl/RunLengthIntegerWriterV2.java b/orc/src/java/org/apache/hive/orc/impl/RunLengthIntegerWriterV2.java
new file mode 100644
index 0000000..1140ab4
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/impl/RunLengthIntegerWriterV2.java
@@ -0,0 +1,831 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc.impl;
+
+import java.io.IOException;
+
+/**
+ * A writer that performs light weight compression over sequence of integers.
+ * <p>
+ * There are four types of lightweight integer compression
+ * <ul>
+ * <li>SHORT_REPEAT</li>
+ * <li>DIRECT</li>
+ * <li>PATCHED_BASE</li>
+ * <li>DELTA</li>
+ * </ul>
+ * </p>
+ * The description and format for these types are as below:
+ * <p>
+ * <b>SHORT_REPEAT:</b> Used for short repeated integer sequences.
+ * <ul>
+ * <li>1 byte header
+ * <ul>
+ * <li>2 bits for encoding type</li>
+ * <li>3 bits for bytes required for repeating value</li>
+ * <li>3 bits for repeat count (MIN_REPEAT + run length)</li>
+ * </ul>
+ * </li>
+ * <li>Blob - repeat value (fixed bytes)</li>
+ * </ul>
+ * </p>
+ * <p>
+ * <b>DIRECT:</b> Used for random integer sequences whose bit-width
+ * requirement doesn't vary much.
+ * <ul>
+ * <li>2 bytes header
+ * <ul>
+ * 1st byte
+ * <li>2 bits for encoding type</li>
+ * <li>5 bits for fixed bit width of values in blob</li>
+ * <li>1 bit for storing MSB of run length</li>
+ * </ul>
+ * <ul>
+ * 2nd byte
+ * <li>8 bits for lower run length bits</li>
+ * </ul>
+ * </li>
+ * <li>Blob - stores the direct values using fixed bit width. The length of the
+ * data blob is (fixed width * run length) bits long</li>
+ * </ul>
+ * </p>
+ * <p>
+ * <b>PATCHED_BASE:</b> Used for random integer sequences whose bit-width
+ * requirement varies beyond a threshold.
+ * <ul>
+ * <li>4 bytes header
+ * <ul>
+ * 1st byte
+ * <li>2 bits for encoding type</li>
+ * <li>5 bits for fixed bit width of values in blob</li>
+ * <li>1 bit for storing MSB of run length</li>
+ * </ul>
+ * <ul>
+ * 2nd byte
+ * <li>8 bits for lower run length bits</li>
+ * </ul>
+ * <ul>
+ * 3rd byte
+ * <li>3 bits for bytes required to encode base value</li>
+ * <li>5 bits for patch width</li>
+ * </ul>
+ * <ul>
+ * 4th byte
+ * <li>3 bits for patch gap width</li>
+ * <li>5 bits for patch length</li>
+ * </ul>
+ * </li>
+ * <li>Base value - Stored using fixed number of bytes. If MSB is set, base
+ * value is negative else positive. Length of base value is (base width * 8)
+ * bits.</li>
+ * <li>Data blob - Base reduced values as stored using fixed bit width. Length
+ * of data blob is (fixed width * run length) bits.</li>
+ * <li>Patch blob - Patch blob is a list of gap and patch value. Each entry in
+ * the patch list is (patch width + patch gap width) bits long. Gap between the
+ * subsequent elements to be patched are stored in upper part of entry whereas
+ * patch values are stored in lower part of entry. Length of patch blob is
+ * ((patch width + patch gap width) * patch length) bits.</li>
+ * </ul>
+ * </p>
+ * <p>
+ * <b>DELTA:</b> Used for monotonically increasing or decreasing sequences,
+ * sequences with fixed delta values or long repeated sequences.
+ * <ul>
+ * <li>2 bytes header
+ * <ul>
+ * 1st byte
+ * <li>2 bits for encoding type</li>
+ * <li>5 bits for fixed bit width of values in blob</li>
+ * <li>1 bit for storing MSB of run length</li>
+ * </ul>
+ * <ul>
+ * 2nd byte
+ * <li>8 bits for lower run length bits</li>
+ * </ul>
+ * </li>
+ * <li>Base value - zigzag encoded value written as varint</li>
+ * <li>Delta base - zigzag encoded value written as varint</li>
+ * <li>Delta blob - only positive values. Monotonicity and ordering are decided
+ * based on the sign of the base value and the delta base</li>
+ * </ul>
+ * </p>
+ */
+public class RunLengthIntegerWriterV2 implements IntegerWriter {
+
+  public enum EncodingType {
+    SHORT_REPEAT, DIRECT, PATCHED_BASE, DELTA
+  }
+
+  static final int MAX_SCOPE = 512;
+  static final int MIN_REPEAT = 3;
+  private static final int MAX_SHORT_REPEAT_LENGTH = 10;
+  private long prevDelta = 0;
+  private int fixedRunLength = 0;
+  private int variableRunLength = 0;
+  private final long[] literals = new long[MAX_SCOPE];
+  private final PositionedOutputStream output;
+  private final boolean signed;
+  private EncodingType encoding;
+  private int numLiterals;
+  private final long[] zigzagLiterals = new long[MAX_SCOPE];
+  private final long[] baseRedLiterals = new long[MAX_SCOPE];
+  private final long[] adjDeltas = new long[MAX_SCOPE];
+  private long fixedDelta;
+  private int zzBits90p;
+  private int zzBits100p;
+  private int brBits95p;
+  private int brBits100p;
+  private int bitsDeltaMax;
+  private int patchWidth;
+  private int patchGapWidth;
+  private int patchLength;
+  private long[] gapVsPatchList;
+  private long min;
+  private boolean isFixedDelta;
+  private SerializationUtils utils;
+  private boolean alignedBitpacking;
+
+  RunLengthIntegerWriterV2(PositionedOutputStream output, boolean signed) {
+    this(output, signed, true);
+  }
+
+  public RunLengthIntegerWriterV2(PositionedOutputStream output, boolean signed,
+      boolean alignedBitpacking) {
+    this.output = output;
+    this.signed = signed;
+    this.alignedBitpacking = alignedBitpacking;
+    this.utils = new SerializationUtils();
+    clear();
+  }
+
+  private void writeValues() throws IOException {
+    if (numLiterals != 0) {
+
+      if (encoding.equals(EncodingType.SHORT_REPEAT)) {
+        writeShortRepeatValues();
+      } else if (encoding.equals(EncodingType.DIRECT)) {
+        writeDirectValues();
+      } else if (encoding.equals(EncodingType.PATCHED_BASE)) {
+        writePatchedBaseValues();
+      } else {
+        writeDeltaValues();
+      }
+
+      // clear all the variables
+      clear();
+    }
+  }
+
+  private void writeDeltaValues() throws IOException {
+    int len = 0;
+    int fb = bitsDeltaMax;
+    int efb = 0;
+
+    if (alignedBitpacking) {
+      fb = utils.getClosestAlignedFixedBits(fb);
+    }
+
+    if (isFixedDelta) {
+      // if fixed run length is greater than threshold then it will be fixed
+      // delta sequence with delta value 0 else fixed delta sequence with
+      // non-zero delta value
+      if (fixedRunLength > MIN_REPEAT) {
+        // ex. sequence: 2 2 2 2 2 2 2 2
+        len = fixedRunLength - 1;
+        fixedRunLength = 0;
+      } else {
+        // ex. sequence: 4 6 8 10 12 14 16
+        len = variableRunLength - 1;
+        variableRunLength = 0;
+      }
+    } else {
+      // fixed width 0 is used for long repeating values.
+      // sequences that require only 1 bit to encode will have an additional bit
+      if (fb == 1) {
+        fb = 2;
+      }
+      efb = utils.encodeBitWidth(fb);
+      efb = efb << 1;
+      len = variableRunLength - 1;
+      variableRunLength = 0;
+    }
+
+    // extract the 9th bit of run length
+    final int tailBits = (len & 0x100) >>> 8;
+
+    // create first byte of the header
+    final int headerFirstByte = getOpcode() | efb | tailBits;
+
+    // second byte of the header stores the remaining 8 bits of runlength
+    final int headerSecondByte = len & 0xff;
+
+    // write header
+    output.write(headerFirstByte);
+    output.write(headerSecondByte);
+
+    // store the first value from zigzag literal array
+    if (signed) {
+      utils.writeVslong(output, literals[0]);
+    } else {
+      utils.writeVulong(output, literals[0]);
+    }
+
+    if (isFixedDelta) {
+      // if delta is fixed then we don't need to store delta blob
+      utils.writeVslong(output, fixedDelta);
+    } else {
+      // store the first value as delta value using zigzag encoding
+      utils.writeVslong(output, adjDeltas[0]);
+
+      // adjacent delta values are bit packed. The length of adjDeltas array is
+      // always one less than the number of literals (delta difference for n
+      // elements is n-1). We have already written one element, write the
+      // remaining numLiterals - 2 elements here
+      utils.writeInts(adjDeltas, 1, numLiterals - 2, fb, output);
+    }
+  }
+
+  private void writePatchedBaseValues() throws IOException {
+
+    // NOTE: Aligned bit packing cannot be applied for PATCHED_BASE encoding
+    // because patch is applied to MSB bits. For example: If fixed bit width of
+    // base value is 7 bits and if patch is 3 bits, the actual value is
+    // constructed by shifting the patch to left by 7 positions.
+    // actual_value = patch << 7 | base_value
+    // So, if we align base_value then actual_value can not be reconstructed.
+
+    // write the number of fixed bits required in next 5 bits
+    final int fb = brBits95p;
+    final int efb = utils.encodeBitWidth(fb) << 1;
+
+    // adjust variable run length, they are one off
+    variableRunLength -= 1;
+
+    // extract the 9th bit of run length
+    final int tailBits = (variableRunLength & 0x100) >>> 8;
+
+    // create first byte of the header
+    final int headerFirstByte = getOpcode() | efb | tailBits;
+
+    // second byte of the header stores the remaining 8 bits of runlength
+    final int headerSecondByte = variableRunLength & 0xff;
+
+    // if the min value is negative toggle the sign
+    final boolean isNegative = min < 0;
+    if (isNegative) {
+      min = -min;
+    }
+
+    // find the number of bytes required for base and shift it by 5 bits
+    // to accommodate patch width. The additional bit is used to store the sign
+    // of the base value.
+    final int baseWidth = utils.findClosestNumBits(min) + 1;
+    final int baseBytes = baseWidth % 8 == 0 ? baseWidth / 8 : (baseWidth / 8) + 1;
+    final int bb = (baseBytes - 1) << 5;
+
+    // if the base value is negative then set MSB to 1
+    if (isNegative) {
+      min |= (1L << ((baseBytes * 8) - 1));
+    }
+
+    // third byte contains 3 bits for number of bytes occupied by base
+    // and 5 bits for patchWidth
+    final int headerThirdByte = bb | utils.encodeBitWidth(patchWidth);
+
+    // fourth byte contains 3 bits for page gap width and 5 bits for
+    // patch length
+    final int headerFourthByte = (patchGapWidth - 1) << 5 | patchLength;
+
+    // write header
+    output.write(headerFirstByte);
+    output.write(headerSecondByte);
+    output.write(headerThirdByte);
+    output.write(headerFourthByte);
+
+    // write the base value using fixed bytes in big endian order
+    for(int i = baseBytes - 1; i >= 0; i--) {
+      byte b = (byte) ((min >>> (i * 8)) & 0xff);
+      output.write(b);
+    }
+
+    // base reduced literals are bit packed
+    int closestFixedBits = utils.getClosestFixedBits(fb);
+
+    utils.writeInts(baseRedLiterals, 0, numLiterals, closestFixedBits,
+        output);
+
+    // write patch list
+    closestFixedBits = utils.getClosestFixedBits(patchGapWidth + patchWidth);
+
+    utils.writeInts(gapVsPatchList, 0, gapVsPatchList.length, closestFixedBits,
+        output);
+
+    // reset run length
+    variableRunLength = 0;
+  }
+
+  /**
+   * Store the opcode in 2 MSB bits
+   * @return opcode
+   */
+  private int getOpcode() {
+    return encoding.ordinal() << 6;
+  }
+
+  private void writeDirectValues() throws IOException {
+
+    // write the number of fixed bits required in next 5 bits
+    int fb = zzBits100p;
+
+    if (alignedBitpacking) {
+      fb = utils.getClosestAlignedFixedBits(fb);
+    }
+
+    final int efb = utils.encodeBitWidth(fb) << 1;
+
+    // adjust variable run length
+    variableRunLength -= 1;
+
+    // extract the 9th bit of run length
+    final int tailBits = (variableRunLength & 0x100) >>> 8;
+
+    // create first byte of the header
+    final int headerFirstByte = getOpcode() | efb | tailBits;
+
+    // second byte of the header stores the remaining 8 bits of runlength
+    final int headerSecondByte = variableRunLength & 0xff;
+
+    // write header
+    output.write(headerFirstByte);
+    output.write(headerSecondByte);
+
+    // bit packing the zigzag encoded literals
+    utils.writeInts(zigzagLiterals, 0, numLiterals, fb, output);
+
+    // reset run length
+    variableRunLength = 0;
+  }
+
+  private void writeShortRepeatValues() throws IOException {
+    // get the value that is repeating, compute the bits and bytes required
+    long repeatVal = 0;
+    if (signed) {
+      repeatVal = utils.zigzagEncode(literals[0]);
+    } else {
+      repeatVal = literals[0];
+    }
+
+    final int numBitsRepeatVal = utils.findClosestNumBits(repeatVal);
+    final int numBytesRepeatVal = numBitsRepeatVal % 8 == 0 ? numBitsRepeatVal >>> 3
+        : (numBitsRepeatVal >>> 3) + 1;
+
+    // write encoding type in top 2 bits
+    int header = getOpcode();
+
+    // write the number of bytes required for the value
+    header |= ((numBytesRepeatVal - 1) << 3);
+
+    // write the run length
+    fixedRunLength -= MIN_REPEAT;
+    header |= fixedRunLength;
+
+    // write the header
+    output.write(header);
+
+    // write the repeating value in big endian byte order
+    for(int i = numBytesRepeatVal - 1; i >= 0; i--) {
+      int b = (int) ((repeatVal >>> (i * 8)) & 0xff);
+      output.write(b);
+    }
+
+    fixedRunLength = 0;
+  }
+
+  private void determineEncoding() {
+
+    // we need to compute zigzag values for DIRECT encoding if we decide to
+    // break early for delta overflows or for shorter runs
+    computeZigZagLiterals();
+
+    zzBits100p = utils.percentileBits(zigzagLiterals, 0, numLiterals, 1.0);
+
+    // not a big win for shorter runs to determine encoding
+    if (numLiterals <= MIN_REPEAT) {
+      encoding = EncodingType.DIRECT;
+      return;
+    }
+
+    // DELTA encoding check
+
+    // for identifying monotonic sequences
+    boolean isIncreasing = true;
+    boolean isDecreasing = true;
+    this.isFixedDelta = true;
+
+    this.min = literals[0];
+    long max = literals[0];
+    final long initialDelta = literals[1] - literals[0];
+    long currDelta = initialDelta;
+    long deltaMax = initialDelta;
+    this.adjDeltas[0] = initialDelta;
+
+    for (int i = 1; i < numLiterals; i++) {
+      final long l1 = literals[i];
+      final long l0 = literals[i - 1];
+      currDelta = l1 - l0;
+      min = Math.min(min, l1);
+      max = Math.max(max, l1);
+
+      isIncreasing &= (l0 <= l1);
+      isDecreasing &= (l0 >= l1);
+
+      isFixedDelta &= (currDelta == initialDelta);
+      if (i > 1) {
+        adjDeltas[i - 1] = Math.abs(currDelta);
+        deltaMax = Math.max(deltaMax, adjDeltas[i - 1]);
+      }
+    }
+
+    // it's faster to exit under the delta overflow condition without checking
+    // the PATCHED_BASE condition, as DIRECT encoding is faster and has less
+    // overhead than PATCHED_BASE
+    if (!utils.isSafeSubtract(max, min)) {
+      encoding = EncodingType.DIRECT;
+      return;
+    }
+
+    // invariant - subtracting any number from any other in the literals after
+    // this point won't overflow
+
+    // if min is equal to max then the delta is 0; this happens for fixed-value
+    // runs longer than 10, which cannot be encoded with SHORT_REPEAT
+    if (min == max) {
+      assert isFixedDelta : min + "==" + max +
+          ", isFixedDelta cannot be false";
+      assert currDelta == 0 : min + "==" + max + ", currDelta should be zero";
+      fixedDelta = 0;
+      encoding = EncodingType.DELTA;
+      return;
+    }
+
+    if (isFixedDelta) {
+      assert currDelta == initialDelta
+          : "currDelta should be equal to initialDelta for fixed delta encoding";
+      encoding = EncodingType.DELTA;
+      fixedDelta = currDelta;
+      return;
+    }
+
+    // if initialDelta is 0 then we cannot delta encode as we cannot identify
+    // the sign of deltas (increasing or decreasing)
+    if (initialDelta != 0) {
+      // stores the number of bits required for packing delta blob in
+      // delta encoding
+      bitsDeltaMax = utils.findClosestNumBits(deltaMax);
+
+      // monotonic condition
+      if (isIncreasing || isDecreasing) {
+        encoding = EncodingType.DELTA;
+        return;
+      }
+    }
+
+    // PATCHED_BASE encoding check
+
+    // percentile values are computed for the zigzag encoded values. if the
+    // bit requirement between the 90th and 100th percentiles varies
+    // beyond a threshold then we need to patch the values. if the variation
+    // is not significant then we can use direct encoding
+
+    zzBits90p = utils.percentileBits(zigzagLiterals, 0, numLiterals, 0.9);
+    int diffBitsLH = zzBits100p - zzBits90p;
+
+    // if the difference between 90th percentile and 100th percentile fixed
+    // bits is > 1 then we need to patch the values
+    if (diffBitsLH > 1) {
+
+      // patching is done only on base reduced values.
+      // remove base from literals
+      for (int i = 0; i < numLiterals; i++) {
+        baseRedLiterals[i] = literals[i] - min;
+      }
+
+      // 95th percentile width is used to determine max allowed value
+      // after which patching will be done
+      brBits95p = utils.percentileBits(baseRedLiterals, 0, numLiterals, 0.95);
+
+      // 100th percentile is used to compute the max patch width
+      brBits100p = utils.percentileBits(baseRedLiterals, 0, numLiterals, 1.0);
+
+      // after base reducing the values, if the difference in bits between
+      // 95th percentile and 100th percentile value is zero then there
+      // is no point in patching the values, in which case we will
+      // fall back to DIRECT encoding.
+      // The decision to use patched base was based on zigzag values, but the
+      // actual patching is done on base reduced literals.
+      if ((brBits100p - brBits95p) != 0) {
+        encoding = EncodingType.PATCHED_BASE;
+        preparePatchedBlob();
+        return;
+      } else {
+        encoding = EncodingType.DIRECT;
+        return;
+      }
+    } else {
+      // if difference in bits between 95th percentile and 100th percentile is
+      // 0, then patch length will become 0. Hence we will fallback to direct
+      encoding = EncodingType.DIRECT;
+      return;
+    }
+  }
+
+  private void computeZigZagLiterals() {
+    // populate zigzag encoded literals
+    long zzEncVal = 0;
+    for (int i = 0; i < numLiterals; i++) {
+      if (signed) {
+        zzEncVal = utils.zigzagEncode(literals[i]);
+      } else {
+        zzEncVal = literals[i];
+      }
+      zigzagLiterals[i] = zzEncVal;
+    }
+  }
+
+  private void preparePatchedBlob() {
+    // mask is the max value beyond which a patch will be generated
+    long mask = (1L << brBits95p) - 1;
+
+    // since we are considering only the 95th percentile, the gap and
+    // patch arrays need to hold at most 5% of the values
+    patchLength = (int) Math.ceil((numLiterals * 0.05));
+
+    int[] gapList = new int[patchLength];
+    long[] patchList = new long[patchLength];
+
+    // #bit for patch
+    patchWidth = brBits100p - brBits95p;
+    patchWidth = utils.getClosestFixedBits(patchWidth);
+
+    // if the patch bit requirement is 64 then it will not be possible to pack
+    // gap and patch together in a long. To make sure gap and patch can be
+    // packed together, adjust the patch width
+    if (patchWidth == 64) {
+      patchWidth = 56;
+      brBits95p = 8;
+      mask = (1L << brBits95p) - 1;
+    }
+
+    int gapIdx = 0;
+    int patchIdx = 0;
+    int prev = 0;
+    int gap = 0;
+    int maxGap = 0;
+
+    for(int i = 0; i < numLiterals; i++) {
+      // if value is above mask then create the patch and record the gap
+      if (baseRedLiterals[i] > mask) {
+        gap = i - prev;
+        if (gap > maxGap) {
+          maxGap = gap;
+        }
+
+        // gaps are relative, so store the previous patched value index
+        prev = i;
+        gapList[gapIdx++] = gap;
+
+        // extract the most significant bits that are over mask bits
+        long patch = baseRedLiterals[i] >>> brBits95p;
+        patchList[patchIdx++] = patch;
+
+        // strip off the MSB to enable safe bit packing
+        baseRedLiterals[i] &= mask;
+      }
+    }
+
+    // adjust the patch length to number of entries in gap list
+    patchLength = gapIdx;
+
+    // if the element to be patched is the first and only element then
+    // max gap will be 0, but to store the gap as 0 we need at least 1 bit
+    if (maxGap == 0 && patchLength != 0) {
+      patchGapWidth = 1;
+    } else {
+      patchGapWidth = utils.findClosestNumBits(maxGap);
+    }
+
+    // special case: if the patch gap width is greater than 256, then
+    // we need 9 bits to encode the gap width. But we only have 3 bits in
+    // header to record the gap width. To deal with this case, we will save
+    // two entries in patch list in the following way
+    // 256 gap width => 0 for patch value
+    // actual gap - 256 => actual patch value
+    // We will do the same for gap width = 511. If the element to be patched is
+    // the last element in the scope then gap width will be 511. In this case we
+    // will have 3 entries in the patch list in the following way
+    // 255 gap width => 0 for patch value
+    // 255 gap width => 0 for patch value
+    // 1 gap width => actual patch value
+    if (patchGapWidth > 8) {
+      patchGapWidth = 8;
+      // for gap = 511, we need two additional entries in patch list
+      if (maxGap == 511) {
+        patchLength += 2;
+      } else {
+        patchLength += 1;
+      }
+    }
+
+    // create gap vs patch list
+    gapIdx = 0;
+    patchIdx = 0;
+    gapVsPatchList = new long[patchLength];
+    for(int i = 0; i < patchLength; i++) {
+      long g = gapList[gapIdx++];
+      long p = patchList[patchIdx++];
+      while (g > 255) {
+        gapVsPatchList[i++] = (255L << patchWidth);
+        g -= 255;
+      }
+
+      // store patch value in LSBs and gap in MSBs
+      gapVsPatchList[i] = (g << patchWidth) | p;
+    }
+  }
+
+  /**
+   * clears all the variables
+   */
+  private void clear() {
+    numLiterals = 0;
+    encoding = null;
+    prevDelta = 0;
+    fixedDelta = 0;
+    zzBits90p = 0;
+    zzBits100p = 0;
+    brBits95p = 0;
+    brBits100p = 0;
+    bitsDeltaMax = 0;
+    patchGapWidth = 0;
+    patchLength = 0;
+    patchWidth = 0;
+    gapVsPatchList = null;
+    min = 0;
+    isFixedDelta = true;
+  }
+
+  @Override
+  public void flush() throws IOException {
+    if (numLiterals != 0) {
+      if (variableRunLength != 0) {
+        determineEncoding();
+        writeValues();
+      } else if (fixedRunLength != 0) {
+        if (fixedRunLength < MIN_REPEAT) {
+          variableRunLength = fixedRunLength;
+          fixedRunLength = 0;
+          determineEncoding();
+          writeValues();
+        } else if (fixedRunLength >= MIN_REPEAT
+            && fixedRunLength <= MAX_SHORT_REPEAT_LENGTH) {
+          encoding = EncodingType.SHORT_REPEAT;
+          writeValues();
+        } else {
+          encoding = EncodingType.DELTA;
+          isFixedDelta = true;
+          writeValues();
+        }
+      }
+    }
+    output.flush();
+  }
+
+  @Override
+  public void write(long val) throws IOException {
+    if (numLiterals == 0) {
+      initializeLiterals(val);
+    } else {
+      if (numLiterals == 1) {
+        prevDelta = val - literals[0];
+        literals[numLiterals++] = val;
+        // if both values are the same, count it as a fixed run; else a variable run
+        if (val == literals[0]) {
+          fixedRunLength = 2;
+          variableRunLength = 0;
+        } else {
+          fixedRunLength = 0;
+          variableRunLength = 2;
+        }
+      } else {
+        long currentDelta = val - literals[numLiterals - 1];
+        if (prevDelta == 0 && currentDelta == 0) {
+          // fixed delta run
+
+          literals[numLiterals++] = val;
+
+          // if variable run is non-zero then we are seeing repeating
+          // values at the end of variable run in which case keep
+          // updating variable and fixed runs
+          if (variableRunLength > 0) {
+            fixedRunLength = 2;
+          }
+          fixedRunLength += 1;
+
+          // if fixed run met the minimum condition and if variable
+          // run is non-zero then flush the variable run and shift the
+          // tail fixed runs to start of the buffer
+          if (fixedRunLength >= MIN_REPEAT && variableRunLength > 0) {
+            numLiterals -= MIN_REPEAT;
+            variableRunLength -= MIN_REPEAT - 1;
+            // copy the tail fixed runs
+            long[] tailVals = new long[MIN_REPEAT];
+            System.arraycopy(literals, numLiterals, tailVals, 0, MIN_REPEAT);
+
+            // determine variable encoding and flush values
+            determineEncoding();
+            writeValues();
+
+            // shift tail fixed runs to beginning of the buffer
+            for(long l : tailVals) {
+              literals[numLiterals++] = l;
+            }
+          }
+
+          // if fixed runs reached max repeat length then write values
+          if (fixedRunLength == MAX_SCOPE) {
+            determineEncoding();
+            writeValues();
+          }
+        } else {
+          // variable delta run
+
+          // if fixed run length is non-zero and if it satisfies the
+          // short repeat conditions then write the values as short repeats
+          // else use delta encoding
+          if (fixedRunLength >= MIN_REPEAT) {
+            if (fixedRunLength <= MAX_SHORT_REPEAT_LENGTH) {
+              encoding = EncodingType.SHORT_REPEAT;
+              writeValues();
+            } else {
+              encoding = EncodingType.DELTA;
+              isFixedDelta = true;
+              writeValues();
+            }
+          }
+
+          // if fixed run length is <MIN_REPEAT and current value is
+          // different from previous then treat it as variable run
+          if (fixedRunLength > 0 && fixedRunLength < MIN_REPEAT) {
+            if (val != literals[numLiterals - 1]) {
+              variableRunLength = fixedRunLength;
+              fixedRunLength = 0;
+            }
+          }
+
+          // after writing values re-initialize the variables
+          if (numLiterals == 0) {
+            initializeLiterals(val);
+          } else {
+            // keep updating variable run lengths
+            prevDelta = val - literals[numLiterals - 1];
+            literals[numLiterals++] = val;
+            variableRunLength += 1;
+
+            // if variable run length reaches the max scope, write it
+            if (variableRunLength == MAX_SCOPE) {
+              determineEncoding();
+              writeValues();
+            }
+          }
+        }
+      }
+    }
+  }
+
+  private void initializeLiterals(long val) {
+    literals[numLiterals++] = val;
+    fixedRunLength = 1;
+    variableRunLength = 1;
+  }
+
+  @Override
+  public void getPosition(PositionRecorder recorder) throws IOException {
+    output.getPosition(recorder);
+    recorder.addPosition(numLiterals);
+  }
+}

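For context on the run length writer above: a SHORT_REPEAT run is emitted as a single
header byte that packs the opcode, the value width in bytes, and the run length, and
signed values are zigzag encoded before bit packing. The sketch below is illustrative
only and is not part of this patch; the class name is hypothetical, MIN_REPEAT is
assumed to be 3 (matching the 3..10 SHORT_REPEAT range implied by the comments above),
SHORT_REPEAT is assumed to be ordinal 0 of the encoding enum, and the zigzag formula
shown is the standard one that utils.zigzagEncode is assumed to implement.

  public class RleV2HeaderSketch {

    // Zigzag encoding maps signed values to non-negative ones so that small
    // magnitudes stay small: 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...
    static long zigzagEncode(long val) {
      return (val << 1) ^ (val >> 63);
    }

    // Assemble the single SHORT_REPEAT header byte described above:
    //   bits 7-6: encoding opcode (SHORT_REPEAT assumed to be 0)
    //   bits 5-3: number of value bytes minus one
    //   bits 2-0: run length minus MIN_REPEAT
    static int shortRepeatHeader(int numBytesRepeatVal, int runLength) {
      final int MIN_REPEAT = 3;                  // assumed minimum repeat length
      int header = 0 << 6;                       // SHORT_REPEAT opcode in the 2 MSB bits
      header |= (numBytesRepeatVal - 1) << 3;    // value width in bytes
      header |= (runLength - MIN_REPEAT);        // remaining run length
      return header;
    }

    public static void main(String[] args) {
      // a value needing 2 bytes, repeated 5 times -> header byte 0b1010
      System.out.println(Integer.toBinaryString(shortRepeatHeader(2, 5)));
      System.out.println(zigzagEncode(-3));      // prints 5
    }
  }
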
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/impl/SchemaEvolution.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/impl/SchemaEvolution.java b/orc/src/java/org/apache/hive/orc/impl/SchemaEvolution.java
new file mode 100644
index 0000000..3c7124b
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/impl/SchemaEvolution.java
@@ -0,0 +1,399 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc.impl;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.hive.orc.TypeDescription;
+
+/**
+ * Take the file types and the (optional) configuration column names/types and see if there
+ * has been schema evolution.
+ */
+public class SchemaEvolution {
+  // indexed by reader column id
+  private final TypeDescription[] readerFileTypes;
+  // indexed by reader column id
+  private final boolean[] readerIncluded;
+  // the offset to the first column id ignoring any ACID columns
+  private final int readerColumnOffset;
+  // indexed by file column id
+  private final boolean[] fileIncluded;
+  private final TypeDescription fileSchema;
+  private final TypeDescription readerSchema;
+  private boolean hasConversion;
+  // indexed by reader column id
+  private final boolean[] ppdSafeConversion;
+
+  public SchemaEvolution(TypeDescription fileSchema, boolean[] includedCols) {
+    this(fileSchema, null, includedCols);
+  }
+
+  public SchemaEvolution(TypeDescription fileSchema,
+                         TypeDescription readerSchema,
+                         boolean[] includeCols) {
+    this.readerIncluded = includeCols == null ? null : Arrays.copyOf(includeCols, includeCols.length);
+    this.fileIncluded = new boolean[fileSchema.getMaximumId() + 1];
+    this.hasConversion = false;
+    this.fileSchema = fileSchema;
+    boolean isAcid = checkAcidSchema(fileSchema);
+    this.readerColumnOffset = isAcid ? acidEventFieldNames.size() : 0;
+    if (readerSchema != null) {
+      if (isAcid) {
+        this.readerSchema = createEventSchema(readerSchema);
+      } else {
+        this.readerSchema = readerSchema;
+      }
+      if (readerIncluded != null &&
+          readerIncluded.length + readerColumnOffset != this.readerSchema.getMaximumId() + 1) {
+        throw new IllegalArgumentException("Include vector the wrong length: " +
+            this.readerSchema.toJson() + " with include length " +
+            readerIncluded.length);
+      }
+      this.readerFileTypes = new TypeDescription[this.readerSchema.getMaximumId() + 1];
+      buildConversionFileTypesArray(fileSchema, this.readerSchema);
+    } else {
+      this.readerSchema = fileSchema;
+      this.readerFileTypes = new TypeDescription[this.readerSchema.getMaximumId() + 1];
+      if (readerIncluded != null &&
+          readerIncluded.length + readerColumnOffset != this.readerSchema.getMaximumId() + 1) {
+        throw new IllegalArgumentException("Include vector the wrong length: " +
+            this.readerSchema.toJson() + " with include length " +
+            readerIncluded.length);
+      }
+      buildSameSchemaFileTypesArray();
+    }
+    this.ppdSafeConversion = populatePpdSafeConversion();
+  }
+
+  public TypeDescription getReaderSchema() {
+    return readerSchema;
+  }
+
+  /**
+   * Returns the non-ACID (aka base) reader type description.
+   *
+   * @return the reader type ignoring the ACID rowid columns, if any
+   */
+  public TypeDescription getReaderBaseSchema() {
+    return readerSchema.findSubtype(readerColumnOffset);
+  }
+
+  /**
+   * Is there Schema Evolution data type conversion?
+   * @return true if the reader schema requires any data type conversion
+   */
+  public boolean hasConversion() {
+    return hasConversion;
+  }
+
+  public TypeDescription getFileType(TypeDescription readerType) {
+    return getFileType(readerType.getId());
+  }
+
+  /**
+   * Get whether each column is included from the reader's point of view.
+   * @return a boolean array indexed by reader column id
+   */
+  public boolean[] getReaderIncluded() {
+    return readerIncluded;
+  }
+
+  /**
+   * Get whether each column is included from the file's point of view.
+   * @return a boolean array indexed by file column id
+   */
+  public boolean[] getFileIncluded() {
+    return fileIncluded;
+  }
+
+  /**
+   * Get the file type by reader type id.
+   * @param id reader column id
+   * @return the file type for the reader column id, or null if the column is not included
+   */
+  public TypeDescription getFileType(int id) {
+    return readerFileTypes[id];
+  }
+
+  /**
+   * Check if column is safe for ppd evaluation
+   * @param colId reader column id
+   * @return true if the specified column is safe for ppd evaluation else false
+   */
+  public boolean isPPDSafeConversion(final int colId) {
+    if (hasConversion()) {
+      if (colId < 0 || colId >= ppdSafeConversion.length) {
+        return false;
+      }
+      return ppdSafeConversion[colId];
+    }
+
+    // when there is no schema evolution PPD is safe
+    return true;
+  }
+
+  private boolean[] populatePpdSafeConversion() {
+    if (fileSchema == null || readerSchema == null || readerFileTypes == null) {
+      return null;
+    }
+
+    boolean[] result = new boolean[readerSchema.getMaximumId() + 1];
+    boolean safePpd = validatePPDConversion(fileSchema, readerSchema);
+    result[readerSchema.getId()] = safePpd;
+    List<TypeDescription> children = readerSchema.getChildren();
+    if (children != null) {
+      for (TypeDescription child : children) {
+        TypeDescription fileType = getFileType(child.getId());
+        safePpd = validatePPDConversion(fileType, child);
+        result[child.getId()] = safePpd;
+      }
+    }
+    return result;
+  }
+
+  private boolean validatePPDConversion(final TypeDescription fileType,
+      final TypeDescription readerType) {
+    if (fileType == null) {
+      return false;
+    }
+    if (fileType.getCategory().isPrimitive()) {
+      if (fileType.getCategory().equals(readerType.getCategory())) {
+        // for decimals alone do an equality check so a precision/scale change is not treated as safe
+        if (fileType.getCategory().equals(TypeDescription.Category.DECIMAL) &&
+            !fileType.equals(readerType)) {
+          return false;
+        }
+        return true;
+      }
+
+      // only integer and string evolutions are safe
+      // byte -> short -> int -> long
+      // string <-> char <-> varchar
+      // NOTE: Float to double evolution is not safe as floats are stored as doubles in ORC's
+      // internal index, but when doing predicate evaluation for queries like "select * from
+      // orc_float where f = 74.72" the constant on the filter is converted from string -> double
+      // so the precisions will be different and the comparison will fail.
+      // Soon, we should convert all sargs that compare equality between floats or
+      // doubles to range predicates.
+
+      // Similarly string -> char and varchar -> char and vice versa is not possible, as ORC stores
+      // char with padded spaces in its internal index.
+      switch (fileType.getCategory()) {
+        case BYTE:
+          if (readerType.getCategory().equals(TypeDescription.Category.SHORT) ||
+              readerType.getCategory().equals(TypeDescription.Category.INT) ||
+              readerType.getCategory().equals(TypeDescription.Category.LONG)) {
+            return true;
+          }
+          break;
+        case SHORT:
+          if (readerType.getCategory().equals(TypeDescription.Category.INT) ||
+              readerType.getCategory().equals(TypeDescription.Category.LONG)) {
+            return true;
+          }
+          break;
+        case INT:
+          if (readerType.getCategory().equals(TypeDescription.Category.LONG)) {
+            return true;
+          }
+          break;
+        case STRING:
+          if (readerType.getCategory().equals(TypeDescription.Category.VARCHAR)) {
+            return true;
+          }
+          break;
+        case VARCHAR:
+          if (readerType.getCategory().equals(TypeDescription.Category.STRING)) {
+            return true;
+          }
+          break;
+        default:
+          break;
+      }
+    }
+    return false;
+  }
+
+  /**
+   * Should we read the given reader column?
+   * @param readerId the id of column in the extended reader schema
+   * @return true if the column should be read
+   */
+  public boolean includeReaderColumn(int readerId) {
+    return readerIncluded == null ||
+        readerId <= readerColumnOffset ||
+        readerIncluded[readerId - readerColumnOffset];
+  }
+
+  void buildConversionFileTypesArray(TypeDescription fileType,
+                                     TypeDescription readerType) {
+    // if the column isn't included, don't map it
+    int readerId = readerType.getId();
+    if (!includeReaderColumn(readerId)) {
+      return;
+    }
+    boolean isOk = true;
+    // check the easy case first
+    if (fileType.getCategory() == readerType.getCategory()) {
+      switch (readerType.getCategory()) {
+        case BOOLEAN:
+        case BYTE:
+        case SHORT:
+        case INT:
+        case LONG:
+        case DOUBLE:
+        case FLOAT:
+        case STRING:
+        case TIMESTAMP:
+        case BINARY:
+        case DATE:
+          // these are always a match
+          break;
+        case CHAR:
+        case VARCHAR:
+          // We do conversion when same CHAR/VARCHAR type but different maxLength.
+          if (fileType.getMaxLength() != readerType.getMaxLength()) {
+            hasConversion = true;
+          }
+          break;
+        case DECIMAL:
+          // We do conversion when same DECIMAL type but different precision/scale.
+          if (fileType.getPrecision() != readerType.getPrecision() ||
+              fileType.getScale() != readerType.getScale()) {
+            hasConversion = true;
+          }
+          break;
+        case UNION:
+        case MAP:
+        case LIST: {
+          // these must be an exact match
+          List<TypeDescription> fileChildren = fileType.getChildren();
+          List<TypeDescription> readerChildren = readerType.getChildren();
+          if (fileChildren.size() == readerChildren.size()) {
+            for(int i=0; i < fileChildren.size(); ++i) {
+              buildConversionFileTypesArray(fileChildren.get(i), readerChildren.get(i));
+            }
+          } else {
+            isOk = false;
+          }
+          break;
+        }
+        case STRUCT: {
+          // allow either side to have fewer fields than the other
+          List<TypeDescription> fileChildren = fileType.getChildren();
+          List<TypeDescription> readerChildren = readerType.getChildren();
+          if (fileChildren.size() != readerChildren.size()) {
+            hasConversion = true;
+          }
+          int jointSize = Math.min(fileChildren.size(), readerChildren.size());
+          for(int i=0; i < jointSize; ++i) {
+            buildConversionFileTypesArray(fileChildren.get(i), readerChildren.get(i));
+          }
+          break;
+        }
+        default:
+          throw new IllegalArgumentException("Unknown type " + readerType);
+      }
+    } else {
+      /*
+       * Check for the few cases where we will not convert.
+       */
+
+      isOk = ConvertTreeReaderFactory.canConvert(fileType, readerType);
+      hasConversion = true;
+    }
+    if (isOk) {
+      if (readerFileTypes[readerId] != null) {
+        throw new RuntimeException("reader to file type entry already assigned");
+      }
+      readerFileTypes[readerId] = fileType;
+      fileIncluded[fileType.getId()] = true;
+    } else {
+      throw new IllegalArgumentException(
+          String.format(
+              "ORC does not support type conversion from file type %s (%d) to reader type %s (%d)",
+              fileType.toString(), fileType.getId(),
+              readerType.toString(), readerId));
+    }
+  }
+
+  /**
+   * Build the reader-to-file type array when the reader and file schemas are the same.
+   */
+  private void buildSameSchemaFileTypesArray() {
+    buildSameSchemaFileTypesArrayRecurse(readerSchema);
+  }
+
+  void buildSameSchemaFileTypesArrayRecurse(TypeDescription readerType) {
+    int id = readerType.getId();
+    if (!includeReaderColumn(id)) {
+      return;
+    }
+    if (readerFileTypes[id] != null) {
+      throw new RuntimeException("reader to file type entry already assigned");
+    }
+    readerFileTypes[id] = readerType;
+    fileIncluded[id] = true;
+    List<TypeDescription> children = readerType.getChildren();
+    if (children != null) {
+      for (TypeDescription child : children) {
+        buildSameSchemaFileTypesArrayRecurse(child);
+      }
+    }
+  }
+
+  private static boolean checkAcidSchema(TypeDescription type) {
+    if (type.getCategory().equals(TypeDescription.Category.STRUCT)) {
+      List<String> rootFields = type.getFieldNames();
+      if (acidEventFieldNames.equals(rootFields)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /**
+   * @param typeDescr
+   * @return ORC types for the ACID event based on the row's type description
+   */
+  public static TypeDescription createEventSchema(TypeDescription typeDescr) {
+    TypeDescription result = TypeDescription.createStruct()
+        .addField("operation", TypeDescription.createInt())
+        .addField("originalTransaction", TypeDescription.createLong())
+        .addField("bucket", TypeDescription.createInt())
+        .addField("rowId", TypeDescription.createLong())
+        .addField("currentTransaction", TypeDescription.createLong())
+        .addField("row", typeDescr.clone());
+    return result;
+  }
+
+  public static final List<String> acidEventFieldNames= new ArrayList<String>();
+  static {
+    acidEventFieldNames.add("operation");
+    acidEventFieldNames.add("originalTransaction");
+    acidEventFieldNames.add("bucket");
+    acidEventFieldNames.add("rowId");
+    acidEventFieldNames.add("currentTransaction");
+    acidEventFieldNames.add("row");
+  }
+}

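To make the schema evolution mapping above concrete, here is an illustrative sketch
(not part of this patch) that widens an int column to bigint and inspects the
resulting flags. It assumes the TypeDescription builder methods used elsewhere in
this patch (createStruct, addField, createInt, createLong) plus createString, and
that ConvertTreeReaderFactory.canConvert accepts the int-to-bigint widening.

  import org.apache.hive.orc.TypeDescription;
  import org.apache.hive.orc.impl.SchemaEvolution;

  public class SchemaEvolutionSketch {
    public static void main(String[] args) {
      // schema the file was written with: struct<x:int,y:string>
      TypeDescription fileSchema = TypeDescription.createStruct()
          .addField("x", TypeDescription.createInt())
          .addField("y", TypeDescription.createString());

      // schema the reader asks for: struct<x:bigint,y:string>
      TypeDescription readerSchema = TypeDescription.createStruct()
          .addField("x", TypeDescription.createLong())
          .addField("y", TypeDescription.createString());

      // a null include array means every column is read
      SchemaEvolution evolution =
          new SchemaEvolution(fileSchema, readerSchema, null);

      System.out.println(evolution.hasConversion());        // true: int is widened to bigint
      System.out.println(evolution.isPPDSafeConversion(1)); // true: int -> bigint stays PPD safe
      System.out.println(evolution.getFileType(1));         // the file's int type for column x
    }
  }

Column id 1 here is the first field of the struct; ids follow the pre-order numbering
of the type tree, with the root struct as id 0.
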

[29/37] hive git commit: HIVE-17118. Move the hive-orc source files to make the package names unique.

Posted by om...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/impl/RecordReaderImpl.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/impl/RecordReaderImpl.java b/orc/src/java/org/apache/hive/orc/impl/RecordReaderImpl.java
new file mode 100644
index 0000000..d0edce5
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/impl/RecordReaderImpl.java
@@ -0,0 +1,1238 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc.impl;
+
+import java.io.IOException;
+import java.math.BigDecimal;
+import java.sql.Date;
+import java.sql.Timestamp;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hive.orc.BooleanColumnStatistics;
+import org.apache.hive.orc.ColumnStatistics;
+import org.apache.hive.orc.CompressionCodec;
+import org.apache.hive.orc.DoubleColumnStatistics;
+import org.apache.hive.orc.IntegerColumnStatistics;
+import org.apache.hive.orc.Reader;
+import org.apache.hive.orc.RecordReader;
+import org.apache.hive.orc.TypeDescription;
+import org.apache.hive.orc.DataReader;
+import org.apache.hive.orc.DateColumnStatistics;
+import org.apache.hive.orc.DecimalColumnStatistics;
+import org.apache.hive.orc.OrcConf;
+import org.apache.hive.orc.StringColumnStatistics;
+import org.apache.hive.orc.StripeInformation;
+import org.apache.hive.orc.TimestampColumnStatistics;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.io.DiskRange;
+import org.apache.hadoop.hive.common.io.DiskRangeList;
+import org.apache.hadoop.hive.common.io.DiskRangeList.CreateHelper;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hive.orc.BloomFilterIO;
+import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
+import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
+import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.hive.ql.util.TimestampUtils;
+import org.apache.hadoop.io.Text;
+import org.apache.hive.orc.OrcProto;
+
+public class RecordReaderImpl implements RecordReader {
+  static final Logger LOG = LoggerFactory.getLogger(RecordReaderImpl.class);
+  private static final boolean isLogDebugEnabled = LOG.isDebugEnabled();
+  private static final Object UNKNOWN_VALUE = new Object();
+  protected final Path path;
+  private final long firstRow;
+  private final List<StripeInformation> stripes =
+      new ArrayList<StripeInformation>();
+  private OrcProto.StripeFooter stripeFooter;
+  private final long totalRowCount;
+  private final CompressionCodec codec;
+  protected final TypeDescription schema;
+  private final List<OrcProto.Type> types;
+  private final int bufferSize;
+  private final SchemaEvolution evolution;
+  // the file included columns indexed by the file's column ids.
+  private final boolean[] included;
+  private final long rowIndexStride;
+  private long rowInStripe = 0;
+  private int currentStripe = -1;
+  private long rowBaseInStripe = 0;
+  private long rowCountInStripe = 0;
+  private final Map<StreamName, InStream> streams =
+      new HashMap<StreamName, InStream>();
+  DiskRangeList bufferChunks = null;
+  private final TreeReaderFactory.TreeReader reader;
+  private final OrcProto.RowIndex[] indexes;
+  private final OrcProto.BloomFilterIndex[] bloomFilterIndices;
+  private final SargApplier sargApp;
+  // an array indicating which row groups aren't skipped
+  private boolean[] includedRowGroups = null;
+  private final DataReader dataReader;
+
+  /**
+   * Find the given column in the reader schema and return its file column id.
+   *
+   * @param evolution the mapping from reader to file schema
+   * @param columnName  the column name to look for
+   * @return the file column id or -1 if the column wasn't found
+   */
+  static int findColumns(SchemaEvolution evolution,
+                         String columnName) {
+    TypeDescription readerSchema = evolution.getReaderBaseSchema();
+    List<String> fieldNames = readerSchema.getFieldNames();
+    List<TypeDescription> children = readerSchema.getChildren();
+    for (int i = 0; i < fieldNames.size(); ++i) {
+      if (columnName.equals(fieldNames.get(i))) {
+        TypeDescription result = evolution.getFileType(children.get(i).getId());
+        return result == null ? -1 : result.getId();
+      }
+    }
+    return -1;
+  }
+
+  /**
+   * Find the mapping from predicate leaves to columns.
+   * @param sargLeaves the search argument that we need to map
+   * @param evolution the mapping from reader to file schema
+   * @return an array mapping the sarg leaves to file column ids
+   */
+  public static int[] mapSargColumnsToOrcInternalColIdx(List<PredicateLeaf> sargLeaves,
+                             SchemaEvolution evolution) {
+    int[] result = new int[sargLeaves.size()];
+    Arrays.fill(result, -1);
+    for(int i=0; i < result.length; ++i) {
+      String colName = sargLeaves.get(i).getColumnName();
+      result[i] = findColumns(evolution, colName);
+    }
+    return result;
+  }
+
+  protected RecordReaderImpl(ReaderImpl fileReader,
+                             Reader.Options options) throws IOException {
+    boolean[] readerIncluded = options.getInclude();
+    if (options.getSchema() == null) {
+      if (LOG.isInfoEnabled()) {
+        LOG.info("Reader schema not provided -- using file schema " +
+            fileReader.getSchema());
+      }
+      evolution = new SchemaEvolution(fileReader.getSchema(), readerIncluded);
+    } else {
+
+      // Now that we are creating a record reader for a file, validate that the schema to read
+      // is compatible with the file schema.
+      //
+      evolution = new SchemaEvolution(fileReader.getSchema(),
+          options.getSchema(), readerIncluded);
+      if (LOG.isDebugEnabled() && evolution.hasConversion()) {
+        LOG.debug("ORC file " + fileReader.path.toString() +
+            " has data type conversion --\n" +
+            "reader schema: " + options.getSchema().toString() + "\n" +
+            "file schema:   " + fileReader.getSchema());
+      }
+    }
+    this.schema = evolution.getReaderSchema();
+    this.path = fileReader.path;
+    this.codec = fileReader.codec;
+    this.types = fileReader.types;
+    this.bufferSize = fileReader.bufferSize;
+    this.rowIndexStride = fileReader.rowIndexStride;
+    SearchArgument sarg = options.getSearchArgument();
+    if (sarg != null && rowIndexStride != 0) {
+      sargApp = new SargApplier(sarg, options.getColumnNames(), rowIndexStride,
+          evolution);
+    } else {
+      sargApp = null;
+    }
+    long rows = 0;
+    long skippedRows = 0;
+    long offset = options.getOffset();
+    long maxOffset = options.getMaxOffset();
+    for(StripeInformation stripe: fileReader.getStripes()) {
+      long stripeStart = stripe.getOffset();
+      if (offset > stripeStart) {
+        skippedRows += stripe.getNumberOfRows();
+      } else if (stripeStart < maxOffset) {
+        this.stripes.add(stripe);
+        rows += stripe.getNumberOfRows();
+      }
+    }
+
+    Boolean zeroCopy = options.getUseZeroCopy();
+    if (zeroCopy == null) {
+      zeroCopy = OrcConf.USE_ZEROCOPY.getBoolean(fileReader.conf);
+    }
+    if (options.getDataReader() != null) {
+      this.dataReader = options.getDataReader();
+    } else {
+      this.dataReader = RecordReaderUtils.createDefaultDataReader(
+          DataReaderProperties.builder()
+              .withBufferSize(bufferSize)
+              .withCompression(fileReader.compressionKind)
+              .withFileSystem(fileReader.fileSystem)
+              .withPath(fileReader.path)
+              .withTypeCount(types.size())
+              .withZeroCopy(zeroCopy)
+              .build());
+    }
+    this.dataReader.open();
+
+    firstRow = skippedRows;
+    totalRowCount = rows;
+    Boolean skipCorrupt = options.getSkipCorruptRecords();
+    if (skipCorrupt == null) {
+      skipCorrupt = OrcConf.SKIP_CORRUPT_DATA.getBoolean(fileReader.conf);
+    }
+
+    reader = TreeReaderFactory.createTreeReader(evolution.getReaderSchema(),
+        evolution, readerIncluded, skipCorrupt);
+    indexes = new OrcProto.RowIndex[types.size()];
+    bloomFilterIndices = new OrcProto.BloomFilterIndex[types.size()];
+    this.included = evolution.getFileIncluded();
+    advanceToNextRow(reader, 0L, true);
+  }
+
+  public static final class PositionProviderImpl implements PositionProvider {
+    private final OrcProto.RowIndexEntry entry;
+    private int index;
+
+    public PositionProviderImpl(OrcProto.RowIndexEntry entry) {
+      this(entry, 0);
+    }
+
+    public PositionProviderImpl(OrcProto.RowIndexEntry entry, int startPos) {
+      this.entry = entry;
+      this.index = startPos;
+    }
+
+    @Override
+    public long getNext() {
+      return entry.getPositions(index++);
+    }
+
+    @Override
+    public String toString() {
+      return "{" + entry.getPositionsList() + "; " + index + "}";
+    }
+  }
+
+  public OrcProto.StripeFooter readStripeFooter(StripeInformation stripe
+                                                ) throws IOException {
+    return dataReader.readStripeFooter(stripe);
+  }
+
+  enum Location {
+    BEFORE, MIN, MIDDLE, MAX, AFTER
+  }
+
+  /**
+   * Given a point and min and max, determine if the point is before, at the
+   * min, in the middle, at the max, or after the range.
+   * @param point the point to test
+   * @param min the minimum point
+   * @param max the maximum point
+   * @param <T> the type of the comparison
+   * @return the location of the point
+   */
+  static <T> Location compareToRange(Comparable<T> point, T min, T max) {
+    int minCompare = point.compareTo(min);
+    if (minCompare < 0) {
+      return Location.BEFORE;
+    } else if (minCompare == 0) {
+      return Location.MIN;
+    }
+    int maxCompare = point.compareTo(max);
+    if (maxCompare > 0) {
+      return Location.AFTER;
+    } else if (maxCompare == 0) {
+      return Location.MAX;
+    }
+    return Location.MIDDLE;
+  }
+
+  /**
+   * Get the maximum value out of an index entry.
+   * @param index
+   *          the index entry
+   * @return the object for the maximum value or null if there isn't one
+   */
+  static Object getMax(ColumnStatistics index) {
+    if (index instanceof IntegerColumnStatistics) {
+      return ((IntegerColumnStatistics) index).getMaximum();
+    } else if (index instanceof DoubleColumnStatistics) {
+      return ((DoubleColumnStatistics) index).getMaximum();
+    } else if (index instanceof StringColumnStatistics) {
+      return ((StringColumnStatistics) index).getMaximum();
+    } else if (index instanceof DateColumnStatistics) {
+      return ((DateColumnStatistics) index).getMaximum();
+    } else if (index instanceof DecimalColumnStatistics) {
+      return ((DecimalColumnStatistics) index).getMaximum();
+    } else if (index instanceof TimestampColumnStatistics) {
+      return ((TimestampColumnStatistics) index).getMaximum();
+    } else if (index instanceof BooleanColumnStatistics) {
+      if (((BooleanColumnStatistics)index).getTrueCount()!=0) {
+        return Boolean.TRUE;
+      } else {
+        return Boolean.FALSE;
+      }
+    } else {
+      return null;
+    }
+  }
+
+  /**
+   * Get the minimum value out of an index entry.
+   * @param index
+   *          the index entry
+   * @return the object for the minimum value or null if there isn't one
+   */
+  static Object getMin(ColumnStatistics index) {
+    if (index instanceof IntegerColumnStatistics) {
+      return ((IntegerColumnStatistics) index).getMinimum();
+    } else if (index instanceof DoubleColumnStatistics) {
+      return ((DoubleColumnStatistics) index).getMinimum();
+    } else if (index instanceof StringColumnStatistics) {
+      return ((StringColumnStatistics) index).getMinimum();
+    } else if (index instanceof DateColumnStatistics) {
+      return ((DateColumnStatistics) index).getMinimum();
+    } else if (index instanceof DecimalColumnStatistics) {
+      return ((DecimalColumnStatistics) index).getMinimum();
+    } else if (index instanceof TimestampColumnStatistics) {
+      return ((TimestampColumnStatistics) index).getMinimum();
+    } else if (index instanceof BooleanColumnStatistics) {
+      if (((BooleanColumnStatistics)index).getFalseCount()!=0) {
+        return Boolean.FALSE;
+      } else {
+        return Boolean.TRUE;
+      }
+    } else {
+      return UNKNOWN_VALUE; // null is not safe here
+    }
+  }
+
+  /**
+   * Evaluate a predicate with respect to the statistics from the column
+   * that is referenced in the predicate.
+   * @param statsProto the statistics for the column mentioned in the predicate
+   * @param predicate the leaf predicate we need to evaluate
+   * @param bloomFilter the bloom filter for the column, or null if there isn't one
+   * @return the set of truth values that may be returned for the given
+   *   predicate.
+   */
+  static TruthValue evaluatePredicateProto(OrcProto.ColumnStatistics statsProto,
+      PredicateLeaf predicate, OrcProto.BloomFilter bloomFilter) {
+    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(statsProto);
+    Object minValue = getMin(cs);
+    Object maxValue = getMax(cs);
+    BloomFilterIO bf = null;
+    if (bloomFilter != null) {
+      bf = new BloomFilterIO(bloomFilter);
+    }
+    return evaluatePredicateRange(predicate, minValue, maxValue, cs.hasNull(), bf);
+  }
+
+  /**
+   * Evaluate a predicate with respect to the statistics from the column
+   * that is referenced in the predicate.
+   * @param stats the statistics for the column mentioned in the predicate
+   * @param predicate the leaf predicate we need to evaluate
+   * @return the set of truth values that may be returned for the given
+   *   predicate.
+   */
+  public static TruthValue evaluatePredicate(ColumnStatistics stats,
+                                             PredicateLeaf predicate,
+                                             BloomFilterIO bloomFilter) {
+    Object minValue = getMin(stats);
+    Object maxValue = getMax(stats);
+    return evaluatePredicateRange(predicate, minValue, maxValue, stats.hasNull(), bloomFilter);
+  }
+
+  static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object min,
+      Object max, boolean hasNull, BloomFilterIO bloomFilter) {
+    // if we didn't have any values, everything must have been null
+    if (min == null) {
+      if (predicate.getOperator() == PredicateLeaf.Operator.IS_NULL) {
+        return TruthValue.YES;
+      } else {
+        return TruthValue.NULL;
+      }
+    } else if (min == UNKNOWN_VALUE) {
+      return TruthValue.YES_NO_NULL;
+    }
+
+    // TODO: Enabling PPD for timestamp requires ORC-101 and ORC-135
+    if (min != null && min instanceof Timestamp) {
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Not using predication pushdown on {} because it doesn't " +
+          "include ORC-135.", predicate.getColumnName());
+      }
+      return TruthValue.YES_NO_NULL;
+    }
+
+    TruthValue result;
+    Object baseObj = predicate.getLiteral();
+    try {
+      // Predicate object and stats objects are converted to the type of the predicate object.
+      Object minValue = getBaseObjectForComparison(predicate.getType(), min);
+      Object maxValue = getBaseObjectForComparison(predicate.getType(), max);
+      Object predObj = getBaseObjectForComparison(predicate.getType(), baseObj);
+
+      result = evaluatePredicateMinMax(predicate, predObj, minValue, maxValue, hasNull);
+      if (shouldEvaluateBloomFilter(predicate, result, bloomFilter)) {
+        result = evaluatePredicateBloomFilter(predicate, predObj, bloomFilter, hasNull);
+      }
+      // in case failed conversion, return the default YES_NO_NULL truth value
+    } catch (Exception e) {
+      if (LOG.isDebugEnabled()) {
+        final String statsType = min == null ?
+            (max == null ? "null" : max.getClass().getSimpleName()) :
+            min.getClass().getSimpleName();
+        final String predicateType = baseObj == null ? "null" : baseObj.getClass().getSimpleName();
+        final String reason = e.getClass().getSimpleName() + " when evaluating predicate." +
+            " Skipping ORC PPD." +
+            " Exception: " + e.getMessage() +
+            " StatsType: " + statsType +
+            " PredicateType: " + predicateType;
+        LOG.debug(reason);
+      }
+      if (predicate.getOperator().equals(PredicateLeaf.Operator.NULL_SAFE_EQUALS) || !hasNull) {
+        result = TruthValue.YES_NO;
+      } else {
+        result = TruthValue.YES_NO_NULL;
+      }
+    }
+    return result;
+  }
+
+  private static boolean shouldEvaluateBloomFilter(PredicateLeaf predicate,
+      TruthValue result, BloomFilterIO bloomFilter) {
+    // evaluate bloom filter only when
+    // 1) Bloom filter is available
+    // 2) Min/Max evaluation yield YES or MAYBE
+    // 3) Predicate is EQUALS or IN list
+    if (bloomFilter != null
+        && result != TruthValue.NO_NULL && result != TruthValue.NO
+        && (predicate.getOperator().equals(PredicateLeaf.Operator.EQUALS)
+            || predicate.getOperator().equals(PredicateLeaf.Operator.NULL_SAFE_EQUALS)
+            || predicate.getOperator().equals(PredicateLeaf.Operator.IN))) {
+      return true;
+    }
+    return false;
+  }
+
+  private static TruthValue evaluatePredicateMinMax(PredicateLeaf predicate, Object predObj,
+      Object minValue,
+      Object maxValue,
+      boolean hasNull) {
+    Location loc;
+
+    switch (predicate.getOperator()) {
+      case NULL_SAFE_EQUALS:
+        loc = compareToRange((Comparable) predObj, minValue, maxValue);
+        if (loc == Location.BEFORE || loc == Location.AFTER) {
+          return TruthValue.NO;
+        } else {
+          return TruthValue.YES_NO;
+        }
+      case EQUALS:
+        loc = compareToRange((Comparable) predObj, minValue, maxValue);
+        if (minValue.equals(maxValue) && loc == Location.MIN) {
+          return hasNull ? TruthValue.YES_NULL : TruthValue.YES;
+        } else if (loc == Location.BEFORE || loc == Location.AFTER) {
+          return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
+        } else {
+          return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
+        }
+      case LESS_THAN:
+        loc = compareToRange((Comparable) predObj, minValue, maxValue);
+        if (loc == Location.AFTER) {
+          return hasNull ? TruthValue.YES_NULL : TruthValue.YES;
+        } else if (loc == Location.BEFORE || loc == Location.MIN) {
+          return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
+        } else {
+          return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
+        }
+      case LESS_THAN_EQUALS:
+        loc = compareToRange((Comparable) predObj, minValue, maxValue);
+        if (loc == Location.AFTER || loc == Location.MAX) {
+          return hasNull ? TruthValue.YES_NULL : TruthValue.YES;
+        } else if (loc == Location.BEFORE) {
+          return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
+        } else {
+          return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
+        }
+      case IN:
+        if (minValue.equals(maxValue)) {
+          // for a single value, look through to see if that value is in the
+          // set
+          for (Object arg : predicate.getLiteralList()) {
+            predObj = getBaseObjectForComparison(predicate.getType(), arg);
+            loc = compareToRange((Comparable) predObj, minValue, maxValue);
+            if (loc == Location.MIN) {
+              return hasNull ? TruthValue.YES_NULL : TruthValue.YES;
+            }
+          }
+          return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
+        } else {
+          // are all of the values outside of the range?
+          for (Object arg : predicate.getLiteralList()) {
+            predObj = getBaseObjectForComparison(predicate.getType(), arg);
+            loc = compareToRange((Comparable) predObj, minValue, maxValue);
+            if (loc == Location.MIN || loc == Location.MIDDLE ||
+                loc == Location.MAX) {
+              return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
+            }
+          }
+          return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
+        }
+      case BETWEEN:
+        List<Object> args = predicate.getLiteralList();
+        Object predObj1 = getBaseObjectForComparison(predicate.getType(), args.get(0));
+
+        loc = compareToRange((Comparable) predObj1, minValue, maxValue);
+        if (loc == Location.BEFORE || loc == Location.MIN) {
+          Object predObj2 = getBaseObjectForComparison(predicate.getType(), args.get(1));
+
+          Location loc2 = compareToRange((Comparable) predObj2, minValue, maxValue);
+          if (loc2 == Location.AFTER || loc2 == Location.MAX) {
+            return hasNull ? TruthValue.YES_NULL : TruthValue.YES;
+          } else if (loc2 == Location.BEFORE) {
+            return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
+          } else {
+            return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
+          }
+        } else if (loc == Location.AFTER) {
+          return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
+        } else {
+          return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
+        }
+      case IS_NULL:
+        // min = null condition above handles the all-nulls YES case
+        return hasNull ? TruthValue.YES_NO : TruthValue.NO;
+      default:
+        return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
+    }
+  }
+
+  private static TruthValue evaluatePredicateBloomFilter(PredicateLeaf predicate,
+      final Object predObj, BloomFilterIO bloomFilter, boolean hasNull) {
+    switch (predicate.getOperator()) {
+      case NULL_SAFE_EQUALS:
+        // null safe equals does not return *_NULL variant. So set hasNull to false
+        return checkInBloomFilter(bloomFilter, predObj, false);
+      case EQUALS:
+        return checkInBloomFilter(bloomFilter, predObj, hasNull);
+      case IN:
+        for (Object arg : predicate.getLiteralList()) {
+          // if at least one value in the IN list exists in the bloom filter, qualify the row group/stripe
+          Object predObjItem = getBaseObjectForComparison(predicate.getType(), arg);
+          TruthValue result = checkInBloomFilter(bloomFilter, predObjItem, hasNull);
+          if (result == TruthValue.YES_NO_NULL || result == TruthValue.YES_NO) {
+            return result;
+          }
+        }
+        return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
+      default:
+        return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
+    }
+  }
+
+  private static TruthValue checkInBloomFilter(BloomFilterIO bf, Object predObj, boolean hasNull) {
+    TruthValue result = hasNull ? TruthValue.NO_NULL : TruthValue.NO;
+
+    if (predObj instanceof Long) {
+      if (bf.testLong(((Long) predObj).longValue())) {
+        result = TruthValue.YES_NO_NULL;
+      }
+    } else if (predObj instanceof Double) {
+      if (bf.testDouble(((Double) predObj).doubleValue())) {
+        result = TruthValue.YES_NO_NULL;
+      }
+    } else if (predObj instanceof String || predObj instanceof Text ||
+        predObj instanceof HiveDecimalWritable ||
+        predObj instanceof BigDecimal) {
+      if (bf.testString(predObj.toString())) {
+        result = TruthValue.YES_NO_NULL;
+      }
+    } else if (predObj instanceof Timestamp) {
+      if (bf.testLong(((Timestamp) predObj).getTime())) {
+        result = TruthValue.YES_NO_NULL;
+      }
+    } else if (predObj instanceof Date) {
+      if (bf.testLong(DateWritable.dateToDays((Date) predObj))) {
+        result = TruthValue.YES_NO_NULL;
+      }
+    } else {
+        // if the predicate object is null and if hasNull says there are no nulls then return NO
+        if (predObj == null && !hasNull) {
+          result = TruthValue.NO;
+        } else {
+          result = TruthValue.YES_NO_NULL;
+        }
+      }
+
+    if (result == TruthValue.YES_NO_NULL && !hasNull) {
+      result = TruthValue.YES_NO;
+    }
+
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Bloom filter evaluation: " + result.toString());
+    }
+
+    return result;
+  }
+
+  private static Object getBaseObjectForComparison(PredicateLeaf.Type type, Object obj) {
+    if (obj == null) {
+      return null;
+    }
+    switch (type) {
+      case BOOLEAN:
+        if (obj instanceof Boolean) {
+          return obj;
+        } else {
+          // will only be true if the string conversion yields "true", all other values are
+          // considered false
+          return Boolean.valueOf(obj.toString());
+        }
+      case DATE:
+        if (obj instanceof Date) {
+          return obj;
+        } else if (obj instanceof String) {
+          return Date.valueOf((String) obj);
+        } else if (obj instanceof Timestamp) {
+          return DateWritable.timeToDate(((Timestamp) obj).getTime() / 1000L);
+        }
+        // always string, but prevent the comparison to numbers (are they days/seconds/milliseconds?)
+        break;
+      case DECIMAL:
+        if (obj instanceof Boolean) {
+          return new HiveDecimalWritable(((Boolean) obj).booleanValue() ?
+              HiveDecimal.ONE : HiveDecimal.ZERO);
+        } else if (obj instanceof Integer) {
+          return new HiveDecimalWritable(((Integer) obj).intValue());
+        } else if (obj instanceof Long) {
+          return new HiveDecimalWritable(((Long) obj));
+        } else if (obj instanceof Float || obj instanceof Double ||
+            obj instanceof String) {
+          return new HiveDecimalWritable(obj.toString());
+        } else if (obj instanceof BigDecimal) {
+          return new HiveDecimalWritable(HiveDecimal.create((BigDecimal) obj));
+        } else if (obj instanceof HiveDecimal) {
+          return new HiveDecimalWritable((HiveDecimal) obj);
+        } else if (obj instanceof HiveDecimalWritable) {
+          return obj;
+        } else if (obj instanceof Timestamp) {
+          return new HiveDecimalWritable(Double.toString(
+              TimestampUtils.getDouble((Timestamp) obj)));
+        }
+        break;
+      case FLOAT:
+        if (obj instanceof Number) {
+          // widening conversion
+          return ((Number) obj).doubleValue();
+        } else if (obj instanceof HiveDecimal) {
+          return ((HiveDecimal) obj).doubleValue();
+        } else if (obj instanceof String) {
+          return Double.valueOf(obj.toString());
+        } else if (obj instanceof Timestamp) {
+          return TimestampUtils.getDouble((Timestamp) obj);
+        } else if (obj instanceof HiveDecimal) {
+          return ((HiveDecimal) obj).doubleValue();
+        } else if (obj instanceof BigDecimal) {
+          return ((BigDecimal) obj).doubleValue();
+        }
+        break;
+      case LONG:
+        if (obj instanceof Number) {
+          // widening conversion
+          return ((Number) obj).longValue();
+        } else if (obj instanceof HiveDecimal) {
+          return ((HiveDecimal) obj).longValue();
+        } else if (obj instanceof String) {
+          return Long.valueOf(obj.toString());
+        }
+        break;
+      case STRING:
+        if (obj != null) {
+          return (obj.toString());
+        }
+        break;
+      case TIMESTAMP:
+        if (obj instanceof Timestamp) {
+          return obj;
+        } else if (obj instanceof Integer) {
+          return new Timestamp(((Number) obj).longValue());
+        } else if (obj instanceof Float) {
+          return TimestampUtils.doubleToTimestamp(((Float) obj).doubleValue());
+        } else if (obj instanceof Double) {
+          return TimestampUtils.doubleToTimestamp(((Double) obj).doubleValue());
+        } else if (obj instanceof HiveDecimal) {
+          return TimestampUtils.decimalToTimestamp((HiveDecimal) obj);
+        } else if (obj instanceof HiveDecimalWritable) {
+          return TimestampUtils.decimalToTimestamp(((HiveDecimalWritable) obj).getHiveDecimal());
+        } else if (obj instanceof Date) {
+          return new Timestamp(((Date) obj).getTime());
+        }
+        // float/double conversion to timestamp is interpreted as seconds whereas integer conversion
+        // to timestamp is interpreted as milliseconds by default. The integer to timestamp casting
+        // is also config driven. The filter operator changes its promotion based on config:
+        // "int.timestamp.conversion.in.seconds". Disable PPD for integer cases.
+        break;
+      default:
+        break;
+    }
+
+    throw new IllegalArgumentException(String.format(
+        "ORC SARGS could not convert from %s to %s", obj == null ? "(null)" : obj.getClass()
+            .getSimpleName(), type));
+  }
+
+  public static class SargApplier {
+    public final static boolean[] READ_ALL_RGS = null;
+    public final static boolean[] READ_NO_RGS = new boolean[0];
+
+    private final SearchArgument sarg;
+    private final List<PredicateLeaf> sargLeaves;
+    private final int[] filterColumns;
+    private final long rowIndexStride;
+    // same as the above array, but indices are set to true
+    private final boolean[] sargColumns;
+    private SchemaEvolution evolution;
+
+    public SargApplier(SearchArgument sarg, String[] columnNames,
+                       long rowIndexStride,
+                       SchemaEvolution evolution) {
+      this.sarg = sarg;
+      sargLeaves = sarg.getLeaves();
+      filterColumns = mapSargColumnsToOrcInternalColIdx(sargLeaves, evolution);
+      this.rowIndexStride = rowIndexStride;
+      // included will not be null, row options will fill the array with trues if null
+      sargColumns = new boolean[evolution.getFileIncluded().length];
+      for (int i : filterColumns) {
+        // filter columns may have -1 as index which could be partition column in SARG.
+        if (i > 0) {
+          sargColumns[i] = true;
+        }
+      }
+      this.evolution = evolution;
+    }
+
+    /**
+     * Pick the row groups that we need to load from the current stripe.
+     *
+     * @return an array with a boolean for each row group or null if all of the
+     * row groups must be read.
+     * @throws IOException
+     */
+    public boolean[] pickRowGroups(StripeInformation stripe, OrcProto.RowIndex[] indexes,
+        OrcProto.BloomFilterIndex[] bloomFilterIndices, boolean returnNone) throws IOException {
+      long rowsInStripe = stripe.getNumberOfRows();
+      int groupsInStripe = (int) ((rowsInStripe + rowIndexStride - 1) / rowIndexStride);
+      boolean[] result = new boolean[groupsInStripe]; // TODO: avoid alloc?
+      TruthValue[] leafValues = new TruthValue[sargLeaves.size()];
+      boolean hasSelected = false, hasSkipped = false;
+      for (int rowGroup = 0; rowGroup < result.length; ++rowGroup) {
+        for (int pred = 0; pred < leafValues.length; ++pred) {
+          int columnIx = filterColumns[pred];
+          if (columnIx != -1) {
+            if (indexes[columnIx] == null) {
+              throw new AssertionError("Index is not populated for " + columnIx);
+            }
+            OrcProto.RowIndexEntry entry = indexes[columnIx].getEntry(rowGroup);
+            if (entry == null) {
+              throw new AssertionError("RG is not populated for " + columnIx + " rg " + rowGroup);
+            }
+            OrcProto.ColumnStatistics stats = entry.getStatistics();
+            OrcProto.BloomFilter bf = null;
+            if (bloomFilterIndices != null && bloomFilterIndices[columnIx] != null) {
+              bf = bloomFilterIndices[columnIx].getBloomFilter(rowGroup);
+            }
+            if (evolution != null && evolution.isPPDSafeConversion(columnIx)) {
+              leafValues[pred] = evaluatePredicateProto(stats, sargLeaves.get(pred), bf);
+            } else {
+              leafValues[pred] = TruthValue.YES_NO_NULL;
+            }
+            if (LOG.isTraceEnabled()) {
+              LOG.trace("Stats = " + stats);
+              LOG.trace("Setting " + sargLeaves.get(pred) + " to " + leafValues[pred]);
+            }
+          } else {
+            // the column is a virtual column
+            leafValues[pred] = TruthValue.YES_NO_NULL;
+          }
+        }
+        result[rowGroup] = sarg.evaluate(leafValues).isNeeded();
+        hasSelected = hasSelected || result[rowGroup];
+        hasSkipped = hasSkipped || (!result[rowGroup]);
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Row group " + (rowIndexStride * rowGroup) + " to " +
+              (rowIndexStride * (rowGroup + 1) - 1) + " is " +
+              (result[rowGroup] ? "" : "not ") + "included.");
+        }
+      }
+
+      return hasSkipped ? ((hasSelected || !returnNone) ? result : READ_NO_RGS) : READ_ALL_RGS;
+    }
+  }
+
+  /**
+   * Pick the row groups that we need to load from the current stripe.
+   *
+   * @return an array with a boolean for each row group or null if all of the
+   * row groups must be read.
+   * @throws IOException
+   */
+  protected boolean[] pickRowGroups() throws IOException {
+    // if we don't have a sarg or indexes, we read everything
+    if (sargApp == null) {
+      return null;
+    }
+    readRowIndex(currentStripe, included, sargApp.sargColumns);
+    return sargApp.pickRowGroups(stripes.get(currentStripe), indexes, bloomFilterIndices, false);
+  }
+
+  private void clearStreams() {
+    // explicit close of all streams to de-ref ByteBuffers
+    for (InStream is : streams.values()) {
+      is.close();
+    }
+    if (bufferChunks != null) {
+      if (dataReader.isTrackingDiskRanges()) {
+        for (DiskRangeList range = bufferChunks; range != null; range = range.next) {
+          if (!(range instanceof BufferChunk)) {
+            continue;
+          }
+          dataReader.releaseBuffer(((BufferChunk) range).getChunk());
+        }
+      }
+    }
+    bufferChunks = null;
+    streams.clear();
+  }
+
+  /**
+   * Read the current stripe into memory.
+   *
+   * @throws IOException
+   */
+  private void readStripe() throws IOException {
+    StripeInformation stripe = beginReadStripe();
+    includedRowGroups = pickRowGroups();
+
+    // move forward to the first unskipped row
+    if (includedRowGroups != null) {
+      while (rowInStripe < rowCountInStripe &&
+          !includedRowGroups[(int) (rowInStripe / rowIndexStride)]) {
+        rowInStripe = Math.min(rowCountInStripe, rowInStripe + rowIndexStride);
+      }
+    }
+
+    // if we haven't skipped the whole stripe, read the data
+    if (rowInStripe < rowCountInStripe) {
+      // if we aren't projecting columns or filtering rows, just read it all
+      if (included == null && includedRowGroups == null) {
+        readAllDataStreams(stripe);
+      } else {
+        readPartialDataStreams(stripe);
+      }
+      reader.startStripe(streams, stripeFooter);
+      // if we skipped the first row group, move the pointers forward
+      if (rowInStripe != 0) {
+        seekToRowEntry(reader, (int) (rowInStripe / rowIndexStride));
+      }
+    }
+  }
+
+  private StripeInformation beginReadStripe() throws IOException {
+    StripeInformation stripe = stripes.get(currentStripe);
+    stripeFooter = readStripeFooter(stripe);
+    clearStreams();
+    // setup the position in the stripe
+    rowCountInStripe = stripe.getNumberOfRows();
+    rowInStripe = 0;
+    rowBaseInStripe = 0;
+    for (int i = 0; i < currentStripe; ++i) {
+      rowBaseInStripe += stripes.get(i).getNumberOfRows();
+    }
+    // reset all of the indexes
+    for (int i = 0; i < indexes.length; ++i) {
+      indexes[i] = null;
+    }
+    return stripe;
+  }
+
+  private void readAllDataStreams(StripeInformation stripe) throws IOException {
+    long start = stripe.getIndexLength();
+    long end = start + stripe.getDataLength();
+    // explicitly trigger 1 big read
+    DiskRangeList toRead = new DiskRangeList(start, end);
+    bufferChunks = dataReader.readFileData(toRead, stripe.getOffset(), false);
+    List<OrcProto.Stream> streamDescriptions = stripeFooter.getStreamsList();
+    createStreams(streamDescriptions, bufferChunks, null, codec, bufferSize, streams);
+  }
+
+  /**
+   * Plan the ranges of the file that we need to read given the list of
+   * columns and row groups.
+   *
+   * @param streamList        the list of streams available
+   * @param indexes           the indexes that have been loaded
+   * @param includedColumns   which columns are needed
+   * @param includedRowGroups which row groups are needed
+   * @param isCompressed      does the file have generic compression
+   * @param encodings         the encodings for each column
+   * @param types             the types of the columns
+   * @param compressionSize   the compression block size
+   * @return the list of disk ranges that will be loaded
+   */
+  static DiskRangeList planReadPartialDataStreams(
+      List<OrcProto.Stream> streamList,
+      OrcProto.RowIndex[] indexes,
+      boolean[] includedColumns,
+      boolean[] includedRowGroups,
+      boolean isCompressed,
+      List<OrcProto.ColumnEncoding> encodings,
+      List<OrcProto.Type> types,
+      int compressionSize,
+      boolean doMergeBuffers) {
+    long offset = 0;
+    // figure out which columns have a present stream
+    boolean[] hasNull = RecordReaderUtils.findPresentStreamsByColumn(streamList, types);
+    CreateHelper list = new CreateHelper();
+    for (OrcProto.Stream stream : streamList) {
+      long length = stream.getLength();
+      int column = stream.getColumn();
+      OrcProto.Stream.Kind streamKind = stream.getKind();
+      // since stream kind is optional, first check if it exists
+      if (stream.hasKind() &&
+          (StreamName.getArea(streamKind) == StreamName.Area.DATA) &&
+          (column < includedColumns.length && includedColumns[column])) {
+        // if we aren't filtering or it is a dictionary, load it.
+        if (includedRowGroups == null
+            || RecordReaderUtils.isDictionary(streamKind, encodings.get(column))) {
+          RecordReaderUtils.addEntireStreamToRanges(offset, length, list, doMergeBuffers);
+        } else {
+          RecordReaderUtils.addRgFilteredStreamToRanges(stream, includedRowGroups,
+              isCompressed, indexes[column], encodings.get(column), types.get(column),
+              compressionSize, hasNull[column], offset, length, list, doMergeBuffers);
+        }
+      }
+      offset += length;
+    }
+    return list.extract();
+  }
+
+  void createStreams(List<OrcProto.Stream> streamDescriptions,
+      DiskRangeList ranges,
+      boolean[] includeColumn,
+      CompressionCodec codec,
+      int bufferSize,
+      Map<StreamName, InStream> streams) throws IOException {
+    long streamOffset = 0;
+    for (OrcProto.Stream streamDesc : streamDescriptions) {
+      int column = streamDesc.getColumn();
+      if ((includeColumn != null &&
+          (column < included.length && !includeColumn[column])) ||
+          streamDesc.hasKind() &&
+              (StreamName.getArea(streamDesc.getKind()) != StreamName.Area.DATA)) {
+        streamOffset += streamDesc.getLength();
+        continue;
+      }
+      List<DiskRange> buffers = RecordReaderUtils.getStreamBuffers(
+          ranges, streamOffset, streamDesc.getLength());
+      StreamName name = new StreamName(column, streamDesc.getKind());
+      streams.put(name, InStream.create(name.toString(), buffers,
+          streamDesc.getLength(), codec, bufferSize));
+      streamOffset += streamDesc.getLength();
+    }
+  }
+
+  private void readPartialDataStreams(StripeInformation stripe) throws IOException {
+    List<OrcProto.Stream> streamList = stripeFooter.getStreamsList();
+    DiskRangeList toRead = planReadPartialDataStreams(streamList,
+        indexes, included, includedRowGroups, codec != null,
+        stripeFooter.getColumnsList(), types, bufferSize, true);
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("chunks = " + RecordReaderUtils.stringifyDiskRanges(toRead));
+    }
+    bufferChunks = dataReader.readFileData(toRead, stripe.getOffset(), false);
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("merge = " + RecordReaderUtils.stringifyDiskRanges(bufferChunks));
+    }
+
+    createStreams(streamList, bufferChunks, included, codec, bufferSize, streams);
+  }
+
+  /**
+   * Read the next stripe until we find a row that we don't skip.
+   *
+   * @throws IOException
+   */
+  private void advanceStripe() throws IOException {
+    rowInStripe = rowCountInStripe;
+    while (rowInStripe >= rowCountInStripe &&
+        currentStripe < stripes.size() - 1) {
+      currentStripe += 1;
+      readStripe();
+    }
+  }
+
+  /**
+   * Skip over rows that we aren't selecting, so that the next row is
+   * one that we will read.
+   *
+   * @param nextRow the row we want to go to
+   * @throws IOException
+   */
+  private boolean advanceToNextRow(
+      TreeReaderFactory.TreeReader reader, long nextRow, boolean canAdvanceStripe)
+      throws IOException {
+    long nextRowInStripe = nextRow - rowBaseInStripe;
+    // check for row skipping
+    if (rowIndexStride != 0 &&
+        includedRowGroups != null &&
+        nextRowInStripe < rowCountInStripe) {
+      int rowGroup = (int) (nextRowInStripe / rowIndexStride);
+      if (!includedRowGroups[rowGroup]) {
+        while (rowGroup < includedRowGroups.length && !includedRowGroups[rowGroup]) {
+          rowGroup += 1;
+        }
+        if (rowGroup >= includedRowGroups.length) {
+          if (canAdvanceStripe) {
+            advanceStripe();
+          }
+          return canAdvanceStripe;
+        }
+        nextRowInStripe = Math.min(rowCountInStripe, rowGroup * rowIndexStride);
+      }
+    }
+    if (nextRowInStripe >= rowCountInStripe) {
+      if (canAdvanceStripe) {
+        advanceStripe();
+      }
+      return canAdvanceStripe;
+    }
+    if (nextRowInStripe != rowInStripe) {
+      if (rowIndexStride != 0) {
+        int rowGroup = (int) (nextRowInStripe / rowIndexStride);
+        seekToRowEntry(reader, rowGroup);
+        reader.skipRows(nextRowInStripe - rowGroup * rowIndexStride);
+      } else {
+        reader.skipRows(nextRowInStripe - rowInStripe);
+      }
+      rowInStripe = nextRowInStripe;
+    }
+    return true;
+  }
+
+  @Override
+  public boolean nextBatch(VectorizedRowBatch batch) throws IOException {
+    try {
+      if (rowInStripe >= rowCountInStripe) {
+        currentStripe += 1;
+        if (currentStripe >= stripes.size()) {
+          batch.size = 0;
+          return false;
+        }
+        readStripe();
+      }
+
+      int batchSize = computeBatchSize(batch.getMaxSize());
+
+      rowInStripe += batchSize;
+      reader.setVectorColumnCount(batch.getDataColumnCount());
+      reader.nextBatch(batch, batchSize);
+      batch.selectedInUse = false;
+      batch.size = batchSize;
+      advanceToNextRow(reader, rowInStripe + rowBaseInStripe, true);
+      return batch.size  != 0;
+    } catch (IOException e) {
+      // Rethrow exception with file name in log message
+      throw new IOException("Error reading file: " + path, e);
+    }
+  }
+
+  private int computeBatchSize(long targetBatchSize) {
+    final int batchSize;
+    // In case of PPD, batch size should be aware of row group boundaries. If only a subset of row
+    // groups are selected then the marker position is set to the end of the range (the subset of
+    // row groups within the stripe). Computing the batch size from the marker position ensures
+    // that the batch respects row group boundaries and will not overflow when reading rows.
+    // An illustration of this case is at https://issues.apache.org/jira/browse/HIVE-6287
+    if (rowIndexStride != 0 && includedRowGroups != null && rowInStripe < rowCountInStripe) {
+      int startRowGroup = (int) (rowInStripe / rowIndexStride);
+      if (!includedRowGroups[startRowGroup]) {
+        while (startRowGroup < includedRowGroups.length && !includedRowGroups[startRowGroup]) {
+          startRowGroup += 1;
+        }
+      }
+
+      int endRowGroup = startRowGroup;
+      while (endRowGroup < includedRowGroups.length && includedRowGroups[endRowGroup]) {
+        endRowGroup += 1;
+      }
+
+      final long markerPosition =
+          (endRowGroup * rowIndexStride) < rowCountInStripe ? (endRowGroup * rowIndexStride)
+              : rowCountInStripe;
+      batchSize = (int) Math.min(targetBatchSize, (markerPosition - rowInStripe));
+
+      if (isLogDebugEnabled && batchSize < targetBatchSize) {
+        LOG.debug("markerPosition: " + markerPosition + " batchSize: " + batchSize);
+      }
+    } else {
+      batchSize = (int) Math.min(targetBatchSize, (rowCountInStripe - rowInStripe));
+    }
+    return batchSize;
+  }
+
+  @Override
+  public void close() throws IOException {
+    clearStreams();
+    dataReader.close();
+  }
+
+  @Override
+  public long getRowNumber() {
+    return rowInStripe + rowBaseInStripe + firstRow;
+  }
+
+  /**
+   * Return the fraction of rows that have been read from the selected
+   * section of the file.
+   *
+   * @return fraction between 0.0 and 1.0 of rows consumed
+   */
+  @Override
+  public float getProgress() {
+    return ((float) rowBaseInStripe + rowInStripe) / totalRowCount;
+  }
+
+  private int findStripe(long rowNumber) {
+    for (int i = 0; i < stripes.size(); i++) {
+      StripeInformation stripe = stripes.get(i);
+      if (stripe.getNumberOfRows() > rowNumber) {
+        return i;
+      }
+      rowNumber -= stripe.getNumberOfRows();
+    }
+    throw new IllegalArgumentException("Seek after the end of reader range");
+  }
+
+  public OrcIndex readRowIndex(int stripeIndex, boolean[] included,
+                               boolean[] sargColumns) throws IOException {
+    return readRowIndex(stripeIndex, included, null, null, sargColumns);
+  }
+
+  public OrcIndex readRowIndex(int stripeIndex, boolean[] included,
+                               OrcProto.RowIndex[] indexes,
+                               OrcProto.BloomFilterIndex[] bloomFilterIndex,
+                               boolean[] sargColumns) throws IOException {
+    StripeInformation stripe = stripes.get(stripeIndex);
+    OrcProto.StripeFooter stripeFooter = null;
+    // if this is the current stripe, use the cached objects.
+    if (stripeIndex == currentStripe) {
+      stripeFooter = this.stripeFooter;
+      indexes = indexes == null ? this.indexes : indexes;
+      bloomFilterIndex = bloomFilterIndex == null ? this.bloomFilterIndices : bloomFilterIndex;
+      sargColumns = sargColumns == null ?
+          (sargApp == null ? null : sargApp.sargColumns) : sargColumns;
+    }
+    return dataReader.readRowIndex(stripe, stripeFooter, included, indexes, sargColumns,
+        bloomFilterIndex);
+  }
+
+  private void seekToRowEntry(TreeReaderFactory.TreeReader reader, int rowEntry)
+      throws IOException {
+    PositionProvider[] index = new PositionProvider[indexes.length];
+    for (int i = 0; i < indexes.length; ++i) {
+      if (indexes[i] != null) {
+        index[i] = new PositionProviderImpl(indexes[i].getEntry(rowEntry));
+      }
+    }
+    reader.seek(index);
+  }
+
+  @Override
+  public void seekToRow(long rowNumber) throws IOException {
+    if (rowNumber < 0) {
+      throw new IllegalArgumentException("Seek to a negative row number " +
+          rowNumber);
+    } else if (rowNumber < firstRow) {
+      throw new IllegalArgumentException("Seek before reader range " +
+          rowNumber);
+    }
+    // convert to our internal form (rows from the beginning of slice)
+    rowNumber -= firstRow;
+
+    // move to the right stripe
+    int rightStripe = findStripe(rowNumber);
+    if (rightStripe != currentStripe) {
+      currentStripe = rightStripe;
+      readStripe();
+    }
+    readRowIndex(currentStripe, included, sargApp == null ? null : sargApp.sargColumns);
+
+    // if we aren't to the right row yet, advance in the stripe.
+    advanceToNextRow(reader, rowNumber, true);
+  }
+
+  private static final String TRANSLATED_SARG_SEPARATOR = "_";
+  public static String encodeTranslatedSargColumn(int rootColumn, Integer indexInSourceTable) {
+    return rootColumn + TRANSLATED_SARG_SEPARATOR
+        + ((indexInSourceTable == null) ? -1 : indexInSourceTable);
+  }
+
+  public static int[] mapTranslatedSargColumns(
+      List<OrcProto.Type> types, List<PredicateLeaf> sargLeaves) {
+    int[] result = new int[sargLeaves.size()];
+    OrcProto.Type lastRoot = null; // Root will be the same for everyone as of now.
+    String lastRootStr = null;
+    for (int i = 0; i < result.length; ++i) {
+      String[] rootAndIndex = sargLeaves.get(i).getColumnName().split(TRANSLATED_SARG_SEPARATOR);
+      assert rootAndIndex.length == 2;
+      String rootStr = rootAndIndex[0], indexStr = rootAndIndex[1];
+      int index = Integer.parseInt(indexStr);
+      // First, check if the column even maps to anything.
+      if (index == -1) {
+        result[i] = -1;
+        continue;
+      }
+      assert index >= 0;
+      // Then, find the root type if needed.
+      if (!rootStr.equals(lastRootStr)) {
+        lastRoot = types.get(Integer.parseInt(rootStr));
+        lastRootStr = rootStr;
+      }
+      // Subtypes of the root types correspond, in order, to the columns in the table schema
+      // (disregarding schema evolution that doesn't presently work). Get the index for the
+      // corresponding subtype.
+      result[i] = lastRoot.getSubtypes(index);
+    }
+    return result;
+  }
+}
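
To make the row-group pruning in SargApplier concrete, here is a minimal standalone sketch (not the Hive/ORC API from the patch above) of how per-row-group statistics drive the boolean[] that a pickRowGroups-style method produces. The MinMax class, the equality predicate, and the example values are illustrative assumptions only; the real reader evaluates SearchArgument leaves against OrcProto.ColumnStatistics and bloom filters.

import java.util.Arrays;

public class RowGroupPruningSketch {
  // Hypothetical per-row-group statistics, standing in for the column
  // statistics entries stored in the ORC row index.
  static final class MinMax {
    final long min, max;
    MinMax(long min, long max) { this.min = min; this.max = max; }
  }

  // Decide, per row group, whether the predicate "value = target" can match.
  // A row group is skipped only when the stats prove no row can satisfy it,
  // mirroring the NO vs YES_NO_NULL distinction used by the reader.
  static boolean[] pickRowGroups(MinMax[] stats, long target) {
    boolean[] include = new boolean[stats.length];
    for (int rg = 0; rg < stats.length; ++rg) {
      include[rg] = target >= stats[rg].min && target <= stats[rg].max;
    }
    return include;
  }

  public static void main(String[] args) {
    MinMax[] stats = {
        new MinMax(0, 999), new MinMax(1000, 1999), new MinMax(2000, 2999)
    };
    // Only the second row group's statistics admit the value 1500.
    System.out.println(Arrays.toString(pickRowGroups(stats, 1500)));
  }
}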

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/impl/RecordReaderUtils.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/impl/RecordReaderUtils.java b/orc/src/java/org/apache/hive/orc/impl/RecordReaderUtils.java
new file mode 100644
index 0000000..16af69d
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/impl/RecordReaderUtils.java
@@ -0,0 +1,578 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc.impl;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
+
+import com.google.common.collect.Lists;
+import org.apache.commons.lang.builder.HashCodeBuilder;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.io.DiskRange;
+import org.apache.hadoop.hive.common.io.DiskRangeList;
+import org.apache.hadoop.hive.common.io.DiskRangeList.CreateHelper;
+import org.apache.hadoop.hive.common.io.DiskRangeList.MutateHelper;
+import org.apache.hive.orc.CompressionCodec;
+import org.apache.hive.orc.DataReader;
+import org.apache.hive.orc.OrcProto;
+import org.apache.hive.orc.StripeInformation;
+
+import com.google.common.collect.ComparisonChain;
+
+/**
+ * Stateless methods shared between RecordReaderImpl and EncodedReaderImpl.
+ */
+public class RecordReaderUtils {
+  private static final HadoopShims SHIMS = HadoopShims.Factory.get();
+
+  private static class DefaultDataReader implements DataReader {
+    private FSDataInputStream file = null;
+    private final ByteBufferAllocatorPool pool;
+    private HadoopShims.ZeroCopyReaderShim zcr = null;
+    private final FileSystem fs;
+    private final Path path;
+    private final boolean useZeroCopy;
+    private final CompressionCodec codec;
+    private final int bufferSize;
+    private final int typeCount;
+
+    private DefaultDataReader(DefaultDataReader other) {
+      this.pool = other.pool;
+      this.bufferSize = other.bufferSize;
+      this.typeCount = other.typeCount;
+      this.fs = other.fs;
+      this.path = other.path;
+      this.useZeroCopy = other.useZeroCopy;
+      this.codec = other.codec;
+    }
+
+    private DefaultDataReader(DataReaderProperties properties) {
+      this.fs = properties.getFileSystem();
+      this.path = properties.getPath();
+      this.useZeroCopy = properties.getZeroCopy();
+      this.codec = PhysicalFsWriter.createCodec(properties.getCompression());
+      this.bufferSize = properties.getBufferSize();
+      this.typeCount = properties.getTypeCount();
+      if (useZeroCopy) {
+        this.pool = new ByteBufferAllocatorPool();
+      } else {
+        this.pool = null;
+      }
+    }
+
+    @Override
+    public void open() throws IOException {
+      this.file = fs.open(path);
+      if (useZeroCopy) {
+        zcr = RecordReaderUtils.createZeroCopyShim(file, codec, pool);
+      } else {
+        zcr = null;
+      }
+    }
+
+    @Override
+    public OrcIndex readRowIndex(StripeInformation stripe,
+                                 OrcProto.StripeFooter footer,
+                                 boolean[] included,
+                                 OrcProto.RowIndex[] indexes,
+                                 boolean[] sargColumns,
+                                 OrcProto.BloomFilterIndex[] bloomFilterIndices
+                                 ) throws IOException {
+      if (file == null) {
+        open();
+      }
+      if (footer == null) {
+        footer = readStripeFooter(stripe);
+      }
+      if (indexes == null) {
+        indexes = new OrcProto.RowIndex[typeCount];
+      }
+      if (bloomFilterIndices == null) {
+        bloomFilterIndices = new OrcProto.BloomFilterIndex[typeCount];
+      }
+      long offset = stripe.getOffset();
+      List<OrcProto.Stream> streams = footer.getStreamsList();
+      for (int i = 0; i < streams.size(); i++) {
+        OrcProto.Stream stream = streams.get(i);
+        OrcProto.Stream nextStream = null;
+        if (i < streams.size() - 1) {
+          nextStream = streams.get(i+1);
+        }
+        int col = stream.getColumn();
+        int len = (int) stream.getLength();
+        // row index stream and bloom filter are interlaced, check if the sarg column contains bloom
+        // filter and combine the io to read row index and bloom filters for that column together
+        if (stream.hasKind() && (stream.getKind() == OrcProto.Stream.Kind.ROW_INDEX)) {
+          boolean readBloomFilter = false;
+          if (sargColumns != null && sargColumns[col] &&
+              nextStream.getKind() == OrcProto.Stream.Kind.BLOOM_FILTER) {
+            len += nextStream.getLength();
+            i += 1;
+            readBloomFilter = true;
+          }
+          if ((included == null || included[col]) && indexes[col] == null) {
+            byte[] buffer = new byte[len];
+            file.readFully(offset, buffer, 0, buffer.length);
+            ByteBuffer bb = ByteBuffer.wrap(buffer);
+            indexes[col] = OrcProto.RowIndex.parseFrom(InStream.create("index",
+                Lists.<DiskRange>newArrayList(new BufferChunk(bb, 0)), stream.getLength(),
+                codec, bufferSize));
+            if (readBloomFilter) {
+              bb.position((int) stream.getLength());
+              bloomFilterIndices[col] = OrcProto.BloomFilterIndex.parseFrom(InStream.create(
+                  "bloom_filter", Lists.<DiskRange>newArrayList(new BufferChunk(bb, 0)),
+                  nextStream.getLength(), codec, bufferSize));
+            }
+          }
+        }
+        offset += len;
+      }
+
+      OrcIndex index = new OrcIndex(indexes, bloomFilterIndices);
+      return index;
+    }
+
+    @Override
+    public OrcProto.StripeFooter readStripeFooter(StripeInformation stripe) throws IOException {
+      if (file == null) {
+        open();
+      }
+      long offset = stripe.getOffset() + stripe.getIndexLength() + stripe.getDataLength();
+      int tailLength = (int) stripe.getFooterLength();
+
+      // read the footer
+      ByteBuffer tailBuf = ByteBuffer.allocate(tailLength);
+      file.readFully(offset, tailBuf.array(), tailBuf.arrayOffset(), tailLength);
+      return OrcProto.StripeFooter.parseFrom(InStream.createCodedInputStream("footer",
+          Lists.<DiskRange>newArrayList(new BufferChunk(tailBuf, 0)),
+          tailLength, codec, bufferSize));
+    }
+
+    @Override
+    public DiskRangeList readFileData(
+        DiskRangeList range, long baseOffset, boolean doForceDirect) throws IOException {
+      return RecordReaderUtils.readDiskRanges(file, zcr, baseOffset, range, doForceDirect);
+    }
+
+    @Override
+    public void close() throws IOException {
+      if (pool != null) {
+        pool.clear();
+      }
+      // close both zcr and file
+      try (HadoopShims.ZeroCopyReaderShim myZcr = zcr) {
+        if (file != null) {
+          file.close();
+        }
+      }
+    }
+
+    @Override
+    public boolean isTrackingDiskRanges() {
+      return zcr != null;
+    }
+
+    @Override
+    public void releaseBuffer(ByteBuffer buffer) {
+      zcr.releaseBuffer(buffer);
+    }
+
+    @Override
+    public DataReader clone() {
+      return new DefaultDataReader(this);
+    }
+
+  }
+
+  public static DataReader createDefaultDataReader(DataReaderProperties properties) {
+    return new DefaultDataReader(properties);
+  }
+
+  public static boolean[] findPresentStreamsByColumn(
+      List<OrcProto.Stream> streamList, List<OrcProto.Type> types) {
+    boolean[] hasNull = new boolean[types.size()];
+    for(OrcProto.Stream stream: streamList) {
+      if (stream.hasKind() && (stream.getKind() == OrcProto.Stream.Kind.PRESENT)) {
+        hasNull[stream.getColumn()] = true;
+      }
+    }
+    return hasNull;
+  }
+
+  /**
+   * Does region A overlap region B? The end points are inclusive on both sides.
+   * @param leftA A's left point
+   * @param rightA A's right point
+   * @param leftB B's left point
+   * @param rightB B's right point
+   * @return Does region A overlap region B?
+   */
+  static boolean overlap(long leftA, long rightA, long leftB, long rightB) {
+    if (leftA <= leftB) {
+      return rightA >= leftB;
+    }
+    return rightB >= leftA;
+  }
+
+  public static void addEntireStreamToRanges(
+      long offset, long length, CreateHelper list, boolean doMergeBuffers) {
+    list.addOrMerge(offset, offset + length, doMergeBuffers, false);
+  }
+
+  public static void addRgFilteredStreamToRanges(OrcProto.Stream stream,
+      boolean[] includedRowGroups, boolean isCompressed, OrcProto.RowIndex index,
+      OrcProto.ColumnEncoding encoding, OrcProto.Type type, int compressionSize, boolean hasNull,
+      long offset, long length, CreateHelper list, boolean doMergeBuffers) {
+    for (int group = 0; group < includedRowGroups.length; ++group) {
+      if (!includedRowGroups[group]) continue;
+      int posn = getIndexPosition(
+          encoding.getKind(), type.getKind(), stream.getKind(), isCompressed, hasNull);
+      long start = index.getEntry(group).getPositions(posn);
+      final long nextGroupOffset;
+      boolean isLast = group == (includedRowGroups.length - 1);
+      nextGroupOffset = isLast ? length : index.getEntry(group + 1).getPositions(posn);
+
+      start += offset;
+      long end = offset + estimateRgEndOffset(
+          isCompressed, isLast, nextGroupOffset, length, compressionSize);
+      list.addOrMerge(start, end, doMergeBuffers, true);
+    }
+  }
+
+  public static long estimateRgEndOffset(boolean isCompressed, boolean isLast,
+      long nextGroupOffset, long streamLength, int bufferSize) {
+    // figure out the worst case last location
+    // if adjacent groups have the same compressed block offset then stretch the slop
+    // by factor of 2 to safely accommodate the next compression block.
+    // One for the current compression block and another for the next compression block.
+    long slop = isCompressed ? 2 * (OutStream.HEADER_SIZE + bufferSize) : WORST_UNCOMPRESSED_SLOP;
+    return isLast ? streamLength : Math.min(streamLength, nextGroupOffset + slop);
+  }
+
+  private static final int BYTE_STREAM_POSITIONS = 1;
+  private static final int RUN_LENGTH_BYTE_POSITIONS = BYTE_STREAM_POSITIONS + 1;
+  private static final int BITFIELD_POSITIONS = RUN_LENGTH_BYTE_POSITIONS + 1;
+  private static final int RUN_LENGTH_INT_POSITIONS = BYTE_STREAM_POSITIONS + 1;
+
+  /**
+   * Get the offset within the column's index positions at which the given
+   * stream's positions start.
+   * @param columnEncoding the encoding of the column
+   * @param columnType the type of the column
+   * @param streamType the kind of the stream
+   * @param isCompressed is the file compressed
+   * @param hasNulls does the column have a PRESENT stream?
+   * @return the offset of the stream's first position within the index entry
+   */
+  public static int getIndexPosition(OrcProto.ColumnEncoding.Kind columnEncoding,
+                              OrcProto.Type.Kind columnType,
+                              OrcProto.Stream.Kind streamType,
+                              boolean isCompressed,
+                              boolean hasNulls) {
+    if (streamType == OrcProto.Stream.Kind.PRESENT) {
+      return 0;
+    }
+    int compressionValue = isCompressed ? 1 : 0;
+    int base = hasNulls ? (BITFIELD_POSITIONS + compressionValue) : 0;
+    switch (columnType) {
+      case BOOLEAN:
+      case BYTE:
+      case SHORT:
+      case INT:
+      case LONG:
+      case FLOAT:
+      case DOUBLE:
+      case DATE:
+      case STRUCT:
+      case MAP:
+      case LIST:
+      case UNION:
+        return base;
+      case CHAR:
+      case VARCHAR:
+      case STRING:
+        if (columnEncoding == OrcProto.ColumnEncoding.Kind.DICTIONARY ||
+            columnEncoding == OrcProto.ColumnEncoding.Kind.DICTIONARY_V2) {
+          return base;
+        } else {
+          if (streamType == OrcProto.Stream.Kind.DATA) {
+            return base;
+          } else {
+            return base + BYTE_STREAM_POSITIONS + compressionValue;
+          }
+        }
+      case BINARY:
+        if (streamType == OrcProto.Stream.Kind.DATA) {
+          return base;
+        }
+        return base + BYTE_STREAM_POSITIONS + compressionValue;
+      case DECIMAL:
+        if (streamType == OrcProto.Stream.Kind.DATA) {
+          return base;
+        }
+        return base + BYTE_STREAM_POSITIONS + compressionValue;
+      case TIMESTAMP:
+        if (streamType == OrcProto.Stream.Kind.DATA) {
+          return base;
+        }
+        return base + RUN_LENGTH_INT_POSITIONS + compressionValue;
+      default:
+        throw new IllegalArgumentException("Unknown type " + columnType);
+    }
+  }
+
+  // for uncompressed streams, what is the most overlap with the following set
+  // of rows (long vint literal group).
+  static final int WORST_UNCOMPRESSED_SLOP = 2 + 8 * 512;
+
+  /**
+   * Is this stream part of a dictionary?
+   * @return is this part of a dictionary?
+   */
+  public static boolean isDictionary(OrcProto.Stream.Kind kind,
+                              OrcProto.ColumnEncoding encoding) {
+    assert kind != OrcProto.Stream.Kind.DICTIONARY_COUNT;
+    OrcProto.ColumnEncoding.Kind encodingKind = encoding.getKind();
+    return kind == OrcProto.Stream.Kind.DICTIONARY_DATA ||
+      (kind == OrcProto.Stream.Kind.LENGTH &&
+       (encodingKind == OrcProto.ColumnEncoding.Kind.DICTIONARY ||
+        encodingKind == OrcProto.ColumnEncoding.Kind.DICTIONARY_V2));
+  }
+
+  /**
+   * Build a string representation of a list of disk ranges.
+   * @param range ranges to stringify
+   * @return the resulting string
+   */
+  public static String stringifyDiskRanges(DiskRangeList range) {
+    StringBuilder buffer = new StringBuilder();
+    buffer.append("[");
+    boolean isFirst = true;
+    while (range != null) {
+      if (!isFirst) {
+        buffer.append(", {");
+      } else {
+        buffer.append("{");
+      }
+      isFirst = false;
+      buffer.append(range.toString());
+      buffer.append("}");
+      range = range.next;
+    }
+    buffer.append("]");
+    return buffer.toString();
+  }
+
+  /**
+   * Read the list of ranges from the file.
+   * @param file the file to read
+   * @param base the base of the stripe
+   * @param range the disk ranges within the stripe to read
+   * @return the range list with the bytes that were read populated into
+   *    each disk range
+   * @throws IOException
+   */
+  static DiskRangeList readDiskRanges(FSDataInputStream file,
+                                      HadoopShims.ZeroCopyReaderShim zcr,
+                                 long base,
+                                 DiskRangeList range,
+                                 boolean doForceDirect) throws IOException {
+    if (range == null) return null;
+    DiskRangeList prev = range.prev;
+    if (prev == null) {
+      prev = new MutateHelper(range);
+    }
+    while (range != null) {
+      if (range.hasData()) {
+        range = range.next;
+        continue;
+      }
+      int len = (int) (range.getEnd() - range.getOffset());
+      long off = range.getOffset();
+      if (zcr != null) {
+        file.seek(base + off);
+        boolean hasReplaced = false;
+        while (len > 0) {
+          ByteBuffer partial = zcr.readBuffer(len, false);
+          BufferChunk bc = new BufferChunk(partial, off);
+          if (!hasReplaced) {
+            range.replaceSelfWith(bc);
+            hasReplaced = true;
+          } else {
+            range.insertAfter(bc);
+          }
+          range = bc;
+          int read = partial.remaining();
+          len -= read;
+          off += read;
+        }
+      } else {
+        // Don't use HDFS ByteBuffer API because it has no readFully, and is buggy and pointless.
+        byte[] buffer = new byte[len];
+        file.readFully((base + off), buffer, 0, buffer.length);
+        ByteBuffer bb = null;
+        if (doForceDirect) {
+          bb = ByteBuffer.allocateDirect(len);
+          bb.put(buffer);
+          bb.position(0);
+          bb.limit(len);
+        } else {
+          bb = ByteBuffer.wrap(buffer);
+        }
+        range = range.replaceSelfWith(new BufferChunk(bb, range.getOffset()));
+      }
+      range = range.next;
+    }
+    return prev.next;
+  }
+
+
+  static List<DiskRange> getStreamBuffers(DiskRangeList range, long offset, long length) {
+    // This assumes sorted ranges (as do many other parts of ORC code).
+    ArrayList<DiskRange> buffers = new ArrayList<DiskRange>();
+    if (length == 0) return buffers;
+    long streamEnd = offset + length;
+    boolean inRange = false;
+    while (range != null) {
+      if (!inRange) {
+        if (range.getEnd() <= offset) {
+          range = range.next;
+          continue; // Skip until we are in range.
+        }
+        inRange = true;
+        if (range.getOffset() < offset) {
+          // Partial first buffer, add a slice of it.
+          buffers.add(range.sliceAndShift(offset, Math.min(streamEnd, range.getEnd()), -offset));
+          if (range.getEnd() >= streamEnd) break; // Partial first buffer is also partial last buffer.
+          range = range.next;
+          continue;
+        }
+      } else if (range.getOffset() >= streamEnd) {
+        break;
+      }
+      if (range.getEnd() > streamEnd) {
+        // Partial last buffer (may also be the first buffer), add a slice of it.
+        buffers.add(range.sliceAndShift(range.getOffset(), streamEnd, -offset));
+        break;
+      }
+      // Buffer that belongs entirely to one stream.
+      // TODO: ideally we would want to reuse the object and remove it from the list, but we cannot
+      //       because bufferChunks is also used by clearStreams for zcr. Create a useless dup.
+      buffers.add(range.sliceAndShift(range.getOffset(), range.getEnd(), -offset));
+      if (range.getEnd() == streamEnd) break;
+      range = range.next;
+    }
+    return buffers;
+  }
+
+  static HadoopShims.ZeroCopyReaderShim createZeroCopyShim(FSDataInputStream file,
+      CompressionCodec codec, ByteBufferAllocatorPool pool) throws IOException {
+    if ((codec == null || ((codec instanceof DirectDecompressionCodec)
+            && ((DirectDecompressionCodec) codec).isAvailable()))) {
+      /* codec is null or is available */
+      return SHIMS.getZeroCopyReader(file, pool);
+    }
+    return null;
+  }
+
+  // this is an implementation copied from ElasticByteBufferPool in hadoop-2,
+  // which lacks a clear()/clean() operation
+  public final static class ByteBufferAllocatorPool implements HadoopShims.ByteBufferPoolShim {
+    private static final class Key implements Comparable<Key> {
+      private final int capacity;
+      private final long insertionGeneration;
+
+      Key(int capacity, long insertionGeneration) {
+        this.capacity = capacity;
+        this.insertionGeneration = insertionGeneration;
+      }
+
+      @Override
+      public int compareTo(Key other) {
+        return ComparisonChain.start().compare(capacity, other.capacity)
+            .compare(insertionGeneration, other.insertionGeneration).result();
+      }
+
+      @Override
+      public boolean equals(Object rhs) {
+        if (rhs == null) {
+          return false;
+        }
+        try {
+          Key o = (Key) rhs;
+          return (compareTo(o) == 0);
+        } catch (ClassCastException e) {
+          return false;
+        }
+      }
+
+      @Override
+      public int hashCode() {
+        return new HashCodeBuilder().append(capacity).append(insertionGeneration)
+            .toHashCode();
+      }
+    }
+
+    private final TreeMap<Key, ByteBuffer> buffers = new TreeMap<Key, ByteBuffer>();
+
+    private final TreeMap<Key, ByteBuffer> directBuffers = new TreeMap<Key, ByteBuffer>();
+
+    private long currentGeneration = 0;
+
+    private final TreeMap<Key, ByteBuffer> getBufferTree(boolean direct) {
+      return direct ? directBuffers : buffers;
+    }
+
+    public void clear() {
+      buffers.clear();
+      directBuffers.clear();
+    }
+
+    @Override
+    public ByteBuffer getBuffer(boolean direct, int length) {
+      TreeMap<Key, ByteBuffer> tree = getBufferTree(direct);
+      Map.Entry<Key, ByteBuffer> entry = tree.ceilingEntry(new Key(length, 0));
+      if (entry == null) {
+        return direct ? ByteBuffer.allocateDirect(length) : ByteBuffer
+            .allocate(length);
+      }
+      tree.remove(entry.getKey());
+      return entry.getValue();
+    }
+
+    @Override
+    public void putBuffer(ByteBuffer buffer) {
+      TreeMap<Key, ByteBuffer> tree = getBufferTree(buffer.isDirect());
+      while (true) {
+        Key key = new Key(buffer.capacity(), currentGeneration++);
+        if (!tree.containsKey(key)) {
+          tree.put(key, buffer);
+          return;
+        }
+        // Buffers are indexed by (capacity, generation).
+        // If our key is not unique on the first try, we try again
+      }
+    }
+  }
+}
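
The ByteBufferAllocatorPool above keys cached buffers by (capacity, insertionGeneration) in a TreeMap so that ceilingEntry() hands back the smallest pooled buffer that can satisfy a request. The following self-contained sketch shows only that lookup idea; the single packed long key and the BufferPoolSketch name are simplifications invented here, not the real Key class or its API.

import java.nio.ByteBuffer;
import java.util.Map;
import java.util.TreeMap;

public class BufferPoolSketch {
  // Buffers are ordered by capacity so ceilingEntry() finds the smallest
  // buffer at least as large as the request; a generation counter keeps
  // keys unique when several buffers share a capacity.
  private final TreeMap<Long, ByteBuffer> pool = new TreeMap<>();
  private long generation = 0;

  // Pack (capacity, generation) into one long purely for brevity in this
  // sketch; it assumes fewer than 2^32 generations.
  private long key(int capacity, long gen) {
    return ((long) capacity << 32) | gen;
  }

  public ByteBuffer get(int length) {
    Map.Entry<Long, ByteBuffer> hit = pool.ceilingEntry(key(length, 0));
    if (hit == null) {
      return ByteBuffer.allocate(length);   // pool miss: allocate fresh
    }
    pool.remove(hit.getKey());              // pool hit: hand the buffer out
    return hit.getValue();
  }

  public void put(ByteBuffer buffer) {
    pool.put(key(buffer.capacity(), generation++), buffer);
  }

  public static void main(String[] args) {
    BufferPoolSketch p = new BufferPoolSketch();
    ByteBuffer b = p.get(4096);
    p.put(b);
    System.out.println(p.get(1000).capacity()); // reuses the 4096-byte buffer
  }
}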

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/impl/RedBlackTree.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/impl/RedBlackTree.java b/orc/src/java/org/apache/hive/orc/impl/RedBlackTree.java
new file mode 100644
index 0000000..a340c50
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/impl/RedBlackTree.java
@@ -0,0 +1,309 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc.impl;
+
+/**
+ * A memory efficient red-black tree that does not allocate any objects per
+ * element. This class is abstract and assumes that the child class
+ * handles the key and comparisons with the key.
+ */
+abstract class RedBlackTree {
+  public static final int NULL = -1;
+
+  // Various values controlling the offset of the data within the array.
+  private static final int LEFT_OFFSET = 0;
+  private static final int RIGHT_OFFSET = 1;
+  private static final int ELEMENT_SIZE = 2;
+
+  protected int size = 0;
+  private final DynamicIntArray data;
+  protected int root = NULL;
+  protected int lastAdd = 0;
+  private boolean wasAdd = false;
+
+  /**
+   * Create a set with the given initial capacity.
+   */
+  public RedBlackTree(int initialCapacity) {
+    data = new DynamicIntArray(initialCapacity * ELEMENT_SIZE);
+  }
+
+  /**
+   * Insert a new node into the data array, growing the array as necessary.
+   *
+   * @return Returns the position of the new node.
+   */
+  private int insert(int left, int right, boolean isRed) {
+    int position = size;
+    size += 1;
+    setLeft(position, left, isRed);
+    setRight(position, right);
+    return position;
+  }
+
+  /**
+   * Compare the value at the given position to the new value.
+   * @return 0 if the values are the same, -1 if the new value is smaller and
+   *         1 if the new value is larger.
+   */
+  protected abstract int compareValue(int position);
+
+  /**
+   * Is the given node red as opposed to black? To prevent having an extra word
+   * in the data array, we just use the low bit of the left child index.
+   */
+  protected boolean isRed(int position) {
+    return position != NULL &&
+        (data.get(position * ELEMENT_SIZE + LEFT_OFFSET) & 1) == 1;
+  }
+
+  /**
+   * Set the red bit true or false.
+   */
+  private void setRed(int position, boolean isRed) {
+    int offset = position * ELEMENT_SIZE + LEFT_OFFSET;
+    if (isRed) {
+      data.set(offset, data.get(offset) | 1);
+    } else {
+      data.set(offset, data.get(offset) & ~1);
+    }
+  }
+
+  /**
+   * Get the left field of the given position.
+   */
+  protected int getLeft(int position) {
+    return data.get(position * ELEMENT_SIZE + LEFT_OFFSET) >> 1;
+  }
+
+  /**
+   * Get the right field of the given position.
+   */
+  protected int getRight(int position) {
+    return data.get(position * ELEMENT_SIZE + RIGHT_OFFSET);
+  }
+
+  /**
+   * Set the left field of the given position.
+   * Note that we are storing the node color in the low bit of the left pointer.
+   */
+  private void setLeft(int position, int left) {
+    int offset = position * ELEMENT_SIZE + LEFT_OFFSET;
+    data.set(offset, (left << 1) | (data.get(offset) & 1));
+  }
+
+  /**
+   * Set the left field of the given position.
+   * Note that we are storing the node color in the low bit of the left pointer.
+   */
+  private void setLeft(int position, int left, boolean isRed) {
+    int offset = position * ELEMENT_SIZE + LEFT_OFFSET;
+    data.set(offset, (left << 1) | (isRed ? 1 : 0));
+  }
+
+  /**
+   * Set the right field of the given position.
+   */
+  private void setRight(int position, int right) {
+    data.set(position * ELEMENT_SIZE + RIGHT_OFFSET, right);
+  }
+
+  /**
+   * Insert or find a given key in the tree and rebalance the tree correctly.
+   * Rebalancing restores the red-black aspect of the tree to maintain the
+   * invariants:
+   *   1. If a node is red, both of its children are black.
+   *   2. Each child of a node has the same black height (the number of black
+   *      nodes between it and the leaves of the tree).
+   *
+   * Inserted nodes are at the leaves and are red, therefore there is at most a
+   * violation of rule 1 at the node we just put in. Instead of always keeping
+   * the parents, this routine passes the context down.
+   *
+   * The fix is broken down into 6 cases (1.{1,2,3} and 2.{1,2,3}) that are
+   * left-right mirror images of each other. See Algorithms by Cormen,
+   * Leiserson, and Rivest for the explanation of the subcases.
+   *
+   * @param node The node that we are fixing right now.
+   * @param fromLeft Did we come down from the left?
+   * @param parent Nodes' parent
+   * @param grandparent Parent's parent
+   * @param greatGrandparent Grandparent's parent
+   * @return Does parent also need to be checked and/or fixed?
+   */
+  private boolean add(int node, boolean fromLeft, int parent,
+                      int grandparent, int greatGrandparent) {
+    if (node == NULL) {
+      if (root == NULL) {
+        lastAdd = insert(NULL, NULL, false);
+        root = lastAdd;
+        wasAdd = true;
+        return false;
+      } else {
+        lastAdd = insert(NULL, NULL, true);
+        node = lastAdd;
+        wasAdd = true;
+        // connect the new node into the tree
+        if (fromLeft) {
+          setLeft(parent, node);
+        } else {
+          setRight(parent, node);
+        }
+      }
+    } else {
+      int compare = compareValue(node);
+      boolean keepGoing;
+
+      // Recurse down to find where the node needs to be added
+      if (compare < 0) {
+        keepGoing = add(getLeft(node), true, node, parent, grandparent);
+      } else if (compare > 0) {
+        keepGoing = add(getRight(node), false, node, parent, grandparent);
+      } else {
+        lastAdd = node;
+        wasAdd = false;
+        return false;
+      }
+
+      // we don't need to fix the root (because it is always set to black)
+      if (node == root || !keepGoing) {
+        return false;
+      }
+    }
+
+
+    // Do we need to fix this node? Only if there are two reds right under each
+    // other.
+    if (isRed(node) && isRed(parent)) {
+      if (parent == getLeft(grandparent)) {
+        int uncle = getRight(grandparent);
+        if (isRed(uncle)) {
+          // case 1.1
+          setRed(parent, false);
+          setRed(uncle, false);
+          setRed(grandparent, true);
+          return true;
+        } else {
+          if (node == getRight(parent)) {
+            // case 1.2
+            // swap node and parent
+            int tmp = node;
+            node = parent;
+            parent = tmp;
+            // left-rotate on node
+            setLeft(grandparent, parent);
+            setRight(node, getLeft(parent));
+            setLeft(parent, node);
+          }
+
+          // case 1.2 and 1.3
+          setRed(parent, false);
+          setRed(grandparent, true);
+
+          // right-rotate on grandparent
+          if (greatGrandparent == NULL) {
+            root = parent;
+          } else if (getLeft(greatGrandparent) == grandparent) {
+            setLeft(greatGrandparent, parent);
+          } else {
+            setRight(greatGrandparent, parent);
+          }
+          setLeft(grandparent, getRight(parent));
+          setRight(parent, grandparent);
+          return false;
+        }
+      } else {
+        int uncle = getLeft(grandparent);
+        if (isRed(uncle)) {
+          // case 2.1
+          setRed(parent, false);
+          setRed(uncle, false);
+          setRed(grandparent, true);
+          return true;
+        } else {
+          if (node == getLeft(parent)) {
+            // case 2.2
+            // swap node and parent
+            int tmp = node;
+            node = parent;
+            parent = tmp;
+            // right-rotate on node
+            setRight(grandparent, parent);
+            setLeft(node, getRight(parent));
+            setRight(parent, node);
+          }
+          // case 2.2 and 2.3
+          setRed(parent, false);
+          setRed(grandparent, true);
+          // left-rotate on grandparent
+          if (greatGrandparent == NULL) {
+            root = parent;
+          } else if (getRight(greatGrandparent) == grandparent) {
+            setRight(greatGrandparent, parent);
+          } else {
+            setLeft(greatGrandparent, parent);
+          }
+          setRight(grandparent, getLeft(parent));
+          setLeft(parent, grandparent);
+          return false;
+        }
+      }
+    } else {
+      return true;
+    }
+  }
+
+  /**
+   * Add the new key to the tree.
+   * @return true if the element is a new one.
+   */
+  protected boolean add() {
+    add(root, false, NULL, NULL, NULL);
+    if (wasAdd) {
+      setRed(root, false);
+      return true;
+    } else {
+      return false;
+    }
+  }
+
+  /**
+   * Get the number of elements in the set.
+   */
+  public int size() {
+    return size;
+  }
+
+  /**
+   * Reset the table to empty.
+   */
+  public void clear() {
+    root = NULL;
+    size = 0;
+    data.clear();
+  }
+
+  /**
+   * Get the buffer size in bytes.
+   */
+  public long getSizeInBytes() {
+    return data.getSizeInBytes();
+  }
+}
+
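
RedBlackTree stores each node's color in the low bit of its left-child index so that no extra word per element is needed. The short self-contained sketch below shows just that bit-packing trick in isolation; the fixed array size and the class name are arbitrary choices for illustration, not part of the patch.

public class ColorBitPackingSketch {
  // Each slot holds the left-child index shifted left by one, with the
  // node's color (red = 1, black = 0) kept in the low bit.
  private final int[] left = new int[16];

  void setLeft(int position, int leftChild, boolean isRed) {
    left[position] = (leftChild << 1) | (isRed ? 1 : 0);
  }

  int getLeft(int position) {
    return left[position] >> 1;      // arithmetic shift keeps NULL == -1 intact
  }

  boolean isRed(int position) {
    return (left[position] & 1) == 1;
  }

  public static void main(String[] args) {
    ColorBitPackingSketch t = new ColorBitPackingSketch();
    t.setLeft(0, -1, true);          // a red leaf with no left child (NULL = -1)
    System.out.println(t.getLeft(0) + " red=" + t.isRed(0));   // prints: -1 red=true
  }
}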

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/impl/RunLengthByteReader.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/impl/RunLengthByteReader.java b/orc/src/java/org/apache/hive/orc/impl/RunLengthByteReader.java
new file mode 100644
index 0000000..1dd5dab
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/impl/RunLengthByteReader.java
@@ -0,0 +1,174 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc.impl;
+
+import java.io.EOFException;
+import java.io.IOException;
+
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+
+/**
+ * A reader that reads a sequence of bytes. A control byte is read before
+ * each run with positive values 0 to 127 meaning 3 to 130 repetitions. If the
+ * byte is -1 to -128, 1 to 128 literal byte values follow.
+ */
+public class RunLengthByteReader {
+  private InStream input;
+  private final byte[] literals =
+    new byte[RunLengthByteWriter.MAX_LITERAL_SIZE];
+  private int numLiterals = 0;
+  private int used = 0;
+  private boolean repeat = false;
+
+  public RunLengthByteReader(InStream input) throws IOException {
+    this.input = input;
+  }
+
+  public void setInStream(InStream input) {
+    this.input = input;
+  }
+
+  private void readValues(boolean ignoreEof) throws IOException {
+    int control = input.read();
+    used = 0;
+    if (control == -1) {
+      if (!ignoreEof) {
+        throw new EOFException("Read past end of buffer RLE byte from " + input);
+      }
+      used = numLiterals = 0;
+      return;
+    } else if (control < 0x80) {
+      repeat = true;
+      numLiterals = control + RunLengthByteWriter.MIN_REPEAT_SIZE;
+      int val = input.read();
+      if (val == -1) {
+        throw new EOFException("Reading RLE byte got EOF");
+      }
+      literals[0] = (byte) val;
+    } else {
+      repeat = false;
+      numLiterals = 0x100 - control;
+      int bytes = 0;
+      while (bytes < numLiterals) {
+        int result = input.read(literals, bytes, numLiterals - bytes);
+        if (result == -1) {
+          throw new EOFException("Reading RLE byte literal got EOF in " + this);
+        }
+        bytes += result;
+      }
+    }
+  }
+
+  public boolean hasNext() throws IOException {
+    return used != numLiterals || input.available() > 0;
+  }
+
+  public byte next() throws IOException {
+    byte result;
+    if (used == numLiterals) {
+      readValues(false);
+    }
+    if (repeat) {
+      result = literals[0];
+    } else {
+      result = literals[used];
+    }
+    ++used;
+    return result;
+  }
+
+  public void nextVector(ColumnVector previous, long[] data, long size)
+      throws IOException {
+    previous.isRepeating = true;
+    for (int i = 0; i < size; i++) {
+      if (!previous.isNull[i]) {
+        data[i] = next();
+      } else {
+        // The default value of null for int types in vectorized
+        // processing is 1, so set that if the value is null
+        data[i] = 1;
+      }
+
+      // The default value for nulls in vectorization for int types is 1,
+      // and since a non-null value can also be 1, isNull must be checked as well
+      // when determining the isRepeating flag.
+      if (previous.isRepeating
+          && i > 0
+          && ((data[0] != data[i]) ||
+              (previous.isNull[0] != previous.isNull[i]))) {
+        previous.isRepeating = false;
+      }
+    }
+  }
+
+  /**
+   * Read the next size bytes into the data array, skipping over any slots
+   * where isNull is true.
+   * @param isNull if non-null, skip any rows where isNull[r] is true
+   * @param data the array to read into
+   * @param size the number of elements to read
+   * @throws IOException
+   */
+  public void nextVector(boolean[] isNull, int[] data,
+                         long size) throws IOException {
+    if (isNull == null) {
+      for(int i=0; i < size; ++i) {
+        data[i] = next();
+      }
+    } else {
+      for(int i=0; i < size; ++i) {
+        if (!isNull[i]) {
+          data[i] = next();
+        }
+      }
+    }
+  }
+
+  public void seek(PositionProvider index) throws IOException {
+    input.seek(index);
+    int consumed = (int) index.getNext();
+    if (consumed != 0) {
+      // a loop is required for cases where we break the run into two parts
+      while (consumed > 0) {
+        readValues(false);
+        used = consumed;
+        consumed -= numLiterals;
+      }
+    } else {
+      used = 0;
+      numLiterals = 0;
+    }
+  }
+
+  public void skip(long items) throws IOException {
+    while (items > 0) {
+      if (used == numLiterals) {
+        readValues(false);
+      }
+      long consume = Math.min(items, numLiterals - used);
+      used += consume;
+      items -= consume;
+    }
+  }
+
+  @Override
+  public String toString() {
+    return "byte rle " + (repeat ? "repeat" : "literal") + " used: " +
+        used + "/" + numLiterals + " from " + input;
+  }
+}
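As a reading aid for the framing documented in the class comment above, here is a
hedged, self-contained decoder sketch: a control byte of 0..0x7f means the next byte
repeats (control + 3) times, while 0x80..0xff means (256 - control) literal bytes
follow. The class and method names are made up for illustration; the real reader
additionally handles EOF, seeking and vectorized reads.

import java.io.ByteArrayOutputStream;

final class ByteRleSketch {
  static byte[] decode(byte[] encoded) {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    int i = 0;
    while (i < encoded.length) {
      int control = encoded[i++] & 0xff;
      if (control < 0x80) {               // run: next byte repeated (control + 3) times
        byte value = encoded[i++];
        for (int r = 0; r < control + 3; ++r) {
          out.write(value);
        }
      } else {                            // literals: (256 - control) raw bytes follow
        int count = 0x100 - control;
        out.write(encoded, i, count);
        i += count;
      }
    }
    return out.toByteArray();
  }

  public static void main(String[] args) {
    // {0x02, 'A'} decodes to "AAAAA"; {0xfe, 'B', 'C'} decodes to "BC".
    System.out.println(new String(decode(new byte[]{0x02, 0x41})));
    System.out.println(new String(decode(new byte[]{(byte) 0xfe, 0x42, 0x43})));
  }
}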


[24/37] hive git commit: HIVE-17118. Move the hive-orc source files to make the package names unique.

Posted by om...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/impl/ZeroCopyShims.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/impl/ZeroCopyShims.java b/orc/src/java/org/apache/hive/orc/impl/ZeroCopyShims.java
new file mode 100644
index 0000000..6322801
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/impl/ZeroCopyShims.java
@@ -0,0 +1,89 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc.impl;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.EnumSet;
+
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.ReadOption;
+import org.apache.hadoop.io.ByteBufferPool;
+
+class ZeroCopyShims {
+  private static final class ByteBufferPoolAdapter implements ByteBufferPool {
+    private HadoopShims.ByteBufferPoolShim pool;
+
+    public ByteBufferPoolAdapter(HadoopShims.ByteBufferPoolShim pool) {
+      this.pool = pool;
+    }
+
+    @Override
+    public final ByteBuffer getBuffer(boolean direct, int length) {
+      return this.pool.getBuffer(direct, length);
+    }
+
+    @Override
+    public final void putBuffer(ByteBuffer buffer) {
+      this.pool.putBuffer(buffer);
+    }
+  }
+
+  private static final class ZeroCopyAdapter implements HadoopShims.ZeroCopyReaderShim {
+    private final FSDataInputStream in;
+    private final ByteBufferPoolAdapter pool;
+    private final static EnumSet<ReadOption> CHECK_SUM = EnumSet
+        .noneOf(ReadOption.class);
+    private final static EnumSet<ReadOption> NO_CHECK_SUM = EnumSet
+        .of(ReadOption.SKIP_CHECKSUMS);
+
+    public ZeroCopyAdapter(FSDataInputStream in,
+                           HadoopShims.ByteBufferPoolShim poolshim) {
+      this.in = in;
+      if (poolshim != null) {
+        pool = new ByteBufferPoolAdapter(poolshim);
+      } else {
+        pool = null;
+      }
+    }
+
+    public final ByteBuffer readBuffer(int maxLength, boolean verifyChecksums)
+        throws IOException {
+      EnumSet<ReadOption> options = NO_CHECK_SUM;
+      if (verifyChecksums) {
+        options = CHECK_SUM;
+      }
+      return this.in.read(this.pool, maxLength, options);
+    }
+
+    public final void releaseBuffer(ByteBuffer buffer) {
+      this.in.releaseBuffer(buffer);
+    }
+
+    @Override
+    public final void close() throws IOException {
+      this.in.close();
+    }
+  }
+
+  public static HadoopShims.ZeroCopyReaderShim getZeroCopyReader(FSDataInputStream in,
+                                                                 HadoopShims.ByteBufferPoolShim pool) throws IOException {
+    return new ZeroCopyAdapter(in, pool);
+  }
+
+}
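A hedged usage sketch for the adapter above. It assumes the class stays package-private
in org.apache.hive.orc.impl, that HadoopShims.ByteBufferPoolShim is an interface exposing
exactly the getBuffer/putBuffer pair delegated to in ByteBufferPoolAdapter, and that
HadoopShims.ZeroCopyReaderShim declares the readBuffer/releaseBuffer/close methods used
below. The point is the pairing: every buffer handed out by readBuffer must go back
through releaseBuffer.

package org.apache.hive.orc.impl;

import java.io.IOException;
import java.nio.ByteBuffer;

import org.apache.hadoop.fs.FSDataInputStream;

final class ZeroCopyUsageSketch {
  static void readOnce(FSDataInputStream in) throws IOException {
    HadoopShims.ByteBufferPoolShim pool = new HadoopShims.ByteBufferPoolShim() {
      @Override
      public ByteBuffer getBuffer(boolean direct, int length) {
        return direct ? ByteBuffer.allocateDirect(length) : ByteBuffer.allocate(length);
      }
      @Override
      public void putBuffer(ByteBuffer buffer) {
        // a real pool would recycle the buffer; this sketch just drops it
      }
    };
    HadoopShims.ZeroCopyReaderShim reader = ZeroCopyShims.getZeroCopyReader(in, pool);
    ByteBuffer buf = reader.readBuffer(256 * 1024, false);  // false => skip checksums
    try {
      // consume buf here
    } finally {
      reader.releaseBuffer(buf);  // always hand the buffer back to the pool
      reader.close();
    }
  }
}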

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/impl/ZlibCodec.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/impl/ZlibCodec.java b/orc/src/java/org/apache/hive/orc/impl/ZlibCodec.java
new file mode 100644
index 0000000..16bd955
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/impl/ZlibCodec.java
@@ -0,0 +1,168 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc.impl;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.EnumSet;
+import java.util.zip.DataFormatException;
+import java.util.zip.Deflater;
+import java.util.zip.Inflater;
+
+import javax.annotation.Nullable;
+
+import org.apache.hive.orc.CompressionCodec;
+
+public class ZlibCodec implements CompressionCodec, DirectDecompressionCodec {
+  private static final HadoopShims SHIMS = HadoopShims.Factory.get();
+  private Boolean direct = null;
+
+  private final int level;
+  private final int strategy;
+
+  public ZlibCodec() {
+    level = Deflater.DEFAULT_COMPRESSION;
+    strategy = Deflater.DEFAULT_STRATEGY;
+  }
+
+  private ZlibCodec(int level, int strategy) {
+    this.level = level;
+    this.strategy = strategy;
+  }
+
+  @Override
+  public boolean compress(ByteBuffer in, ByteBuffer out,
+                          ByteBuffer overflow) throws IOException {
+    Deflater deflater = new Deflater(level, true);
+    deflater.setStrategy(strategy);
+    int length = in.remaining();
+    deflater.setInput(in.array(), in.arrayOffset() + in.position(), length);
+    deflater.finish();
+    int outSize = 0;
+    int offset = out.arrayOffset() + out.position();
+    while (!deflater.finished() && (length > outSize)) {
+      int size = deflater.deflate(out.array(), offset, out.remaining());
+      out.position(size + out.position());
+      outSize += size;
+      offset += size;
+      // if we run out of space in the out buffer, use the overflow
+      if (out.remaining() == 0) {
+        if (overflow == null) {
+          deflater.end();
+          return false;
+        }
+        out = overflow;
+        offset = out.arrayOffset() + out.position();
+      }
+    }
+    deflater.end();
+    return length > outSize;
+  }
+
+  @Override
+  public void decompress(ByteBuffer in, ByteBuffer out) throws IOException {
+
+    if(in.isDirect() && out.isDirect()) {
+      directDecompress(in, out);
+      return;
+    }
+
+    Inflater inflater = new Inflater(true);
+    inflater.setInput(in.array(), in.arrayOffset() + in.position(),
+                      in.remaining());
+    while (!(inflater.finished() || inflater.needsDictionary() ||
+             inflater.needsInput())) {
+      try {
+        int count = inflater.inflate(out.array(),
+                                     out.arrayOffset() + out.position(),
+                                     out.remaining());
+        out.position(count + out.position());
+      } catch (DataFormatException dfe) {
+        throw new IOException("Bad compression data", dfe);
+      }
+    }
+    out.flip();
+    inflater.end();
+    in.position(in.limit());
+  }
+
+  @Override
+  public boolean isAvailable() {
+    if (direct == null) {
+      // see nowrap option in new Inflater(boolean) which disables zlib headers
+      try {
+        if (SHIMS.getDirectDecompressor(
+            HadoopShims.DirectCompressionType.ZLIB_NOHEADER) != null) {
+          direct = Boolean.valueOf(true);
+        } else {
+          direct = Boolean.valueOf(false);
+        }
+      } catch (UnsatisfiedLinkError ule) {
+        direct = Boolean.valueOf(false);
+      }
+    }
+    return direct.booleanValue();
+  }
+
+  @Override
+  public void directDecompress(ByteBuffer in, ByteBuffer out)
+      throws IOException {
+    HadoopShims.DirectDecompressor decompressShim =
+        SHIMS.getDirectDecompressor(HadoopShims.DirectCompressionType.ZLIB_NOHEADER);
+    decompressShim.decompress(in, out);
+    out.flip(); // flip for read
+  }
+
+  @Override
+  public CompressionCodec modify(@Nullable EnumSet<Modifier> modifiers) {
+
+    if (modifiers == null) {
+      return this;
+    }
+
+    int l = this.level;
+    int s = this.strategy;
+
+    for (Modifier m : modifiers) {
+      switch (m) {
+      case BINARY:
+        /* filtered == less LZ77, more huffman */
+        s = Deflater.FILTERED;
+        break;
+      case TEXT:
+        s = Deflater.DEFAULT_STRATEGY;
+        break;
+      case FASTEST:
+        // deflate_fast looking for 8 byte patterns
+        l = Deflater.BEST_SPEED;
+        break;
+      case FAST:
+        // deflate_fast looking for 16 byte patterns
+        l = Deflater.BEST_SPEED + 1;
+        break;
+      case DEFAULT:
+        // deflate_slow looking for 128 byte patterns
+        l = Deflater.DEFAULT_COMPRESSION;
+        break;
+      default:
+        break;
+      }
+    }
+    return new ZlibCodec(l, s);
+  }
+}
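ZlibCodec compresses with raw DEFLATE: the nowrap = true argument to Deflater/Inflater
strips the zlib header, which is what the isAvailable() comment above alludes to. The
JDK-only round trip below illustrates that framing with the default level and strategy;
it is a sketch of the framing only, not of the codec's overflow-buffer handling.

import java.nio.charset.StandardCharsets;
import java.util.zip.DataFormatException;
import java.util.zip.Deflater;
import java.util.zip.Inflater;

final class RawDeflateSketch {
  public static void main(String[] args) throws DataFormatException {
    byte[] original = "orc orc orc orc".getBytes(StandardCharsets.UTF_8);

    Deflater deflater = new Deflater(Deflater.DEFAULT_COMPRESSION, true);  // nowrap: no zlib header
    deflater.setInput(original);
    deflater.finish();
    byte[] compressed = new byte[original.length + 64];
    int compressedLen = deflater.deflate(compressed);
    deflater.end();

    Inflater inflater = new Inflater(true);                                // nowrap on read as well
    inflater.setInput(compressed, 0, compressedLen);
    byte[] restored = new byte[original.length];
    int restoredLen = inflater.inflate(restored);
    inflater.end();

    System.out.println(restoredLen + " bytes: "
        + new String(restored, 0, restoredLen, StandardCharsets.UTF_8));
  }
}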

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/tools/FileDump.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/tools/FileDump.java b/orc/src/java/org/apache/hive/orc/tools/FileDump.java
new file mode 100644
index 0000000..c6b68de
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/tools/FileDump.java
@@ -0,0 +1,946 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc.tools;
+
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.io.PrintStream;
+import java.text.DecimalFormat;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.GnuParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.OptionBuilder;
+import org.apache.commons.cli.Options;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathFilter;
+import org.apache.hadoop.hdfs.DistributedFileSystem;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.UnionColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hive.orc.ColumnStatistics;
+import org.apache.hive.orc.OrcFile;
+import org.apache.hive.orc.BloomFilterIO;
+import org.apache.hive.orc.CompressionKind;
+import org.apache.hive.orc.Reader;
+import org.apache.hive.orc.RecordReader;
+import org.apache.hive.orc.TypeDescription;
+import org.apache.hive.orc.Writer;
+import org.apache.hive.orc.impl.AcidStats;
+import org.apache.hive.orc.impl.ColumnStatisticsImpl;
+import org.apache.hive.orc.impl.OrcAcidUtils;
+import org.apache.hive.orc.impl.OrcIndex;
+import org.apache.hive.orc.OrcProto;
+import org.apache.hive.orc.StripeInformation;
+import org.apache.hive.orc.StripeStatistics;
+import org.apache.hive.orc.impl.RecordReaderImpl;
+import org.codehaus.jettison.json.JSONException;
+import org.codehaus.jettison.json.JSONWriter;
+
+import com.google.common.base.Joiner;
+import com.google.common.base.Strings;
+import com.google.common.collect.Lists;
+
+/**
+ * A tool for printing out the file structure of ORC files.
+ */
+public final class FileDump {
+  public static final String UNKNOWN = "UNKNOWN";
+  public static final String SEPARATOR = Strings.repeat("_", 120) + "\n";
+  public static final int DEFAULT_BLOCK_SIZE = 256 * 1024 * 1024;
+  public static final String DEFAULT_BACKUP_PATH = System.getProperty("java.io.tmpdir");
+  public static final PathFilter HIDDEN_AND_SIDE_FILE_FILTER = new PathFilter() {
+    public boolean accept(Path p) {
+      String name = p.getName();
+      return !name.startsWith("_") && !name.startsWith(".") && !name.endsWith(
+          OrcAcidUtils.DELTA_SIDE_FILE_SUFFIX);
+    }
+  };
+
+  // utility class; not meant to be instantiated
+  private FileDump() {
+  }
+
+  public static void main(String[] args) throws Exception {
+    Configuration conf = new Configuration();
+
+    List<Integer> rowIndexCols = new ArrayList<Integer>(0);
+    Options opts = createOptions();
+    CommandLine cli = new GnuParser().parse(opts, args);
+
+    if (cli.hasOption('h')) {
+      HelpFormatter formatter = new HelpFormatter();
+      formatter.printHelp("orcfiledump", opts);
+      return;
+    }
+
+    boolean dumpData = cli.hasOption('d');
+    boolean recover = cli.hasOption("recover");
+    boolean skipDump = cli.hasOption("skip-dump");
+    String backupPath = DEFAULT_BACKUP_PATH;
+    if (cli.hasOption("backup-path")) {
+      backupPath = cli.getOptionValue("backup-path");
+    }
+
+    if (cli.hasOption("r")) {
+      String val = cli.getOptionValue("r");
+      if (val != null && val.trim().equals("*")) {
+        rowIndexCols = null; // All the columns
+      } else {
+        String[] colStrs = cli.getOptionValue("r").split(",");
+        rowIndexCols = new ArrayList<Integer>(colStrs.length);
+        for (String colStr : colStrs) {
+          rowIndexCols.add(Integer.parseInt(colStr));
+        }
+      }
+    }
+
+    boolean printTimeZone = cli.hasOption('t');
+    boolean jsonFormat = cli.hasOption('j');
+    String[] files = cli.getArgs();
+    if (files.length == 0) {
+      System.err.println("Error : ORC files are not specified");
+      return;
+    }
+
+    // if the specified path is directory, iterate through all files and print the file dump
+    List<String> filesInPath = Lists.newArrayList();
+    for (String filename : files) {
+      Path path = new Path(filename);
+      filesInPath.addAll(getAllFilesInPath(path, conf));
+    }
+
+    if (dumpData) {
+      printData(filesInPath, conf);
+    } else if (recover && skipDump) {
+      recoverFiles(filesInPath, conf, backupPath);
+    } else {
+      if (jsonFormat) {
+        boolean prettyPrint = cli.hasOption('p');
+        JsonFileDump.printJsonMetaData(filesInPath, conf, rowIndexCols, prettyPrint, printTimeZone);
+      } else {
+        printMetaData(filesInPath, conf, rowIndexCols, printTimeZone, recover, backupPath);
+      }
+    }
+  }
+
+  /**
+   * This method returns an ORC reader object if the specified file is readable. If the specified
+   * file has a side file (_flush_length), the max footer offset is read from that side file and
+   * the ORC reader is created from that offset. Since both the data file and the side file are
+   * flushed with hflush(), they can be inconsistent and out of sync. Null is returned in the
+   * following cases:
+   *
+   * 1) If the file specified by path or its side file is still open for writes
+   * 2) If *_flush_length file does not return any footer offset
+   * 3) If *_flush_length returns a valid footer offset but the data file is not readable at that
+   *    position (incomplete data file)
+   * 4) If *_flush_length file length is not a multiple of 8, then reader will be created from
+   *    previous valid footer. If there is no such footer (file length > 0 and < 8), then null will
+   *    be returned
+   *
+   * Also, if this method detects any file corruption (mismatch between data file and side file)
+   * then it will add the corresponding file to the specified input list for corrupted files.
+   *
+   * In all other cases, where the file is readable this method will return a reader object.
+   *
+   * @param path - file to get reader for
+   * @param conf - configuration object
+   * @param corruptFiles - fills this list with all possible corrupted files
+   * @return - reader for the specified file or null
+   * @throws IOException
+   */
+  static Reader getReader(final Path path, final Configuration conf,
+      final List<String> corruptFiles) throws IOException {
+    FileSystem fs = path.getFileSystem(conf);
+    long dataFileLen = fs.getFileStatus(path).getLen();
+    System.err.println("Processing data file " + path + " [length: " + dataFileLen + "]");
+    Path sideFile = OrcAcidUtils.getSideFile(path);
+    final boolean sideFileExists = fs.exists(sideFile);
+    boolean openDataFile = false;
+    boolean openSideFile = false;
+    if (fs instanceof DistributedFileSystem) {
+      DistributedFileSystem dfs = (DistributedFileSystem) fs;
+      openDataFile = !dfs.isFileClosed(path);
+      openSideFile = sideFileExists && !dfs.isFileClosed(sideFile);
+    }
+
+    if (openDataFile || openSideFile) {
+      if (openDataFile && openSideFile) {
+        System.err.println("Unable to perform file dump as " + path + " and " + sideFile +
+            " are still open for writes.");
+      } else if (openSideFile) {
+        System.err.println("Unable to perform file dump as " + sideFile +
+            " is still open for writes.");
+      } else {
+        System.err.println("Unable to perform file dump as " + path +
+            " is still open for writes.");
+      }
+
+      return null;
+    }
+
+    Reader reader = null;
+    if (sideFileExists) {
+      final long maxLen = OrcAcidUtils.getLastFlushLength(fs, path);
+      final long sideFileLen = fs.getFileStatus(sideFile).getLen();
+      System.err.println("Found flush length file " + sideFile
+          + " [length: " + sideFileLen + ", maxFooterOffset: " + maxLen + "]");
+      // no offsets read from side file
+      if (maxLen == -1) {
+
+        // if data file is larger than last flush length, then additional data could be recovered
+        if (dataFileLen > maxLen) {
+          System.err.println("Data file has more data than max footer offset:" + maxLen +
+              ". Adding data file to recovery list.");
+          if (corruptFiles != null) {
+            corruptFiles.add(path.toUri().toString());
+          }
+        }
+        return null;
+      }
+
+      try {
+        reader = OrcFile.createReader(path, OrcFile.readerOptions(conf).maxLength(maxLen));
+
+        // if data file is larger than last flush length, then additional data could be recovered
+        if (dataFileLen > maxLen) {
+          System.err.println("Data file has more data than max footer offset:" + maxLen +
+              ". Adding data file to recovery list.");
+          if (corruptFiles != null) {
+            corruptFiles.add(path.toUri().toString());
+          }
+        }
+      } catch (Exception e) {
+        if (corruptFiles != null) {
+          corruptFiles.add(path.toUri().toString());
+        }
+        System.err.println("Unable to read data from max footer offset." +
+            " Adding data file to recovery list.");
+        return null;
+      }
+    } else {
+      reader = OrcFile.createReader(path, OrcFile.readerOptions(conf));
+    }
+
+    return reader;
+  }
+
+  public static Collection<String> getAllFilesInPath(final Path path,
+      final Configuration conf) throws IOException {
+    List<String> filesInPath = Lists.newArrayList();
+    FileSystem fs = path.getFileSystem(conf);
+    FileStatus fileStatus = fs.getFileStatus(path);
+    if (fileStatus.isDir()) {
+      FileStatus[] fileStatuses = fs.listStatus(path, HIDDEN_AND_SIDE_FILE_FILTER);
+      for (FileStatus fileInPath : fileStatuses) {
+        if (fileInPath.isDir()) {
+          filesInPath.addAll(getAllFilesInPath(fileInPath.getPath(), conf));
+        } else {
+          filesInPath.add(fileInPath.getPath().toString());
+        }
+      }
+    } else {
+      filesInPath.add(path.toString());
+    }
+
+    return filesInPath;
+  }
+
+  private static void printData(List<String> files,
+      Configuration conf) throws IOException,
+      JSONException {
+    for (String file : files) {
+      try {
+        Path path = new Path(file);
+        Reader reader = getReader(path, conf, Lists.<String>newArrayList());
+        if (reader == null) {
+          continue;
+        }
+        printJsonData(reader);
+        System.out.println(SEPARATOR);
+      } catch (Exception e) {
+        System.err.println("Unable to dump data for file: " + file);
+        continue;
+      }
+    }
+  }
+
+  private static void printMetaData(List<String> files, Configuration conf,
+      List<Integer> rowIndexCols, boolean printTimeZone, final boolean recover,
+      final String backupPath)
+      throws IOException {
+    List<String> corruptFiles = Lists.newArrayList();
+    for (String filename : files) {
+      printMetaDataImpl(filename, conf, rowIndexCols, printTimeZone, corruptFiles);
+      System.out.println(SEPARATOR);
+    }
+
+    if (!corruptFiles.isEmpty()) {
+      if (recover) {
+        recoverFiles(corruptFiles, conf, backupPath);
+      } else {
+        System.err.println(corruptFiles.size() + " file(s) are corrupted." +
+            " Run the following command to recover corrupted files.\n");
+        String fileNames = Joiner.on(" ").skipNulls().join(corruptFiles);
+        System.err.println("hive --orcfiledump --recover --skip-dump " + fileNames);
+        System.out.println(SEPARATOR);
+      }
+    }
+  }
+
+  private static void printMetaDataImpl(final String filename,
+      final Configuration conf, List<Integer> rowIndexCols, final boolean printTimeZone,
+      final List<String> corruptFiles) throws IOException {
+    Path file = new Path(filename);
+    Reader reader = getReader(file, conf, corruptFiles);
+    // if we can create a reader then the footer is not corrupt and the file will be readable
+    if (reader == null) {
+      return;
+    }
+
+    System.out.println("Structure for " + filename);
+    System.out.println("File Version: " + reader.getFileVersion().getName() +
+        " with " + reader.getWriterVersion());
+    RecordReaderImpl rows = (RecordReaderImpl) reader.rows();
+    System.out.println("Rows: " + reader.getNumberOfRows());
+    System.out.println("Compression: " + reader.getCompressionKind());
+    if (reader.getCompressionKind() != CompressionKind.NONE) {
+      System.out.println("Compression size: " + reader.getCompressionSize());
+    }
+    System.out.println("Type: " + reader.getSchema().toString());
+    System.out.println("\nStripe Statistics:");
+    List<StripeStatistics> stripeStats = reader.getStripeStatistics();
+    for (int n = 0; n < stripeStats.size(); n++) {
+      System.out.println("  Stripe " + (n + 1) + ":");
+      StripeStatistics ss = stripeStats.get(n);
+      for (int i = 0; i < ss.getColumnStatistics().length; ++i) {
+        System.out.println("    Column " + i + ": " +
+            ss.getColumnStatistics()[i].toString());
+      }
+    }
+    ColumnStatistics[] stats = reader.getStatistics();
+    int colCount = stats.length;
+    if (rowIndexCols == null) {
+      rowIndexCols = new ArrayList<>(colCount);
+      for (int i = 0; i < colCount; ++i) {
+        rowIndexCols.add(i);
+      }
+    }
+    System.out.println("\nFile Statistics:");
+    for (int i = 0; i < stats.length; ++i) {
+      System.out.println("  Column " + i + ": " + stats[i].toString());
+    }
+    System.out.println("\nStripes:");
+    int stripeIx = -1;
+    for (StripeInformation stripe : reader.getStripes()) {
+      ++stripeIx;
+      long stripeStart = stripe.getOffset();
+      OrcProto.StripeFooter footer = rows.readStripeFooter(stripe);
+      if (printTimeZone) {
+        String tz = footer.getWriterTimezone();
+        if (tz == null || tz.isEmpty()) {
+          tz = UNKNOWN;
+        }
+        System.out.println("  Stripe: " + stripe.toString() + " timezone: " + tz);
+      } else {
+        System.out.println("  Stripe: " + stripe.toString());
+      }
+      long sectionStart = stripeStart;
+      for (OrcProto.Stream section : footer.getStreamsList()) {
+        String kind = section.hasKind() ? section.getKind().name() : UNKNOWN;
+        System.out.println("    Stream: column " + section.getColumn() +
+            " section " + kind + " start: " + sectionStart +
+            " length " + section.getLength());
+        sectionStart += section.getLength();
+      }
+      for (int i = 0; i < footer.getColumnsCount(); ++i) {
+        OrcProto.ColumnEncoding encoding = footer.getColumns(i);
+        StringBuilder buf = new StringBuilder();
+        buf.append("    Encoding column ");
+        buf.append(i);
+        buf.append(": ");
+        buf.append(encoding.getKind());
+        if (encoding.getKind() == OrcProto.ColumnEncoding.Kind.DICTIONARY ||
+            encoding.getKind() == OrcProto.ColumnEncoding.Kind.DICTIONARY_V2) {
+          buf.append("[");
+          buf.append(encoding.getDictionarySize());
+          buf.append("]");
+        }
+        System.out.println(buf);
+      }
+      if (rowIndexCols != null && !rowIndexCols.isEmpty()) {
+        // mark the specified columns; bloom filters are only read for the
+        // columns that are marked here
+        boolean[] sargColumns = new boolean[colCount];
+        for (int colIdx : rowIndexCols) {
+          sargColumns[colIdx] = true;
+        }
+        OrcIndex indices = rows
+            .readRowIndex(stripeIx, null, null, null, sargColumns);
+        for (int col : rowIndexCols) {
+          StringBuilder buf = new StringBuilder();
+          String rowIdxString = getFormattedRowIndices(col, indices.getRowGroupIndex());
+          buf.append(rowIdxString);
+          String bloomFilString = getFormattedBloomFilters(col, indices.getBloomFilterIndex());
+          buf.append(bloomFilString);
+          System.out.println(buf);
+        }
+      }
+    }
+
+    FileSystem fs = file.getFileSystem(conf);
+    long fileLen = fs.getFileStatus(file).getLen();
+    long paddedBytes = getTotalPaddingSize(reader);
+    // an empty ORC file is ~45 bytes, so the file length is assumed to always be > 0
+    double percentPadding = ((double) paddedBytes / (double) fileLen) * 100;
+    DecimalFormat format = new DecimalFormat("##.##");
+    System.out.println("\nFile length: " + fileLen + " bytes");
+    System.out.println("Padding length: " + paddedBytes + " bytes");
+    System.out.println("Padding ratio: " + format.format(percentPadding) + "%");
+    AcidStats acidStats = OrcAcidUtils.parseAcidStats(reader);
+    if (acidStats != null) {
+      System.out.println("ACID stats:" + acidStats);
+    }
+    rows.close();
+  }
+
+  private static void recoverFiles(final List<String> corruptFiles, final Configuration conf,
+      final String backup)
+      throws IOException {
+    for (String corruptFile : corruptFiles) {
+      System.err.println("Recovering file " + corruptFile);
+      Path corruptPath = new Path(corruptFile);
+      FileSystem fs = corruptPath.getFileSystem(conf);
+      FSDataInputStream fdis = fs.open(corruptPath);
+      try {
+        long corruptFileLen = fs.getFileStatus(corruptPath).getLen();
+        long remaining = corruptFileLen;
+        List<Long> footerOffsets = Lists.newArrayList();
+
+        // start reading the data file from top to bottom and record the valid footers
+        while (remaining > 0) {
+          int toRead = (int) Math.min(DEFAULT_BLOCK_SIZE, remaining);
+          byte[] data = new byte[toRead];
+          long startPos = corruptFileLen - remaining;
+          fdis.readFully(startPos, data, 0, toRead);
+
+          // find all MAGIC string and see if the file is readable from there
+          int index = 0;
+          long nextFooterOffset;
+
+          while (index != -1) {
+            index = indexOf(data, OrcFile.MAGIC.getBytes(), index + 1);
+            if (index != -1) {
+              nextFooterOffset = startPos + index + OrcFile.MAGIC.length() + 1;
+              if (isReadable(corruptPath, conf, nextFooterOffset)) {
+                footerOffsets.add(nextFooterOffset);
+              }
+            }
+          }
+
+          System.err.println("Scanning for valid footers - startPos: " + startPos +
+              " toRead: " + toRead + " remaining: " + remaining);
+          remaining = remaining - toRead;
+        }
+
+        System.err.println("Readable footerOffsets: " + footerOffsets);
+        recoverFile(corruptPath, fs, conf, footerOffsets, backup);
+      } catch (Exception e) {
+        Path recoveryFile = getRecoveryFile(corruptPath);
+        if (fs.exists(recoveryFile)) {
+          fs.delete(recoveryFile, false);
+        }
+        System.err.println("Unable to recover file " + corruptFile);
+        e.printStackTrace();
+        System.err.println(SEPARATOR);
+        continue;
+      } finally {
+        fdis.close();
+      }
+      System.err.println(corruptFile + " recovered successfully!");
+      System.err.println(SEPARATOR);
+    }
+  }
+
+  private static void recoverFile(final Path corruptPath, final FileSystem fs,
+      final Configuration conf, final List<Long> footerOffsets, final String backup)
+      throws IOException {
+
+    // first recover the data into a .recovered file and, once successful, rename it to the actual file
+    Path recoveredPath = getRecoveryFile(corruptPath);
+
+    // make sure that file does not exist
+    if (fs.exists(recoveredPath)) {
+      fs.delete(recoveredPath, false);
+    }
+
+    // if there are no valid footers, create an empty orc file so that the recovered file is still readable
+    if (footerOffsets == null || footerOffsets.isEmpty()) {
+      System.err.println("No readable footers found. Creating empty orc file.");
+      TypeDescription schema = TypeDescription.createStruct();
+      Writer writer = OrcFile.createWriter(recoveredPath,
+          OrcFile.writerOptions(conf).setSchema(schema));
+      writer.close();
+    } else {
+      FSDataInputStream fdis = fs.open(corruptPath);
+      FileStatus fileStatus = fs.getFileStatus(corruptPath);
+      // read corrupt file and copy it to recovered file until last valid footer
+      FSDataOutputStream fdos = fs.create(recoveredPath, true,
+          conf.getInt("io.file.buffer.size", 4096),
+          fileStatus.getReplication(),
+          fileStatus.getBlockSize());
+      try {
+        long fileLen = footerOffsets.get(footerOffsets.size() - 1);
+        long remaining = fileLen;
+
+        while (remaining > 0) {
+          int toRead = (int) Math.min(DEFAULT_BLOCK_SIZE, remaining);
+          byte[] data = new byte[toRead];
+          long startPos = fileLen - remaining;
+          fdis.readFully(startPos, data, 0, toRead);
+          fdos.write(data);
+          System.err.println("Copying data to recovery file - startPos: " + startPos +
+              " toRead: " + toRead + " remaining: " + remaining);
+          remaining = remaining - toRead;
+        }
+      } catch (Exception e) {
+        fs.delete(recoveredPath, false);
+        throw new IOException(e);
+      } finally {
+        fdis.close();
+        fdos.close();
+      }
+    }
+
+    // validate the recovered file once again and start moving corrupt files to backup folder
+    if (isReadable(recoveredPath, conf, Long.MAX_VALUE)) {
+      Path backupDataPath;
+      String scheme = corruptPath.toUri().getScheme();
+      String authority = corruptPath.toUri().getAuthority();
+      String filePath = corruptPath.toUri().getPath();
+
+      // use the same filesystem as corrupt file if backup-path is not explicitly specified
+      if (backup.equals(DEFAULT_BACKUP_PATH)) {
+        backupDataPath = new Path(scheme, authority, DEFAULT_BACKUP_PATH + filePath);
+      } else {
+        backupDataPath = Path.mergePaths(new Path(backup), corruptPath);
+      }
+
+      // Move data file to backup path
+      moveFiles(fs, corruptPath, backupDataPath);
+
+      // Move side file to backup path
+      Path sideFilePath = OrcAcidUtils.getSideFile(corruptPath);
+      Path backupSideFilePath = new Path(backupDataPath.getParent(), sideFilePath.getName());
+      moveFiles(fs, sideFilePath, backupSideFilePath);
+
+      // finally move recovered file to actual file
+      moveFiles(fs, recoveredPath, corruptPath);
+
+      // we are done recovering, backing up and validating
+      System.err.println("Validation of recovered file successful!");
+    }
+  }
+
+  private static void moveFiles(final FileSystem fs, final Path src, final Path dest)
+      throws IOException {
+    try {
+      // create the dest directory if it does not exist
+      if (!fs.exists(dest.getParent())) {
+        fs.mkdirs(dest.getParent());
+      }
+
+      // if the destination file exists for some reason delete it
+      fs.delete(dest, false);
+
+      if (fs.rename(src, dest)) {
+        System.err.println("Moved " + src + " to " + dest);
+      } else {
+        throw new IOException("Unable to move " + src + " to " + dest);
+      }
+
+    } catch (Exception e) {
+      throw new IOException("Unable to move " + src + " to " + dest, e);
+    }
+  }
+
+  private static Path getRecoveryFile(final Path corruptPath) {
+    return new Path(corruptPath.getParent(), corruptPath.getName() + ".recovered");
+  }
+
+  private static boolean isReadable(final Path corruptPath, final Configuration conf,
+      final long maxLen) {
+    try {
+      OrcFile.createReader(corruptPath, OrcFile.readerOptions(conf).maxLength(maxLen));
+      return true;
+    } catch (Exception e) {
+      // ignore this exception as maxLen is unreadable
+      return false;
+    }
+  }
+
+  // search for byte pattern in another byte array
+  private static int indexOf(final byte[] data, final byte[] pattern, final int index) {
+    if (data == null || data.length == 0 || pattern == null || pattern.length == 0 ||
+        index > data.length || index < 0) {
+      return -1;
+    }
+
+    int j = 0;
+    for (int i = index; i < data.length; i++) {
+      if (pattern[j] == data[i]) {
+        j++;
+      } else {
+        j = 0;
+      }
+
+      if (j == pattern.length) {
+        return i - pattern.length + 1;
+      }
+    }
+
+    return -1;
+  }
+
+  private static String getFormattedBloomFilters(int col,
+      OrcProto.BloomFilterIndex[] bloomFilterIndex) {
+    StringBuilder buf = new StringBuilder();
+    BloomFilterIO stripeLevelBF = null;
+    if (bloomFilterIndex != null && bloomFilterIndex[col] != null) {
+      int idx = 0;
+      buf.append("\n    Bloom filters for column ").append(col).append(":");
+      for (OrcProto.BloomFilter bf : bloomFilterIndex[col].getBloomFilterList()) {
+        BloomFilterIO toMerge = new BloomFilterIO(bf);
+        buf.append("\n      Entry ").append(idx++).append(":").append(getBloomFilterStats(toMerge));
+        if (stripeLevelBF == null) {
+          stripeLevelBF = toMerge;
+        } else {
+          stripeLevelBF.merge(toMerge);
+        }
+      }
+      String bloomFilterStats = getBloomFilterStats(stripeLevelBF);
+      buf.append("\n      Stripe level merge:").append(bloomFilterStats);
+    }
+    return buf.toString();
+  }
+
+  private static String getBloomFilterStats(BloomFilterIO bf) {
+    StringBuilder sb = new StringBuilder();
+    int bitCount = bf.getBitSize();
+    int popCount = 0;
+    for (long l : bf.getBitSet()) {
+      popCount += Long.bitCount(l);
+    }
+    int k = bf.getNumHashFunctions();
+    float loadFactor = (float) popCount / (float) bitCount;
+    float expectedFpp = (float) Math.pow(loadFactor, k);
+    DecimalFormat df = new DecimalFormat("###.####");
+    sb.append(" numHashFunctions: ").append(k);
+    sb.append(" bitCount: ").append(bitCount);
+    sb.append(" popCount: ").append(popCount);
+    sb.append(" loadFactor: ").append(df.format(loadFactor));
+    sb.append(" expectedFpp: ").append(expectedFpp);
+    return sb.toString();
+  }
+
+  private static String getFormattedRowIndices(int col,
+                                               OrcProto.RowIndex[] rowGroupIndex) {
+    StringBuilder buf = new StringBuilder();
+    OrcProto.RowIndex index;
+    buf.append("    Row group indices for column ").append(col).append(":");
+    if (rowGroupIndex == null || (col >= rowGroupIndex.length) ||
+        ((index = rowGroupIndex[col]) == null)) {
+      buf.append(" not found\n");
+      return buf.toString();
+    }
+
+    for (int entryIx = 0; entryIx < index.getEntryCount(); ++entryIx) {
+      buf.append("\n      Entry ").append(entryIx).append(": ");
+      OrcProto.RowIndexEntry entry = index.getEntry(entryIx);
+      if (entry == null) {
+        buf.append("unknown\n");
+        continue;
+      }
+      OrcProto.ColumnStatistics colStats = entry.getStatistics();
+      if (colStats == null) {
+        buf.append("no stats at ");
+      } else {
+        ColumnStatistics cs = ColumnStatisticsImpl.deserialize(colStats);
+        buf.append(cs.toString());
+      }
+      buf.append(" positions: ");
+      for (int posIx = 0; posIx < entry.getPositionsCount(); ++posIx) {
+        if (posIx != 0) {
+          buf.append(",");
+        }
+        buf.append(entry.getPositions(posIx));
+      }
+    }
+    return buf.toString();
+  }
+
+  public static long getTotalPaddingSize(Reader reader) throws IOException {
+    long paddedBytes = 0;
+    List<StripeInformation> stripes = reader.getStripes();
+    for (int i = 1; i < stripes.size(); i++) {
+      long prevStripeOffset = stripes.get(i - 1).getOffset();
+      long prevStripeLen = stripes.get(i - 1).getLength();
+      paddedBytes += stripes.get(i).getOffset() - (prevStripeOffset + prevStripeLen);
+    }
+    return paddedBytes;
+  }
+
+  @SuppressWarnings("static-access")
+  static Options createOptions() {
+    Options result = new Options();
+
+    // add -d and --data to print the rows
+    result.addOption(OptionBuilder
+        .withLongOpt("data")
+        .withDescription("Should the data be printed")
+        .create('d'));
+
+    // to avoid breaking unit tests (when run in different time zones) for file dump, printing
+    // of timezone is made optional
+    result.addOption(OptionBuilder
+        .withLongOpt("timezone")
+        .withDescription("Print writer's time zone")
+        .create('t'));
+
+    result.addOption(OptionBuilder
+        .withLongOpt("help")
+        .withDescription("print help message")
+        .create('h'));
+
+    result.addOption(OptionBuilder
+        .withLongOpt("rowindex")
+        .withArgName("comma separated list of column ids for which row index should be printed")
+        .withDescription("Dump stats for column number(s)")
+        .hasArg()
+        .create('r'));
+
+    result.addOption(OptionBuilder
+        .withLongOpt("json")
+        .withDescription("Print metadata in JSON format")
+        .create('j'));
+
+    result.addOption(OptionBuilder
+        .withLongOpt("pretty")
+        .withDescription("Pretty print json metadata output")
+        .create('p'));
+
+    result.addOption(OptionBuilder
+        .withLongOpt("recover")
+        .withDescription("recover corrupted orc files generated by streaming")
+        .create());
+
+    result.addOption(OptionBuilder
+        .withLongOpt("skip-dump")
+        .withDescription("used along with --recover to directly recover files without dumping")
+        .create());
+
+    result.addOption(OptionBuilder
+        .withLongOpt("backup-path")
+        .withDescription("specify a backup path to store the corrupted files (default: /tmp)")
+        .hasArg()
+        .create());
+    return result;
+  }
+
+  private static void printMap(JSONWriter writer,
+                               MapColumnVector vector,
+                               TypeDescription schema,
+                               int row) throws JSONException {
+    writer.array();
+    TypeDescription keyType = schema.getChildren().get(0);
+    TypeDescription valueType = schema.getChildren().get(1);
+    int offset = (int) vector.offsets[row];
+    for (int i = 0; i < vector.lengths[row]; ++i) {
+      writer.object();
+      writer.key("_key");
+      printValue(writer, vector.keys, keyType, offset + i);
+      writer.key("_value");
+      printValue(writer, vector.values, valueType, offset + i);
+      writer.endObject();
+    }
+    writer.endArray();
+  }
+
+  private static void printList(JSONWriter writer,
+                                ListColumnVector vector,
+                                TypeDescription schema,
+                                int row) throws JSONException {
+    writer.array();
+    int offset = (int) vector.offsets[row];
+    TypeDescription childType = schema.getChildren().get(0);
+    for (int i = 0; i < vector.lengths[row]; ++i) {
+      printValue(writer, vector.child, childType, offset + i);
+    }
+    writer.endArray();
+  }
+
+  private static void printUnion(JSONWriter writer,
+                                 UnionColumnVector vector,
+                                 TypeDescription schema,
+                                 int row) throws JSONException {
+    int tag = vector.tags[row];
+    printValue(writer, vector.fields[tag], schema.getChildren().get(tag), row);
+  }
+
+  static void printStruct(JSONWriter writer,
+                          StructColumnVector batch,
+                          TypeDescription schema,
+                          int row) throws JSONException {
+    writer.object();
+    List<String> fieldNames = schema.getFieldNames();
+    List<TypeDescription> fieldTypes = schema.getChildren();
+    for (int i = 0; i < fieldTypes.size(); ++i) {
+      writer.key(fieldNames.get(i));
+      printValue(writer, batch.fields[i], fieldTypes.get(i), row);
+    }
+    writer.endObject();
+  }
+
+  static void printBinary(JSONWriter writer, BytesColumnVector vector,
+                          int row) throws JSONException {
+    writer.array();
+    int offset = vector.start[row];
+    for(int i=0; i < vector.length[row]; ++i) {
+      writer.value(0xff & (int) vector.vector[row][offset + i]);
+    }
+    writer.endArray();
+  }
+  static void printValue(JSONWriter writer, ColumnVector vector,
+                         TypeDescription schema, int row) throws JSONException {
+    if (vector.isRepeating) {
+      row = 0;
+    }
+    if (vector.noNulls || !vector.isNull[row]) {
+      switch (schema.getCategory()) {
+        case BOOLEAN:
+          writer.value(((LongColumnVector) vector).vector[row] != 0);
+          break;
+        case BYTE:
+        case SHORT:
+        case INT:
+        case LONG:
+          writer.value(((LongColumnVector) vector).vector[row]);
+          break;
+        case FLOAT:
+        case DOUBLE:
+          writer.value(((DoubleColumnVector) vector).vector[row]);
+          break;
+        case STRING:
+        case CHAR:
+        case VARCHAR:
+          writer.value(((BytesColumnVector) vector).toString(row));
+          break;
+        case BINARY:
+          printBinary(writer, (BytesColumnVector) vector, row);
+          break;
+        case DECIMAL:
+          writer.value(((DecimalColumnVector) vector).vector[row].toString());
+          break;
+        case DATE:
+          writer.value(new DateWritable(
+              (int) ((LongColumnVector) vector).vector[row]).toString());
+          break;
+        case TIMESTAMP:
+          writer.value(((TimestampColumnVector) vector)
+              .asScratchTimestamp(row).toString());
+          break;
+        case LIST:
+          printList(writer, (ListColumnVector) vector, schema, row);
+          break;
+        case MAP:
+          printMap(writer, (MapColumnVector) vector, schema, row);
+          break;
+        case STRUCT:
+          printStruct(writer, (StructColumnVector) vector, schema, row);
+          break;
+        case UNION:
+          printUnion(writer, (UnionColumnVector) vector, schema, row);
+          break;
+        default:
+          throw new IllegalArgumentException("Unknown type " +
+              schema.toString());
+      }
+    } else {
+      writer.value(null);
+    }
+  }
+
+  static void printRow(JSONWriter writer,
+                       VectorizedRowBatch batch,
+                       TypeDescription schema,
+                       int row) throws JSONException {
+    if (schema.getCategory() == TypeDescription.Category.STRUCT) {
+      List<TypeDescription> fieldTypes = schema.getChildren();
+      List<String> fieldNames = schema.getFieldNames();
+      writer.object();
+      for (int c = 0; c < batch.cols.length; ++c) {
+        writer.key(fieldNames.get(c));
+        printValue(writer, batch.cols[c], fieldTypes.get(c), row);
+      }
+      writer.endObject();
+    } else {
+      printValue(writer, batch.cols[0], schema, row);
+    }
+  }
+
+  static void printJsonData(final Reader reader) throws IOException, JSONException {
+    PrintStream printStream = System.out;
+    OutputStreamWriter out = new OutputStreamWriter(printStream, "UTF-8");
+    RecordReader rows = reader.rows();
+    try {
+      TypeDescription schema = reader.getSchema();
+      VectorizedRowBatch batch = schema.createRowBatch();
+      while (rows.nextBatch(batch)) {
+        for(int r=0; r < batch.size; ++r) {
+          JSONWriter writer = new JSONWriter(out);
+          printRow(writer, batch, schema, r);
+          out.write("\n");
+          out.flush();
+          if (printStream.checkError()) {
+            throw new IOException("Error encountered when writing to stdout.");
+          }
+        }
+      }
+    } finally {
+      rows.close();
+    }
+  }
+}
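Besides the "hive --orcfiledump ..." command line printed in the recovery hint above,
the tool can also be driven directly through its public main method. A small hedged
example; the file path is purely illustrative.

import org.apache.hive.orc.tools.FileDump;

final class FileDumpSketch {
  public static void main(String[] args) throws Exception {
    // Print pretty JSON metadata with writer time zones, plus row indexes
    // (and bloom filters, if present) for columns 1 and 2.
    FileDump.main(new String[]{"-j", "-p", "-t", "-r", "1,2", "/tmp/example.orc"});
  }
}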

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/tools/JsonFileDump.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/tools/JsonFileDump.java b/orc/src/java/org/apache/hive/orc/tools/JsonFileDump.java
new file mode 100644
index 0000000..5e60eed
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/tools/JsonFileDump.java
@@ -0,0 +1,411 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc.tools;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hive.orc.BinaryColumnStatistics;
+import org.apache.hive.orc.BloomFilterIO;
+import org.apache.hive.orc.BooleanColumnStatistics;
+import org.apache.hive.orc.ColumnStatistics;
+import org.apache.hive.orc.DecimalColumnStatistics;
+import org.apache.hive.orc.DoubleColumnStatistics;
+import org.apache.hive.orc.IntegerColumnStatistics;
+import org.apache.hive.orc.OrcProto;
+import org.apache.hive.orc.StripeInformation;
+import org.apache.hive.orc.StripeStatistics;
+import org.apache.hive.orc.TimestampColumnStatistics;
+import org.apache.hive.orc.impl.AcidStats;
+import org.apache.hive.orc.impl.ColumnStatisticsImpl;
+import org.apache.hive.orc.impl.OrcIndex;
+import org.apache.hive.orc.impl.RecordReaderImpl;
+import org.apache.hive.orc.CompressionKind;
+import org.apache.hive.orc.Reader;
+import org.apache.hive.orc.impl.OrcAcidUtils;
+import org.codehaus.jettison.json.JSONArray;
+import org.apache.hive.orc.DateColumnStatistics;
+import org.apache.hive.orc.StringColumnStatistics;
+import org.codehaus.jettison.json.JSONException;
+import org.codehaus.jettison.json.JSONObject;
+import org.codehaus.jettison.json.JSONStringer;
+import org.codehaus.jettison.json.JSONWriter;
+
+/**
+ * File dump tool with json formatted output.
+ */
+public class JsonFileDump {
+
+  public static void printJsonMetaData(List<String> files,
+      Configuration conf,
+      List<Integer> rowIndexCols, boolean prettyPrint, boolean printTimeZone)
+      throws JSONException, IOException {
+    if (files.isEmpty()) {
+      return;
+    }
+    JSONStringer writer = new JSONStringer();
+    boolean multiFile = files.size() > 1;
+    if (multiFile) {
+      writer.array();
+    } else {
+      writer.object();
+    }
+    for (String filename : files) {
+      try {
+        if (multiFile) {
+          writer.object();
+        }
+        writer.key("fileName").value(filename);
+        Path path = new Path(filename);
+        Reader reader = FileDump.getReader(path, conf, null);
+        if (reader == null) {
+          writer.key("status").value("FAILED");
+          continue;
+        }
+        writer.key("fileVersion").value(reader.getFileVersion().getName());
+        writer.key("writerVersion").value(reader.getWriterVersion());
+        RecordReaderImpl rows = (RecordReaderImpl) reader.rows();
+        writer.key("numberOfRows").value(reader.getNumberOfRows());
+        writer.key("compression").value(reader.getCompressionKind());
+        if (reader.getCompressionKind() != CompressionKind.NONE) {
+          writer.key("compressionBufferSize").value(reader.getCompressionSize());
+        }
+        writer.key("schemaString").value(reader.getSchema().toString());
+        writer.key("schema").array();
+        writeSchema(writer, reader.getTypes());
+        writer.endArray();
+
+        writer.key("stripeStatistics").array();
+        List<StripeStatistics> stripeStatistics = reader.getStripeStatistics();
+        for (int n = 0; n < stripeStatistics.size(); n++) {
+          writer.object();
+          writer.key("stripeNumber").value(n + 1);
+          StripeStatistics ss = stripeStatistics.get(n);
+          writer.key("columnStatistics").array();
+          for (int i = 0; i < ss.getColumnStatistics().length; i++) {
+            writer.object();
+            writer.key("columnId").value(i);
+            writeColumnStatistics(writer, ss.getColumnStatistics()[i]);
+            writer.endObject();
+          }
+          writer.endArray();
+          writer.endObject();
+        }
+        writer.endArray();
+
+        ColumnStatistics[] stats = reader.getStatistics();
+        int colCount = stats.length;
+        if (rowIndexCols == null) {
+          rowIndexCols = new ArrayList<>(colCount);
+          for (int i = 0; i < colCount; ++i) {
+            rowIndexCols.add(i);
+          }
+        }
+        writer.key("fileStatistics").array();
+        for (int i = 0; i < stats.length; ++i) {
+          writer.object();
+          writer.key("columnId").value(i);
+          writeColumnStatistics(writer, stats[i]);
+          writer.endObject();
+        }
+        writer.endArray();
+
+        writer.key("stripes").array();
+        int stripeIx = -1;
+        for (StripeInformation stripe : reader.getStripes()) {
+          ++stripeIx;
+          long stripeStart = stripe.getOffset();
+          OrcProto.StripeFooter footer = rows.readStripeFooter(stripe);
+          writer.object(); // start of stripe information
+          writer.key("stripeNumber").value(stripeIx + 1);
+          writer.key("stripeInformation");
+          writeStripeInformation(writer, stripe);
+          if (printTimeZone) {
+            writer.key("writerTimezone").value(
+                footer.hasWriterTimezone() ? footer.getWriterTimezone() : FileDump.UNKNOWN);
+          }
+          long sectionStart = stripeStart;
+
+          writer.key("streams").array();
+          for (OrcProto.Stream section : footer.getStreamsList()) {
+            writer.object();
+            String kind = section.hasKind() ? section.getKind().name() : FileDump.UNKNOWN;
+            writer.key("columnId").value(section.getColumn());
+            writer.key("section").value(kind);
+            writer.key("startOffset").value(sectionStart);
+            writer.key("length").value(section.getLength());
+            sectionStart += section.getLength();
+            writer.endObject();
+          }
+          writer.endArray();
+
+          writer.key("encodings").array();
+          for (int i = 0; i < footer.getColumnsCount(); ++i) {
+            writer.object();
+            OrcProto.ColumnEncoding encoding = footer.getColumns(i);
+            writer.key("columnId").value(i);
+            writer.key("kind").value(encoding.getKind());
+            if (encoding.getKind() == OrcProto.ColumnEncoding.Kind.DICTIONARY ||
+                encoding.getKind() == OrcProto.ColumnEncoding.Kind.DICTIONARY_V2) {
+              writer.key("dictionarySize").value(encoding.getDictionarySize());
+            }
+            writer.endObject();
+          }
+          writer.endArray();
+          if (!rowIndexCols.isEmpty()) {
+            // dump indexes only for the requested columns; bloom filters are
+            // read only for columns marked in sargColumns
+            boolean[] sargColumns = new boolean[colCount];
+            for (int colIdx : rowIndexCols) {
+              sargColumns[colIdx] = true;
+            }
+            OrcIndex indices = rows.readRowIndex(stripeIx, null, sargColumns);
+            writer.key("indexes").array();
+            for (int col : rowIndexCols) {
+              writer.object();
+              writer.key("columnId").value(col);
+              writeRowGroupIndexes(writer, col, indices.getRowGroupIndex());
+              writeBloomFilterIndexes(writer, col, indices.getBloomFilterIndex());
+              writer.endObject();
+            }
+            writer.endArray();
+          }
+          writer.endObject(); // end of stripe information
+        }
+        writer.endArray();
+
+        FileSystem fs = path.getFileSystem(conf);
+        long fileLen = fs.getContentSummary(path).getLength();
+        long paddedBytes = FileDump.getTotalPaddingSize(reader);
+        // an empty ORC file is ~45 bytes, so the file length is assumed to be > 0
+        double percentPadding = ((double) paddedBytes / (double) fileLen) * 100;
+        writer.key("fileLength").value(fileLen);
+        writer.key("paddingLength").value(paddedBytes);
+        writer.key("paddingRatio").value(percentPadding);
+        AcidStats acidStats = OrcAcidUtils.parseAcidStats(reader);
+        if (acidStats != null) {
+          writer.key("numInserts").value(acidStats.inserts);
+          writer.key("numDeletes").value(acidStats.deletes);
+          writer.key("numUpdates").value(acidStats.updates);
+        }
+        writer.key("status").value("OK");
+        rows.close();
+
+        writer.endObject();
+      } catch (Exception e) {
+        writer.key("status").value("FAILED");
+        throw e;
+      }
+    }
+    if (multiFile) {
+      writer.endArray();
+    }
+
+    if (prettyPrint) {
+      final String prettyJson;
+      if (multiFile) {
+        JSONArray jsonArray = new JSONArray(writer.toString());
+        prettyJson = jsonArray.toString(2);
+      } else {
+        JSONObject jsonObject = new JSONObject(writer.toString());
+        prettyJson = jsonObject.toString(2);
+      }
+      System.out.println(prettyJson);
+    } else {
+      System.out.println(writer.toString());
+    }
+  }
+
+  private static void writeSchema(JSONStringer writer, List<OrcProto.Type> types)
+      throws JSONException {
+    int i = 0;
+    for(OrcProto.Type type : types) {
+      writer.object();
+      writer.key("columnId").value(i++);
+      writer.key("columnType").value(type.getKind());
+      if (type.getFieldNamesCount() > 0) {
+        writer.key("childColumnNames").array();
+        for (String field : type.getFieldNamesList()) {
+          writer.value(field);
+        }
+        writer.endArray();
+        writer.key("childColumnIds").array();
+        for (Integer colId : type.getSubtypesList()) {
+          writer.value(colId);
+        }
+        writer.endArray();
+      }
+      if (type.hasPrecision()) {
+        writer.key("precision").value(type.getPrecision());
+      }
+
+      if (type.hasScale()) {
+        writer.key("scale").value(type.getScale());
+      }
+
+      if (type.hasMaximumLength()) {
+        writer.key("maxLength").value(type.getMaximumLength());
+      }
+      writer.endObject();
+    }
+  }
+
+  private static void writeStripeInformation(JSONWriter writer, StripeInformation stripe)
+      throws JSONException {
+    writer.object();
+    writer.key("offset").value(stripe.getOffset());
+    writer.key("indexLength").value(stripe.getIndexLength());
+    writer.key("dataLength").value(stripe.getDataLength());
+    writer.key("footerLength").value(stripe.getFooterLength());
+    writer.key("rowCount").value(stripe.getNumberOfRows());
+    writer.endObject();
+  }
+
+  private static void writeColumnStatistics(JSONWriter writer, ColumnStatistics cs)
+      throws JSONException {
+    if (cs != null) {
+      writer.key("count").value(cs.getNumberOfValues());
+      writer.key("hasNull").value(cs.hasNull());
+      if (cs instanceof BinaryColumnStatistics) {
+        writer.key("totalLength").value(((BinaryColumnStatistics) cs).getSum());
+        writer.key("type").value(OrcProto.Type.Kind.BINARY);
+      } else if (cs instanceof BooleanColumnStatistics) {
+        writer.key("trueCount").value(((BooleanColumnStatistics) cs).getTrueCount());
+        writer.key("falseCount").value(((BooleanColumnStatistics) cs).getFalseCount());
+        writer.key("type").value(OrcProto.Type.Kind.BOOLEAN);
+      } else if (cs instanceof IntegerColumnStatistics) {
+        writer.key("min").value(((IntegerColumnStatistics) cs).getMinimum());
+        writer.key("max").value(((IntegerColumnStatistics) cs).getMaximum());
+        if (((IntegerColumnStatistics) cs).isSumDefined()) {
+          writer.key("sum").value(((IntegerColumnStatistics) cs).getSum());
+        }
+        writer.key("type").value(OrcProto.Type.Kind.LONG);
+      } else if (cs instanceof DoubleColumnStatistics) {
+        writer.key("min").value(((DoubleColumnStatistics) cs).getMinimum());
+        writer.key("max").value(((DoubleColumnStatistics) cs).getMaximum());
+        writer.key("sum").value(((DoubleColumnStatistics) cs).getSum());
+        writer.key("type").value(OrcProto.Type.Kind.DOUBLE);
+      } else if (cs instanceof StringColumnStatistics) {
+        writer.key("min").value(((StringColumnStatistics) cs).getMinimum());
+        writer.key("max").value(((StringColumnStatistics) cs).getMaximum());
+        writer.key("totalLength").value(((StringColumnStatistics) cs).getSum());
+        writer.key("type").value(OrcProto.Type.Kind.STRING);
+      } else if (cs instanceof DateColumnStatistics) {
+        if (((DateColumnStatistics) cs).getMaximum() != null) {
+          writer.key("min").value(((DateColumnStatistics) cs).getMinimum());
+          writer.key("max").value(((DateColumnStatistics) cs).getMaximum());
+        }
+        writer.key("type").value(OrcProto.Type.Kind.DATE);
+      } else if (cs instanceof TimestampColumnStatistics) {
+        if (((TimestampColumnStatistics) cs).getMaximum() != null) {
+          writer.key("min").value(((TimestampColumnStatistics) cs).getMinimum());
+          writer.key("max").value(((TimestampColumnStatistics) cs).getMaximum());
+        }
+        writer.key("type").value(OrcProto.Type.Kind.TIMESTAMP);
+      } else if (cs instanceof DecimalColumnStatistics) {
+        if (((DecimalColumnStatistics) cs).getMaximum() != null) {
+          writer.key("min").value(((DecimalColumnStatistics) cs).getMinimum());
+          writer.key("max").value(((DecimalColumnStatistics) cs).getMaximum());
+          writer.key("sum").value(((DecimalColumnStatistics) cs).getSum());
+        }
+        writer.key("type").value(OrcProto.Type.Kind.DECIMAL);
+      }
+    }
+  }
+
+  private static void writeBloomFilterIndexes(JSONWriter writer, int col,
+      OrcProto.BloomFilterIndex[] bloomFilterIndex) throws JSONException {
+
+    BloomFilterIO stripeLevelBF = null;
+    if (bloomFilterIndex != null && bloomFilterIndex[col] != null) {
+      int entryIx = 0;
+      writer.key("bloomFilterIndexes").array();
+      for (OrcProto.BloomFilter bf : bloomFilterIndex[col].getBloomFilterList()) {
+        writer.object();
+        writer.key("entryId").value(entryIx++);
+        BloomFilterIO toMerge = new BloomFilterIO(bf);
+        writeBloomFilterStats(writer, toMerge);
+        if (stripeLevelBF == null) {
+          stripeLevelBF = toMerge;
+        } else {
+          stripeLevelBF.merge(toMerge);
+        }
+        writer.endObject();
+      }
+      writer.endArray();
+    }
+    if (stripeLevelBF != null) {
+      writer.key("stripeLevelBloomFilter");
+      writer.object();
+      writeBloomFilterStats(writer, stripeLevelBF);
+      writer.endObject();
+    }
+  }
+
+  private static void writeBloomFilterStats(JSONWriter writer, BloomFilterIO bf)
+      throws JSONException {
+    int bitCount = bf.getBitSize();
+    int popCount = 0;
+    for (long l : bf.getBitSet()) {
+      popCount += Long.bitCount(l);
+    }
+    int k = bf.getNumHashFunctions();
+    float loadFactor = (float) popCount / (float) bitCount;
+    float expectedFpp = (float) Math.pow(loadFactor, k);
+    writer.key("numHashFunctions").value(k);
+    writer.key("bitCount").value(bitCount);
+    writer.key("popCount").value(popCount);
+    writer.key("loadFactor").value(loadFactor);
+    writer.key("expectedFpp").value(expectedFpp);
+  }
+
+  private static void writeRowGroupIndexes(JSONWriter writer, int col,
+      OrcProto.RowIndex[] rowGroupIndex)
+      throws JSONException {
+
+    OrcProto.RowIndex index;
+    if (rowGroupIndex == null || (col >= rowGroupIndex.length) ||
+        ((index = rowGroupIndex[col]) == null)) {
+      return;
+    }
+
+    writer.key("rowGroupIndexes").array();
+    for (int entryIx = 0; entryIx < index.getEntryCount(); ++entryIx) {
+      writer.object();
+      writer.key("entryId").value(entryIx);
+      OrcProto.RowIndexEntry entry = index.getEntry(entryIx);
+      if (entry == null) {
+        continue;
+      }
+      OrcProto.ColumnStatistics colStats = entry.getStatistics();
+      writeColumnStatistics(writer, ColumnStatisticsImpl.deserialize(colStats));
+      writer.key("positions").array();
+      for (int posIx = 0; posIx < entry.getPositionsCount(); ++posIx) {
+        writer.value(entry.getPositions(posIx));
+      }
+      writer.endArray();
+      writer.endObject();
+    }
+    writer.endArray();
+  }
+
+}
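
Reviewer note: a minimal usage sketch of the entry point above. The package location (org.apache.hive.orc.tools after this move) and the file path are assumptions; the signature matches printJsonMetaData as shown in the hunk, and passing null for rowIndexCols makes the tool dump row-group indexes for every column.

import java.util.Collections;

import org.apache.hadoop.conf.Configuration;
import org.apache.hive.orc.tools.JsonFileDump;  // assumed package after this move

public class JsonFileDumpExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    JsonFileDump.printJsonMetaData(
        Collections.singletonList("/tmp/example.orc"), // hypothetical path
        conf,
        null,   // null => dump row-group indexes for every column
        true,   // prettyPrint
        true);  // printTimeZone
  }
}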

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/BinaryColumnStatistics.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/BinaryColumnStatistics.java b/orc/src/java/org/apache/orc/BinaryColumnStatistics.java
deleted file mode 100644
index 19db98a..0000000
--- a/orc/src/java/org/apache/orc/BinaryColumnStatistics.java
+++ /dev/null
@@ -1,27 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc;
-
-import org.apache.orc.ColumnStatistics;
-
-/**
- * Statistics for binary columns.
- */
-public interface BinaryColumnStatistics extends ColumnStatistics {
-  long getSum();
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/BloomFilterIO.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/BloomFilterIO.java b/orc/src/java/org/apache/orc/BloomFilterIO.java
deleted file mode 100644
index 1406266..0000000
--- a/orc/src/java/org/apache/orc/BloomFilterIO.java
+++ /dev/null
@@ -1,43 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc;
-
-import org.apache.hive.common.util.BloomFilter;
-
-import com.google.common.primitives.Longs;
-
-public class BloomFilterIO extends BloomFilter {
-
-  public BloomFilterIO(long expectedEntries) {
-    super(expectedEntries, DEFAULT_FPP);
-  }
-
-  public BloomFilterIO(long expectedEntries, double fpp) {
-    super(expectedEntries, fpp);
-  }
-
-/**
- * Initializes the BloomFilter from the given Orc BloomFilter
- */
-  public BloomFilterIO(OrcProto.BloomFilter bloomFilter) {
-    this.bitSet = new BitSet(Longs.toArray(bloomFilter.getBitsetList()));
-    this.numHashFunctions = bloomFilter.getNumHashFunctions();
-    this.numBits = (int) this.bitSet.bitSize();
-  }
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/BooleanColumnStatistics.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/BooleanColumnStatistics.java b/orc/src/java/org/apache/orc/BooleanColumnStatistics.java
deleted file mode 100644
index af08f06..0000000
--- a/orc/src/java/org/apache/orc/BooleanColumnStatistics.java
+++ /dev/null
@@ -1,29 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc;
-
-import org.apache.orc.ColumnStatistics;
-
-/**
- * Statistics for boolean columns.
- */
-public interface BooleanColumnStatistics extends ColumnStatistics {
-  long getFalseCount();
-
-  long getTrueCount();
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/ColumnStatistics.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/ColumnStatistics.java b/orc/src/java/org/apache/orc/ColumnStatistics.java
deleted file mode 100644
index 72d8fbf..0000000
--- a/orc/src/java/org/apache/orc/ColumnStatistics.java
+++ /dev/null
@@ -1,36 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc;
-
-/**
- * Statistics that are available for all types of columns.
- */
-public interface ColumnStatistics {
-  /**
-   * Get the number of values in this column. It will differ from the number
-   * of rows because of NULL values and repeated values.
-   * @return the number of values
-   */
-  long getNumberOfValues();
-
-  /**
-   * Returns true if there are nulls in the scope of column statistics.
-   * @return true if null present else false
-   */
-  boolean hasNull();
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/CompressionCodec.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/CompressionCodec.java b/orc/src/java/org/apache/orc/CompressionCodec.java
deleted file mode 100644
index 3421969..0000000
--- a/orc/src/java/org/apache/orc/CompressionCodec.java
+++ /dev/null
@@ -1,69 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.EnumSet;
-
-import javax.annotation.Nullable;
-
-public interface CompressionCodec {
-
-  enum Modifier {
-    /* speed/compression tradeoffs */
-    FASTEST,
-    FAST,
-    DEFAULT,
-    /* data sensitivity modifiers */
-    TEXT,
-    BINARY
-  };
-
-  /**
-   * Compress the in buffer to the out buffer.
-   * @param in the bytes to compress
-   * @param out the uncompressed bytes
-   * @param overflow put any additional bytes here
-   * @return true if the output is smaller than input
-   * @throws IOException
-   */
-  boolean compress(ByteBuffer in, ByteBuffer out, ByteBuffer overflow
-                  ) throws IOException;
-
-  /**
-   * Decompress the in buffer to the out buffer.
-   * @param in the bytes to decompress
-   * @param out the decompressed bytes
-   * @throws IOException
-   */
-  void decompress(ByteBuffer in, ByteBuffer out) throws IOException;
-
-  /**
-   * Produce a modified compression codec if the underlying algorithm allows
-   * modification.
-   *
-   * This does not modify the current object, but returns a new object if
-   * modifications are possible. Returns the same object if no modifications
-   * are possible.
-   * @param modifiers compression modifiers
-   * @return codec for use after optional modification
-   */
-  CompressionCodec modify(@Nullable EnumSet<Modifier> modifiers);
-
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/CompressionKind.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/CompressionKind.java b/orc/src/java/org/apache/orc/CompressionKind.java
deleted file mode 100644
index f684bef..0000000
--- a/orc/src/java/org/apache/orc/CompressionKind.java
+++ /dev/null
@@ -1,27 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc;
-
-/**
- * An enumeration that lists the generic compression algorithms that
- * can be applied to ORC files.
- */
-public enum CompressionKind {
-  NONE, ZLIB, SNAPPY, LZO
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/DataReader.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/DataReader.java b/orc/src/java/org/apache/orc/DataReader.java
deleted file mode 100644
index a5dbb76..0000000
--- a/orc/src/java/org/apache/orc/DataReader.java
+++ /dev/null
@@ -1,76 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-
-import org.apache.hadoop.hive.common.io.DiskRangeList;
-import org.apache.orc.impl.OrcIndex;
-
-/** An abstract data reader that IO formats can use to read bytes from underlying storage. */
-public interface DataReader extends AutoCloseable {
-
-  /** Opens the DataReader, making it ready to use. */
-  void open() throws IOException;
-
-  OrcIndex readRowIndex(StripeInformation stripe,
-                        OrcProto.StripeFooter footer,
-                        boolean[] included, OrcProto.RowIndex[] indexes,
-                        boolean[] sargColumns,
-                        OrcProto.BloomFilterIndex[] bloomFilterIndices
-                        ) throws IOException;
-
-  OrcProto.StripeFooter readStripeFooter(StripeInformation stripe) throws IOException;
-
-  /** Reads the data.
-   *
-   * Note that for the cases such as zero-copy read, caller must release the disk ranges
-   * produced after being done with them. Call isTrackingDiskRanges to find out if this is needed.
-   * @param range List if disk ranges to read. Ranges with data will be ignored.
-   * @param baseOffset Base offset from the start of the file of the ranges in disk range list.
-   * @param doForceDirect Whether the data should be read into direct buffers.
-   * @return New or modified list of DiskRange-s, where all the ranges are filled with data.
-   */
-  DiskRangeList readFileData(
-      DiskRangeList range, long baseOffset, boolean doForceDirect) throws IOException;
-
-
-  /**
-   * Whether the user should release buffers created by readFileData. See readFileData javadoc.
-   */
-  boolean isTrackingDiskRanges();
-
-  /**
-   * Releases buffers created by readFileData. See readFileData javadoc.
-   * @param toRelease The buffer to release.
-   */
-  void releaseBuffer(ByteBuffer toRelease);
-
-  /**
-   * Clone the entire state of the DataReader with the assumption that the
-   * clone will be closed at a different time. Thus, any file handles in the
-   * implementation need to be cloned.
-   * @return a new instance
-   */
-  DataReader clone();
-
-  @Override
-  public void close() throws IOException;
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/DateColumnStatistics.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/DateColumnStatistics.java b/orc/src/java/org/apache/orc/DateColumnStatistics.java
deleted file mode 100644
index cdd01af..0000000
--- a/orc/src/java/org/apache/orc/DateColumnStatistics.java
+++ /dev/null
@@ -1,39 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc;
-
-import org.apache.orc.ColumnStatistics;
-
-import java.util.Date;
-
-/**
- * Statistics for DATE columns.
- */
-public interface DateColumnStatistics extends ColumnStatistics {
-  /**
-   * Get the minimum value for the column.
-   * @return minimum value
-   */
-  Date getMinimum();
-
-  /**
-   * Get the maximum value for the column.
-   * @return maximum value
-   */
-  Date getMaximum();
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/DecimalColumnStatistics.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/DecimalColumnStatistics.java b/orc/src/java/org/apache/orc/DecimalColumnStatistics.java
deleted file mode 100644
index 51b6d7d..0000000
--- a/orc/src/java/org/apache/orc/DecimalColumnStatistics.java
+++ /dev/null
@@ -1,46 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc;
-
-import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.orc.ColumnStatistics;
-
-/**
- * Statistics for decimal columns.
- */
-public interface DecimalColumnStatistics extends ColumnStatistics {
-
-  /**
-   * Get the minimum value for the column.
-   * @return the minimum value
-   */
-  HiveDecimal getMinimum();
-
-  /**
-   * Get the maximum value for the column.
-   * @return the maximum value
-   */
-  HiveDecimal getMaximum();
-
-  /**
-   * Get the sum of the values of the column.
-   * @return the sum
-   */
-  HiveDecimal getSum();
-
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/DoubleColumnStatistics.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/DoubleColumnStatistics.java b/orc/src/java/org/apache/orc/DoubleColumnStatistics.java
deleted file mode 100644
index 00c728f..0000000
--- a/orc/src/java/org/apache/orc/DoubleColumnStatistics.java
+++ /dev/null
@@ -1,46 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc;
-
-import org.apache.orc.ColumnStatistics;
-
-/**
- * Statistics for float and double columns.
- */
-public interface DoubleColumnStatistics extends ColumnStatistics {
-
-  /**
-   * Get the smallest value in the column. Only defined if getNumberOfValues
-   * is non-zero.
-   * @return the minimum
-   */
-  double getMinimum();
-
-  /**
-   * Get the largest value in the column. Only defined if getNumberOfValues
-   * is non-zero.
-   * @return the maximum
-   */
-  double getMaximum();
-
-  /**
-   * Get the sum of the values in the column.
-   * @return the sum
-   */
-  double getSum();
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/FileFormatException.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/FileFormatException.java b/orc/src/java/org/apache/orc/FileFormatException.java
deleted file mode 100644
index 2cebea7..0000000
--- a/orc/src/java/org/apache/orc/FileFormatException.java
+++ /dev/null
@@ -1,30 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * <p/>
- * http://www.apache.org/licenses/LICENSE-2.0
- * <p/>
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc;
-
-import java.io.IOException;
-
-/**
- * Thrown when an invalid file format is encountered.
- */
-public class FileFormatException extends IOException {
-
-  public FileFormatException(String errMsg) {
-    super(errMsg);
-  }
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/FileMetadata.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/FileMetadata.java b/orc/src/java/org/apache/orc/FileMetadata.java
deleted file mode 100644
index 807e696..0000000
--- a/orc/src/java/org/apache/orc/FileMetadata.java
+++ /dev/null
@@ -1,64 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc;
-
-import java.util.List;
-
-import org.apache.orc.CompressionKind;
-import org.apache.orc.OrcProto;
-import org.apache.orc.StripeInformation;
-
-/**
- * Cached file metadata. Right now, it caches everything; we don't have to store all the
- * protobuf structs actually, we could just store what we need, but that would require that
- * ORC stop depending on them too. Luckily, they shouldn't be very big.
- */
-public interface FileMetadata {
-  boolean isOriginalFormat();
-
-  List<StripeInformation> getStripes();
-
-  CompressionKind getCompressionKind();
-
-  int getCompressionBufferSize();
-
-  int getRowIndexStride();
-
-  int getColumnCount();
-
-  int getFlattenedColumnCount();
-
-  Object getFileKey();
-
-  List<Integer> getVersionList();
-
-  int getMetadataSize();
-
-  int getWriterVersionNum();
-
-  List<OrcProto.Type> getTypes();
-
-  List<OrcProto.StripeStatistics> getStripeStats();
-
-  long getContentLength();
-
-  long getNumberOfRows();
-
-  List<OrcProto.ColumnStatistics> getFileStats();
-}
\ No newline at end of file


[34/37] hive git commit: HIVE-17118. Move the hive-orc source files to make the package names unique.

Posted by om...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/BinaryColumnStatistics.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/BinaryColumnStatistics.java b/orc/src/java/org/apache/hive/orc/BinaryColumnStatistics.java
new file mode 100644
index 0000000..92ddfbe
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/BinaryColumnStatistics.java
@@ -0,0 +1,25 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc;
+
+/**
+ * Statistics for binary columns.
+ */
+public interface BinaryColumnStatistics extends ColumnStatistics {
+  long getSum();
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/BloomFilterIO.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/BloomFilterIO.java b/orc/src/java/org/apache/hive/orc/BloomFilterIO.java
new file mode 100644
index 0000000..a6e6408
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/BloomFilterIO.java
@@ -0,0 +1,43 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc;
+
+import org.apache.hive.common.util.BloomFilter;
+
+import com.google.common.primitives.Longs;
+
+public class BloomFilterIO extends BloomFilter {
+
+  public BloomFilterIO(long expectedEntries) {
+    super(expectedEntries, DEFAULT_FPP);
+  }
+
+  public BloomFilterIO(long expectedEntries, double fpp) {
+    super(expectedEntries, fpp);
+  }
+
+  /**
+   * Initializes the BloomFilter from the given ORC BloomFilter.
+   */
+  public BloomFilterIO(OrcProto.BloomFilter bloomFilter) {
+    this.bitSet = new BitSet(Longs.toArray(bloomFilter.getBitsetList()));
+    this.numHashFunctions = bloomFilter.getNumHashFunctions();
+    this.numBits = (int) this.bitSet.bitSize();
+  }
+}
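
Reviewer note: the expectedFpp value emitted by JsonFileDump.writeBloomFilterStats earlier in this patch is simply (popCount / bitCount) raised to the number of hash functions. A small sketch of the same computation against a freshly built filter; addLong is assumed to be inherited from the hive-common BloomFilter parent class.

import org.apache.hive.orc.BloomFilterIO;

public class BloomFilterFppSketch {
  public static void main(String[] args) {
    BloomFilterIO bf = new BloomFilterIO(10000);   // sized for 10k entries at DEFAULT_FPP
    for (long v = 0; v < 10000; v++) {
      bf.addLong(v);                               // assumed inherited from BloomFilter
    }
    // Same estimate that the JSON dump tool prints per bloom filter entry.
    int bitCount = bf.getBitSize();
    int popCount = 0;
    for (long word : bf.getBitSet()) {
      popCount += Long.bitCount(word);
    }
    int k = bf.getNumHashFunctions();
    float loadFactor = (float) popCount / (float) bitCount;
    float expectedFpp = (float) Math.pow(loadFactor, k);
    System.out.printf("bits=%d set=%d k=%d loadFactor=%.3f expectedFpp=%.6f%n",
        bitCount, popCount, k, loadFactor, expectedFpp);
  }
}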

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/BooleanColumnStatistics.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/BooleanColumnStatistics.java b/orc/src/java/org/apache/hive/orc/BooleanColumnStatistics.java
new file mode 100644
index 0000000..14fc6cf
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/BooleanColumnStatistics.java
@@ -0,0 +1,27 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc;
+
+/**
+ * Statistics for boolean columns.
+ */
+public interface BooleanColumnStatistics extends ColumnStatistics {
+  long getFalseCount();
+
+  long getTrueCount();
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/ColumnStatistics.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/ColumnStatistics.java b/orc/src/java/org/apache/hive/orc/ColumnStatistics.java
new file mode 100644
index 0000000..5ab8f55
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/ColumnStatistics.java
@@ -0,0 +1,36 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc;
+
+/**
+ * Statistics that are available for all types of columns.
+ */
+public interface ColumnStatistics {
+  /**
+   * Get the number of values in this column. It will differ from the number
+   * of rows because of NULL values and repeated values.
+   * @return the number of values
+   */
+  long getNumberOfValues();
+
+  /**
+   * Returns true if there are nulls in the scope of column statistics.
+   * @return true if null present else false
+   */
+  boolean hasNull();
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/CompressionCodec.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/CompressionCodec.java b/orc/src/java/org/apache/hive/orc/CompressionCodec.java
new file mode 100644
index 0000000..eee8dc3
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/CompressionCodec.java
@@ -0,0 +1,69 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.EnumSet;
+
+import javax.annotation.Nullable;
+
+public interface CompressionCodec {
+
+  enum Modifier {
+    /* speed/compression tradeoffs */
+    FASTEST,
+    FAST,
+    DEFAULT,
+    /* data sensitivity modifiers */
+    TEXT,
+    BINARY
+  };
+
+  /**
+   * Compress the in buffer to the out buffer.
+   * @param in the bytes to compress
+   * @param out the compressed bytes
+   * @param overflow put any additional bytes here
+   * @return true if the output is smaller than input
+   * @throws IOException
+   */
+  boolean compress(ByteBuffer in, ByteBuffer out, ByteBuffer overflow
+                  ) throws IOException;
+
+  /**
+   * Decompress the in buffer to the out buffer.
+   * @param in the bytes to decompress
+   * @param out the decompressed bytes
+   * @throws IOException
+   */
+  void decompress(ByteBuffer in, ByteBuffer out) throws IOException;
+
+  /**
+   * Produce a modified compression codec if the underlying algorithm allows
+   * modification.
+   *
+   * This does not modify the current object, but returns a new object if
+   * modifications are possible. Returns the same object if no modifications
+   * are possible.
+   * @param modifiers compression modifiers
+   * @return codec for use after optional modification
+   */
+  CompressionCodec modify(@Nullable EnumSet<Modifier> modifiers);
+
+}
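
Reviewer note: the modify() hook above is easy to misread; it returns a (possibly new) codec rather than mutating the receiver. A tiny sketch of the intended call pattern, written against the interface only; obtaining a concrete codec instance is left out.

import java.util.EnumSet;

import org.apache.hive.orc.CompressionCodec;

final class CodecTuning {
  /** Ask any concrete codec for its fastest, binary-oriented variant. */
  static CompressionCodec fastestBinary(CompressionCodec codec) {
    return codec.modify(EnumSet.of(CompressionCodec.Modifier.FASTEST,
                                   CompressionCodec.Modifier.BINARY));
  }
}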

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/CompressionKind.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/CompressionKind.java b/orc/src/java/org/apache/hive/orc/CompressionKind.java
new file mode 100644
index 0000000..0d78642
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/CompressionKind.java
@@ -0,0 +1,27 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc;
+
+/**
+ * An enumeration that lists the generic compression algorithms that
+ * can be applied to ORC files.
+ */
+public enum CompressionKind {
+  NONE, ZLIB, SNAPPY, LZO
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/DataReader.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/DataReader.java b/orc/src/java/org/apache/hive/orc/DataReader.java
new file mode 100644
index 0000000..091a5b9
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/DataReader.java
@@ -0,0 +1,76 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+import org.apache.hadoop.hive.common.io.DiskRangeList;
+import org.apache.hive.orc.impl.OrcIndex;
+
+/** An abstract data reader that IO formats can use to read bytes from underlying storage. */
+public interface DataReader extends AutoCloseable {
+
+  /** Opens the DataReader, making it ready to use. */
+  void open() throws IOException;
+
+  OrcIndex readRowIndex(StripeInformation stripe,
+                        OrcProto.StripeFooter footer,
+                        boolean[] included, OrcProto.RowIndex[] indexes,
+                        boolean[] sargColumns,
+                        OrcProto.BloomFilterIndex[] bloomFilterIndices
+                        ) throws IOException;
+
+  OrcProto.StripeFooter readStripeFooter(StripeInformation stripe) throws IOException;
+
+  /** Reads the data.
+   *
+   * Note that for cases such as zero-copy reads, the caller must release the disk ranges
+   * produced once it is done with them. Call isTrackingDiskRanges to find out if this is needed.
+   * @param range List of disk ranges to read. Ranges with data will be ignored.
+   * @param baseOffset Base offset from the start of the file of the ranges in disk range list.
+   * @param doForceDirect Whether the data should be read into direct buffers.
+   * @return New or modified list of DiskRange-s, where all the ranges are filled with data.
+   */
+  DiskRangeList readFileData(
+      DiskRangeList range, long baseOffset, boolean doForceDirect) throws IOException;
+
+
+  /**
+   * Whether the user should release buffers created by readFileData. See readFileData javadoc.
+   */
+  boolean isTrackingDiskRanges();
+
+  /**
+   * Releases buffers created by readFileData. See readFileData javadoc.
+   * @param toRelease The buffer to release.
+   */
+  void releaseBuffer(ByteBuffer toRelease);
+
+  /**
+   * Clone the entire state of the DataReader with the assumption that the
+   * clone will be closed at a different time. Thus, any file handles in the
+   * implementation need to be cloned.
+   * @return a new instance
+   */
+  DataReader clone();
+
+  @Override
+  public void close() throws IOException;
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/DateColumnStatistics.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/DateColumnStatistics.java b/orc/src/java/org/apache/hive/orc/DateColumnStatistics.java
new file mode 100644
index 0000000..b03dcec
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/DateColumnStatistics.java
@@ -0,0 +1,37 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc;
+
+import java.util.Date;
+
+/**
+ * Statistics for DATE columns.
+ */
+public interface DateColumnStatistics extends ColumnStatistics {
+  /**
+   * Get the minimum value for the column.
+   * @return minimum value
+   */
+  Date getMinimum();
+
+  /**
+   * Get the maximum value for the column.
+   * @return maximum value
+   */
+  Date getMaximum();
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/DecimalColumnStatistics.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/DecimalColumnStatistics.java b/orc/src/java/org/apache/hive/orc/DecimalColumnStatistics.java
new file mode 100644
index 0000000..4dbbc12
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/DecimalColumnStatistics.java
@@ -0,0 +1,45 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc;
+
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+
+/**
+ * Statistics for decimal columns.
+ */
+public interface DecimalColumnStatistics extends ColumnStatistics {
+
+  /**
+   * Get the minimum value for the column.
+   * @return the minimum value
+   */
+  HiveDecimal getMinimum();
+
+  /**
+   * Get the maximum value for the column.
+   * @return the maximum value
+   */
+  HiveDecimal getMaximum();
+
+  /**
+   * Get the sum of the values of the column.
+   * @return the sum
+   */
+  HiveDecimal getSum();
+
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/DoubleColumnStatistics.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/DoubleColumnStatistics.java b/orc/src/java/org/apache/hive/orc/DoubleColumnStatistics.java
new file mode 100644
index 0000000..5f2d426
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/DoubleColumnStatistics.java
@@ -0,0 +1,44 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc;
+
+/**
+ * Statistics for float and double columns.
+ */
+public interface DoubleColumnStatistics extends ColumnStatistics {
+
+  /**
+   * Get the smallest value in the column. Only defined if getNumberOfValues
+   * is non-zero.
+   * @return the minimum
+   */
+  double getMinimum();
+
+  /**
+   * Get the largest value in the column. Only defined if getNumberOfValues
+   * is non-zero.
+   * @return the maximum
+   */
+  double getMaximum();
+
+  /**
+   * Get the sum of the values in the column.
+   * @return the sum
+   */
+  double getSum();
+}
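
Reviewer note: since several statistics interfaces land in this part of the patch, here is a hedged sketch of how they are typically consumed, mirroring JsonFileDump.writeColumnStatistics: fetch the file-level ColumnStatistics[] from a Reader and down-cast per column. The OrcFile.createReader entry point and the file path are assumptions.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

import org.apache.hive.orc.ColumnStatistics;
import org.apache.hive.orc.DoubleColumnStatistics;
import org.apache.hive.orc.IntegerColumnStatistics;
import org.apache.hive.orc.OrcFile;
import org.apache.hive.orc.Reader;

public class PrintFileStats {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Reader reader = OrcFile.createReader(new Path("/tmp/example.orc"),   // hypothetical path
        OrcFile.readerOptions(conf));                                    // assumed entry point
    ColumnStatistics[] stats = reader.getStatistics();                   // one entry per column id
    for (int col = 0; col < stats.length; ++col) {
      ColumnStatistics cs = stats[col];
      StringBuilder line = new StringBuilder("column " + col + ": count="
          + cs.getNumberOfValues() + " hasNull=" + cs.hasNull());
      if (cs instanceof IntegerColumnStatistics) {
        IntegerColumnStatistics ics = (IntegerColumnStatistics) cs;
        line.append(" min=").append(ics.getMinimum()).append(" max=").append(ics.getMaximum());
        if (ics.isSumDefined()) {
          line.append(" sum=").append(ics.getSum());
        }
      } else if (cs instanceof DoubleColumnStatistics) {
        DoubleColumnStatistics dcs = (DoubleColumnStatistics) cs;
        line.append(" min=").append(dcs.getMinimum()).append(" max=").append(dcs.getMaximum())
            .append(" sum=").append(dcs.getSum());
      }
      System.out.println(line);
    }
  }
}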

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/FileFormatException.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/FileFormatException.java b/orc/src/java/org/apache/hive/orc/FileFormatException.java
new file mode 100644
index 0000000..30356c3
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/FileFormatException.java
@@ -0,0 +1,30 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc;
+
+import java.io.IOException;
+
+/**
+ * Thrown when an invalid file format is encountered.
+ */
+public class FileFormatException extends IOException {
+
+  public FileFormatException(String errMsg) {
+    super(errMsg);
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/FileMetadata.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/FileMetadata.java b/orc/src/java/org/apache/hive/orc/FileMetadata.java
new file mode 100644
index 0000000..acb8a78
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/FileMetadata.java
@@ -0,0 +1,60 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc;
+
+import java.util.List;
+
+/**
+ * Cached file metadata. Right now it caches everything; we don't actually have to store all the
+ * protobuf structs and could keep just what we need, but that would require ORC to stop
+ * depending on them too. Luckily, they shouldn't be very big.
+ */
+public interface FileMetadata {
+  boolean isOriginalFormat();
+
+  List<StripeInformation> getStripes();
+
+  CompressionKind getCompressionKind();
+
+  int getCompressionBufferSize();
+
+  int getRowIndexStride();
+
+  int getColumnCount();
+
+  int getFlattenedColumnCount();
+
+  Object getFileKey();
+
+  List<Integer> getVersionList();
+
+  int getMetadataSize();
+
+  int getWriterVersionNum();
+
+  List<OrcProto.Type> getTypes();
+
+  List<OrcProto.StripeStatistics> getStripeStats();
+
+  long getContentLength();
+
+  long getNumberOfRows();
+
+  List<OrcProto.ColumnStatistics> getFileStats();
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/IntegerColumnStatistics.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/IntegerColumnStatistics.java b/orc/src/java/org/apache/hive/orc/IntegerColumnStatistics.java
new file mode 100644
index 0000000..00d17eb
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/IntegerColumnStatistics.java
@@ -0,0 +1,50 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc;
+
+/**
+ * Statistics for all of the integer columns, such as byte, short, int, and
+ * long.
+ */
+public interface IntegerColumnStatistics extends ColumnStatistics {
+  /**
+   * Get the smallest value in the column. Only defined if getNumberOfValues
+   * is non-zero.
+   * @return the minimum
+   */
+  long getMinimum();
+
+  /**
+   * Get the largest value in the column. Only defined if getNumberOfValues
+   * is non-zero.
+   * @return the maximum
+   */
+  long getMaximum();
+
+  /**
+   * Is the sum defined? If the sum overflowed the counter this will be false.
+   * @return is the sum available
+   */
+  boolean isSumDefined();
+
+  /**
+   * Get the sum of the column. Only valid if isSumDefined returns true.
+   * @return the sum of the column
+   */
+  long getSum();
+}
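
For illustration, a minimal sketch of how these statistics interfaces (both the integer and
double variants) are typically consumed. The file path and the column id used below are
assumptions for the example, not part of this patch:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hive.orc.ColumnStatistics;
import org.apache.hive.orc.IntegerColumnStatistics;
import org.apache.hive.orc.OrcFile;
import org.apache.hive.orc.Reader;

public class StatsExample {
  public static void main(String[] args) throws Exception {
    // Assumed example file; any ORC file with an integer column at flattened id 1 works.
    Reader reader = OrcFile.createReader(new Path("/tmp/example.orc"),
        OrcFile.readerOptions(new Configuration()));
    ColumnStatistics[] stats = reader.getStatistics();   // one entry per flattened column id
    if (stats[1] instanceof IntegerColumnStatistics) {
      IntegerColumnStatistics col = (IntegerColumnStatistics) stats[1];
      if (col.getNumberOfValues() > 0) {                  // min/max are only defined then
        System.out.println("min=" + col.getMinimum() + " max=" + col.getMaximum());
      }
      if (col.isSumDefined()) {                           // sum may have overflowed
        System.out.println("sum=" + col.getSum());
      }
    }
  }
}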

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/OrcConf.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/OrcConf.java b/orc/src/java/org/apache/hive/orc/OrcConf.java
new file mode 100644
index 0000000..dc2f865
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/OrcConf.java
@@ -0,0 +1,193 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc;
+
+import org.apache.hadoop.conf.Configuration;
+
+import java.util.Properties;
+
+/**
+ * Define the configuration properties that Orc understands.
+ */
+public enum OrcConf {
+  STRIPE_SIZE("orc.stripe.size", "hive.exec.orc.default.stripe.size",
+      64L * 1024 * 1024,
+      "Define the default ORC stripe size, in bytes."),
+  BLOCK_SIZE("orc.block.size", "hive.exec.orc.default.block.size",
+      256L * 1024 * 1024,
+      "Define the default file system block size for ORC files."),
+  ENABLE_INDEXES("orc.create.index", "orc.create.index", true,
+      "Should the ORC writer create indexes as part of the file."),
+  ROW_INDEX_STRIDE("orc.row.index.stride",
+      "hive.exec.orc.default.row.index.stride", 10000,
+      "Define the default ORC index stride in number of rows. (Stride is the\n"+
+          " number of rows n index entry represents.)"),
+  BUFFER_SIZE("orc.compress.size", "hive.exec.orc.default.buffer.size",
+      256 * 1024, "Define the default ORC buffer size, in bytes."),
+  BASE_DELTA_RATIO("orc.base.delta.ratio", "hive.exec.orc.base.delta.ratio", 8,
+      "The ratio of base writer and delta writer in terms of STRIPE_SIZE and BUFFER_SIZE."),
+  BLOCK_PADDING("orc.block.padding", "hive.exec.orc.default.block.padding",
+      true,
+      "Define whether stripes should be padded to the HDFS block boundaries."),
+  COMPRESS("orc.compress", "hive.exec.orc.default.compress", "ZLIB",
+      "Define the default compression codec for ORC file"),
+  WRITE_FORMAT("orc.write.format", "hive.exec.orc.write.format", "0.12",
+      "Define the version of the file to write. Possible values are 0.11 and\n"+
+          " 0.12. If this parameter is not defined, ORC will use the run\n" +
+          " length encoding (RLE) introduced in Hive 0.12."),
+  ENCODING_STRATEGY("orc.encoding.strategy", "hive.exec.orc.encoding.strategy",
+      "SPEED",
+      "Define the encoding strategy to use while writing data. Changing this\n"+
+          "will only affect the light weight encoding for integers. This\n" +
+          "flag will not change the compression level of higher level\n" +
+          "compression codec (like ZLIB)."),
+  COMPRESSION_STRATEGY("orc.compression.strategy",
+      "hive.exec.orc.compression.strategy", "SPEED",
+      "Define the compression strategy to use while writing data.\n" +
+          "This changes the compression level of higher level compression\n" +
+          "codec (like ZLIB)."),
+  BLOCK_PADDING_TOLERANCE("orc.block.padding.tolerance",
+      "hive.exec.orc.block.padding.tolerance", 0.05,
+      "Define the tolerance for block padding as a decimal fraction of\n" +
+          "stripe size (for example, the default value 0.05 is 5% of the\n" +
+          "stripe size). For the defaults of 64Mb ORC stripe and 256Mb HDFS\n" +
+          "blocks, the default block padding tolerance of 5% will\n" +
+          "reserve a maximum of 3.2Mb for padding within the 256Mb block.\n" +
+          "In that case, if the available size within the block is more than\n"+
+          "3.2Mb, a new smaller stripe will be inserted to fit within that\n" +
+          "space. This will make sure that no stripe written will block\n" +
+          " boundaries and cause remote reads within a node local task."),
+  BLOOM_FILTER_FPP("orc.bloom.filter.fpp", "orc.default.bloom.fpp", 0.05,
+      "Define the default false positive probability for bloom filters."),
+  USE_ZEROCOPY("orc.use.zerocopy", "hive.exec.orc.zerocopy", false,
+      "Use zerocopy reads with ORC. (This requires Hadoop 2.3 or later.)"),
+  SKIP_CORRUPT_DATA("orc.skip.corrupt.data", "hive.exec.orc.skip.corrupt.data",
+      false,
+      "If ORC reader encounters corrupt data, this value will be used to\n" +
+          "determine whether to skip the corrupt data or throw exception.\n" +
+          "The default behavior is to throw exception."),
+  MEMORY_POOL("orc.memory.pool", "hive.exec.orc.memory.pool", 0.5,
+      "Maximum fraction of heap that can be used by ORC file writers"),
+  DICTIONARY_KEY_SIZE_THRESHOLD("orc.dictionary.key.threshold",
+      "hive.exec.orc.dictionary.key.size.threshold",
+      0.8,
+      "If the number of distinct keys in a dictionary is greater than this\n" +
+          "fraction of the total number of non-null rows, turn off \n" +
+          "dictionary encoding.  Use 1 to always use dictionary encoding."),
+  ROW_INDEX_STRIDE_DICTIONARY_CHECK("orc.dictionary.early.check",
+      "hive.orc.row.index.stride.dictionary.check",
+      true,
+      "If enabled dictionary check will happen after first row index stride\n" +
+          "(default 10000 rows) else dictionary check will happen before\n" +
+          "writing first stripe. In both cases, the decision to use\n" +
+          "dictionary or not will be retained thereafter."),
+  BLOOM_FILTER_COLUMNS("orc.bloom.filter.columns", "orc.bloom.filter.columns",
+      "", "List of columns to create bloom filters for when writing.")
+  ;
+
+  private final String attribute;
+  private final String hiveConfName;
+  private final Object defaultValue;
+  private final String description;
+
+  OrcConf(String attribute,
+          String hiveConfName,
+          Object defaultValue,
+          String description) {
+    this.attribute = attribute;
+    this.hiveConfName = hiveConfName;
+    this.defaultValue = defaultValue;
+    this.description = description;
+  }
+
+  public String getAttribute() {
+    return attribute;
+  }
+
+  public String getHiveConfName() {
+    return hiveConfName;
+  }
+
+  public Object getDefaultValue() {
+    return defaultValue;
+  }
+
+  public String getDescription() {
+    return description;
+  }
+
+  private String lookupValue(Properties tbl, Configuration conf) {
+    String result = null;
+    if (tbl != null) {
+      result = tbl.getProperty(attribute);
+    }
+    if (result == null && conf != null) {
+      result = conf.get(attribute);
+      if (result == null) {
+        result = conf.get(hiveConfName);
+      }
+    }
+    return result;
+  }
+
+  public long getLong(Properties tbl, Configuration conf) {
+    String value = lookupValue(tbl, conf);
+    if (value != null) {
+      return Long.parseLong(value);
+    }
+    return ((Number) defaultValue).longValue();
+  }
+
+  public long getLong(Configuration conf) {
+    return getLong(null, conf);
+  }
+
+  public String getString(Properties tbl, Configuration conf) {
+    String value = lookupValue(tbl, conf);
+    return value == null ? (String) defaultValue : value;
+  }
+
+  public String getString(Configuration conf) {
+    return getString(null, conf);
+  }
+
+  public boolean getBoolean(Properties tbl, Configuration conf) {
+    String value = lookupValue(tbl, conf);
+    if (value != null) {
+      return Boolean.parseBoolean(value);
+    }
+    return (Boolean) defaultValue;
+  }
+
+  public boolean getBoolean(Configuration conf) {
+    return getBoolean(null, conf);
+  }
+
+  public double getDouble(Properties tbl, Configuration conf) {
+    String value = lookupValue(tbl, conf);
+    if (value != null) {
+      return Double.parseDouble(value);
+    }
+    return ((Number) defaultValue).doubleValue();
+  }
+
+  public double getDouble(Configuration conf) {
+    return getDouble(null, conf);
+  }
+}
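
As a rough illustration of the lookup precedence implemented above (table property first, then
the "orc." attribute in the Configuration, then the legacy Hive key, then the built-in default);
the stripe-size values here are arbitrary:

import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hive.orc.OrcConf;

public class OrcConfExample {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Only the legacy Hive key is set in the Configuration.
    conf.setLong("hive.exec.orc.default.stripe.size", 128L * 1024 * 1024);

    Properties tableProps = new Properties();
    // Table properties use the "orc." attribute name and win over the Configuration.
    tableProps.setProperty(OrcConf.STRIPE_SIZE.getAttribute(),
        String.valueOf(32L * 1024 * 1024));

    System.out.println(OrcConf.STRIPE_SIZE.getLong(tableProps, conf)); // 33554432 (table property)
    System.out.println(OrcConf.STRIPE_SIZE.getLong(conf));             // 134217728 (hive key)
    System.out.println(OrcConf.BUFFER_SIZE.getLong(conf));             // 262144 (built-in default)
  }
}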

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/OrcFile.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/OrcFile.java b/orc/src/java/org/apache/hive/orc/OrcFile.java
new file mode 100644
index 0000000..5670a61
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/OrcFile.java
@@ -0,0 +1,574 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc;
+
+import java.io.IOException;
+import java.util.Properties;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hive.orc.impl.MemoryManager;
+import org.apache.hive.orc.impl.OrcTail;
+import org.apache.hive.orc.impl.ReaderImpl;
+import org.apache.hive.orc.impl.WriterImpl;
+
+/**
+ * Contains factory methods to read or write ORC files.
+ */
+public class OrcFile {
+  public static final String MAGIC = "ORC";
+
+  /**
+   * Create a version number for the ORC file format, so that we can add
+   * non-forward compatible changes in the future. To make it easier for users
+   * to understand the version numbers, we use the Hive release number that
+   * first wrote that version of ORC files.
+   *
+   * Thus, if you add new encodings or other non-forward compatible changes
+   * to ORC files, which prevent the old reader from reading the new format,
+   * you should change this variable to reflect the next Hive release number.
+   * Non-forward compatible changes should never be added in patch releases.
+   *
+   * Do not make any changes that break backwards compatibility, which would
+   * prevent the new reader from reading ORC files generated by any released
+   * version of Hive.
+   */
+  public enum Version {
+    V_0_11("0.11", 0, 11),
+    V_0_12("0.12", 0, 12);
+
+    public static final Version CURRENT = V_0_12;
+
+    private final String name;
+    private final int major;
+    private final int minor;
+
+    Version(String name, int major, int minor) {
+      this.name = name;
+      this.major = major;
+      this.minor = minor;
+    }
+
+    public static Version byName(String name) {
+      for(Version version: values()) {
+        if (version.name.equals(name)) {
+          return version;
+        }
+      }
+      throw new IllegalArgumentException("Unknown ORC version " + name);
+    }
+
+    /**
+     * Get the human readable name for the version.
+     */
+    public String getName() {
+      return name;
+    }
+
+    /**
+     * Get the major version number.
+     */
+    public int getMajor() {
+      return major;
+    }
+
+    /**
+     * Get the minor version number.
+     */
+    public int getMinor() {
+      return minor;
+    }
+  }
+
+  /**
+   * Records the version of the writer in terms of which bugs have been fixed.
+   * For bugs in the writer where old readers still read the new data
+   * correctly, bump this version instead of the file format Version.
+   */
+  public enum WriterVersion {
+    ORIGINAL(0),
+    HIVE_8732(1), // corrupted stripe/file maximum column statistics
+    HIVE_4243(2), // use real column names from Hive tables
+    HIVE_12055(3), // vectorized writer
+    HIVE_13083(4), // decimal writer updating present stream wrongly
+
+    // Don't use any magic numbers here except for the below:
+    FUTURE(Integer.MAX_VALUE); // a version from a future writer
+
+    private final int id;
+
+    public int getId() {
+      return id;
+    }
+
+    WriterVersion(int id) {
+      this.id = id;
+    }
+
+    private static final WriterVersion[] values;
+    static {
+      // Assumes few non-negative values close to zero.
+      int max = Integer.MIN_VALUE;
+      for (WriterVersion v : WriterVersion.values()) {
+        if (v.id < 0) throw new AssertionError();
+        if (v.id > max && FUTURE.id != v.id) {
+          max = v.id;
+        }
+      }
+      values = new WriterVersion[max + 1];
+      for (WriterVersion v : WriterVersion.values()) {
+        if (v.id < values.length) {
+          values[v.id] = v;
+        }
+      }
+    }
+
+    /**
+     * Convert the integer from OrcProto.PostScript.writerVersion
+     * to the enumeration with unknown versions being mapped to FUTURE.
+     * @param val the serialized writer version
+     * @return the corresponding enumeration value
+     */
+    public static WriterVersion from(int val) {
+      if (val >= values.length) {
+        return FUTURE;
+      }
+      return values[val];
+    }
+  }
+  public static final WriterVersion CURRENT_WRITER = WriterVersion.HIVE_13083;
+
+  public enum EncodingStrategy {
+    SPEED, COMPRESSION
+  }
+
+  public enum CompressionStrategy {
+    SPEED, COMPRESSION
+  }
+
+  // unused
+  protected OrcFile() {}
+
+  public static class ReaderOptions {
+    private final Configuration conf;
+    private FileSystem filesystem;
+    private long maxLength = Long.MAX_VALUE;
+    private OrcTail orcTail;
+    // TODO: We can generalize the FileMetadata interface. Make OrcTail implement FileMetadata
+    // and remove this class altogether. Both footer caching and LLAP caching just need OrcTail.
+    // For now, keeping this around to avoid complex surgery.
+    private FileMetadata fileMetadata;
+
+    public ReaderOptions(Configuration conf) {
+      this.conf = conf;
+    }
+
+    public ReaderOptions filesystem(FileSystem fs) {
+      this.filesystem = fs;
+      return this;
+    }
+
+    public ReaderOptions maxLength(long val) {
+      maxLength = val;
+      return this;
+    }
+
+    public ReaderOptions orcTail(OrcTail tail) {
+      this.orcTail = tail;
+      return this;
+    }
+
+    public Configuration getConfiguration() {
+      return conf;
+    }
+
+    public FileSystem getFilesystem() {
+      return filesystem;
+    }
+
+    public long getMaxLength() {
+      return maxLength;
+    }
+
+    public OrcTail getOrcTail() {
+      return orcTail;
+    }
+
+    public ReaderOptions fileMetadata(final FileMetadata metadata) {
+      fileMetadata = metadata;
+      return this;
+    }
+
+    public FileMetadata getFileMetadata() {
+      return fileMetadata;
+    }
+  }
+
+  public static ReaderOptions readerOptions(Configuration conf) {
+    return new ReaderOptions(conf);
+  }
+
+  public static Reader createReader(Path path,
+                                    ReaderOptions options) throws IOException {
+    return new ReaderImpl(path, options);
+  }
+
+  public interface WriterContext {
+    Writer getWriter();
+  }
+
+  public interface WriterCallback {
+    void preStripeWrite(WriterContext context) throws IOException;
+    void preFooterWrite(WriterContext context) throws IOException;
+  }
+
+  /**
+   * Options for creating ORC file writers.
+   */
+  public static class WriterOptions {
+    private final Configuration configuration;
+    private FileSystem fileSystemValue = null;
+    private TypeDescription schema = null;
+    private long stripeSizeValue;
+    private long blockSizeValue;
+    private int rowIndexStrideValue;
+    private int bufferSizeValue;
+    private boolean enforceBufferSize = false;
+    private boolean blockPaddingValue;
+    private CompressionKind compressValue;
+    private MemoryManager memoryManagerValue;
+    private Version versionValue;
+    private WriterCallback callback;
+    private EncodingStrategy encodingStrategy;
+    private CompressionStrategy compressionStrategy;
+    private double paddingTolerance;
+    private String bloomFilterColumns;
+    private double bloomFilterFpp;
+
+    protected WriterOptions(Properties tableProperties, Configuration conf) {
+      configuration = conf;
+      memoryManagerValue = getStaticMemoryManager(conf);
+      stripeSizeValue = OrcConf.STRIPE_SIZE.getLong(tableProperties, conf);
+      blockSizeValue = OrcConf.BLOCK_SIZE.getLong(tableProperties, conf);
+      rowIndexStrideValue =
+          (int) OrcConf.ROW_INDEX_STRIDE.getLong(tableProperties, conf);
+      bufferSizeValue = (int) OrcConf.BUFFER_SIZE.getLong(tableProperties,
+          conf);
+      blockPaddingValue =
+          OrcConf.BLOCK_PADDING.getBoolean(tableProperties, conf);
+      compressValue =
+          CompressionKind.valueOf(OrcConf.COMPRESS.getString(tableProperties,
+              conf).toUpperCase());
+      String versionName = OrcConf.WRITE_FORMAT.getString(tableProperties,
+          conf);
+      versionValue = Version.byName(versionName);
+      String enString = OrcConf.ENCODING_STRATEGY.getString(tableProperties,
+          conf);
+      encodingStrategy = EncodingStrategy.valueOf(enString);
+
+      String compString =
+          OrcConf.COMPRESSION_STRATEGY.getString(tableProperties, conf);
+      compressionStrategy = CompressionStrategy.valueOf(compString);
+
+      paddingTolerance =
+          OrcConf.BLOCK_PADDING_TOLERANCE.getDouble(tableProperties, conf);
+
+      bloomFilterColumns = OrcConf.BLOOM_FILTER_COLUMNS.getString(tableProperties,
+          conf);
+      bloomFilterFpp = OrcConf.BLOOM_FILTER_FPP.getDouble(tableProperties,
+          conf);
+    }
+
+    /**
+     * Provide the filesystem for the path, if the client has it available.
+     * If it is not provided, it will be found from the path.
+     */
+    public WriterOptions fileSystem(FileSystem value) {
+      fileSystemValue = value;
+      return this;
+    }
+
+    /**
+     * Set the stripe size for the file. The writer stores the contents of the
+     * stripe in memory until this memory limit is reached and the stripe
+     * is flushed to the HDFS file and the next stripe started.
+     */
+    public WriterOptions stripeSize(long value) {
+      stripeSizeValue = value;
+      return this;
+    }
+
+    /**
+     * Set the file system block size for the file. For optimal performance,
+     * set the block size to be a multiple of the stripe size.
+     */
+    public WriterOptions blockSize(long value) {
+      blockSizeValue = value;
+      return this;
+    }
+
+    /**
+     * Set the distance between entries in the row index. The minimum value is
+     * 1000 to prevent the index from overwhelming the data. If the stride is
+     * set to 0, no indexes will be included in the file.
+     */
+    public WriterOptions rowIndexStride(int value) {
+      rowIndexStrideValue = value;
+      return this;
+    }
+
+    /**
+     * The size of the memory buffers used for compressing and storing the
+     * stripe in memory. NOTE: the ORC writer may choose a smaller buffer size
+     * based on stripe size and number of columns for efficient stripe writing
+     * and memory utilization. To force the writer to use the requested buffer
+     * size, use enforceBufferSize().
+     */
+    public WriterOptions bufferSize(int value) {
+      bufferSizeValue = value;
+      return this;
+    }
+
+    /**
+     * Force the writer to use the requested buffer size instead of estimating
+     * a buffer size based on stripe size and number of columns.
+     * See the bufferSize() method for more info.
+     * Default: false
+     */
+    public WriterOptions enforceBufferSize() {
+      enforceBufferSize = true;
+      return this;
+    }
+
+    /**
+     * Sets whether the HDFS blocks are padded to prevent stripes from
+     * straddling blocks. Padding improves locality and thus the speed of
+     * reading, but costs space.
+     */
+    public WriterOptions blockPadding(boolean value) {
+      blockPaddingValue = value;
+      return this;
+    }
+
+    /**
+     * Sets the encoding strategy that is used to encode the data.
+     */
+    public WriterOptions encodingStrategy(EncodingStrategy strategy) {
+      encodingStrategy = strategy;
+      return this;
+    }
+
+    /**
+     * Sets the tolerance for block padding as a decimal fraction of the stripe size.
+     */
+    public WriterOptions paddingTolerance(double value) {
+      paddingTolerance = value;
+      return this;
+    }
+
+    /**
+     * Comma-separated list of column names for which bloom filters are to be created.
+     */
+    public WriterOptions bloomFilterColumns(String columns) {
+      bloomFilterColumns = columns;
+      return this;
+    }
+
+    /**
+     * Specify the false positive probability for the bloom filters.
+     * @param fpp - false positive probability
+     * @return this
+     */
+    public WriterOptions bloomFilterFpp(double fpp) {
+      bloomFilterFpp = fpp;
+      return this;
+    }
+
+    /**
+     * Sets the generic compression that is used to compress the data.
+     */
+    public WriterOptions compress(CompressionKind value) {
+      compressValue = value;
+      return this;
+    }
+
+    /**
+     * Set the schema for the file. This is a required parameter.
+     * @param schema the schema for the file.
+     * @return this
+     */
+    public WriterOptions setSchema(TypeDescription schema) {
+      this.schema = schema;
+      return this;
+    }
+
+    /**
+     * Sets the version of the file that will be written.
+     */
+    public WriterOptions version(Version value) {
+      versionValue = value;
+      return this;
+    }
+
+    /**
+     * Add a listener for when the stripe and file are about to be closed.
+     * @param callback the object to be called when the stripe is closed
+     * @return this
+     */
+    public WriterOptions callback(WriterCallback callback) {
+      this.callback = callback;
+      return this;
+    }
+
+    /**
+     * A package local option to set the memory manager.
+     */
+    protected WriterOptions memory(MemoryManager value) {
+      memoryManagerValue = value;
+      return this;
+    }
+
+    public boolean getBlockPadding() {
+      return blockPaddingValue;
+    }
+
+    public long getBlockSize() {
+      return blockSizeValue;
+    }
+
+    public String getBloomFilterColumns() {
+      return bloomFilterColumns;
+    }
+
+    public FileSystem getFileSystem() {
+      return fileSystemValue;
+    }
+
+    public Configuration getConfiguration() {
+      return configuration;
+    }
+
+    public TypeDescription getSchema() {
+      return schema;
+    }
+
+    public long getStripeSize() {
+      return stripeSizeValue;
+    }
+
+    public CompressionKind getCompress() {
+      return compressValue;
+    }
+
+    public WriterCallback getCallback() {
+      return callback;
+    }
+
+    public Version getVersion() {
+      return versionValue;
+    }
+
+    public MemoryManager getMemoryManager() {
+      return memoryManagerValue;
+    }
+
+    public int getBufferSize() {
+      return bufferSizeValue;
+    }
+
+    public boolean isEnforceBufferSize() {
+      return enforceBufferSize;
+    }
+
+    public int getRowIndexStride() {
+      return rowIndexStrideValue;
+    }
+
+    public CompressionStrategy getCompressionStrategy() {
+      return compressionStrategy;
+    }
+
+    public EncodingStrategy getEncodingStrategy() {
+      return encodingStrategy;
+    }
+
+    public double getPaddingTolerance() {
+      return paddingTolerance;
+    }
+
+    public double getBloomFilterFpp() {
+      return bloomFilterFpp;
+    }
+  }
+
+  /**
+   * Create a set of writer options based on a configuration.
+   * @param conf the configuration to use for values
+   * @return A WriterOptions object that can be modified
+   */
+  public static WriterOptions writerOptions(Configuration conf) {
+    return new WriterOptions(null, conf);
+  }
+
+  /**
+   * Create a set of write options based on a set of table properties and
+   * configuration.
+   * @param tableProperties the properties of the table
+   * @param conf the configuration of the query
+   * @return a WriterOptions object that can be modified
+   */
+  public static WriterOptions writerOptions(Properties tableProperties,
+                                            Configuration conf) {
+    return new WriterOptions(tableProperties, conf);
+  }
+
+  private static ThreadLocal<MemoryManager> memoryManager = null;
+
+  private static synchronized MemoryManager getStaticMemoryManager(
+      final Configuration conf) {
+    if (memoryManager == null) {
+      memoryManager = new ThreadLocal<MemoryManager>() {
+        @Override
+        protected MemoryManager initialValue() {
+          return new MemoryManager(conf);
+        }
+      };
+    }
+    return memoryManager.get();
+  }
+
+  /**
+   * Create an ORC file writer. This is the public interface for creating
+   * writers going forward and new options will only be added to this method.
+   * @param path filename to write to
+   * @param opts the options
+   * @return a new ORC file writer
+   * @throws IOException
+   */
+  public static Writer createWriter(Path path,
+                                    WriterOptions opts
+                                    ) throws IOException {
+    FileSystem fs = opts.getFileSystem() == null ?
+        path.getFileSystem(opts.getConfiguration()) : opts.getFileSystem();
+
+    return new WriterImpl(fs, path, opts);
+  }
+
+}
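
A minimal usage sketch of these factory methods (the path, schema, and option values are
illustrative assumptions; the calls that append rows through the Writer are elided):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hive.orc.CompressionKind;
import org.apache.hive.orc.OrcFile;
import org.apache.hive.orc.Reader;
import org.apache.hive.orc.TypeDescription;
import org.apache.hive.orc.Writer;

public class OrcFileExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    TypeDescription schema = TypeDescription.createStruct()
        .addField("id", TypeDescription.createLong())
        .addField("name", TypeDescription.createString());

    Writer writer = OrcFile.createWriter(new Path("/tmp/example.orc"),
        OrcFile.writerOptions(conf)
            .setSchema(schema)                  // required parameter
            .compress(CompressionKind.ZLIB)
            .stripeSize(64L * 1024 * 1024)
            .bloomFilterColumns("name"));
    // ... append rows through the Writer here ...
    writer.close();

    Reader reader = OrcFile.createReader(new Path("/tmp/example.orc"),
        OrcFile.readerOptions(conf));
    System.out.println(reader.getNumberOfRows() + " rows, schema: " + reader.getSchema());
  }
}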

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/OrcUtils.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/OrcUtils.java b/orc/src/java/org/apache/hive/orc/OrcUtils.java
new file mode 100644
index 0000000..12cb1f7
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/OrcUtils.java
@@ -0,0 +1,623 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.hive.orc.impl.ReaderImpl;
+
+import com.google.common.collect.Lists;
+
+public class OrcUtils {
+
+  /**
+   * Returns the selected columns as a boolean array, with true set for the specified column names.
+   * The result contains one element per flattened column.
+   * For example:
+   * selectedColumns - a,b,c
+   * allColumns - a,b,c,d
+   * If column c is a complex type, say list<string>, and the other columns are primitives, the
+   * result will be [false, true, true, true, true, false].
+   * Index 0 is the root struct, which is false by default; indices 1 and 2 correspond to columns
+   * a and b; indices 3 and 4 correspond to column c, which flattens to 2 columns as list<string>;
+   * and index 5 corresponds to column d.
+   *
+   * @param selectedColumns - comma separated list of selected column names
+   * @param schema       - object schema
+   * @return - boolean array with true value set for the specified column names
+   */
+  public static boolean[] includeColumns(String selectedColumns,
+                                         TypeDescription schema) {
+    int numFlattenedCols = schema.getMaximumId();
+    boolean[] results = new boolean[numFlattenedCols + 1];
+    if ("*".equals(selectedColumns)) {
+      Arrays.fill(results, true);
+      return results;
+    }
+    if (selectedColumns != null &&
+        schema.getCategory() == TypeDescription.Category.STRUCT) {
+      List<String> fieldNames = schema.getFieldNames();
+      List<TypeDescription> fields = schema.getChildren();
+      for (String column: selectedColumns.split((","))) {
+        TypeDescription col = findColumn(column, fieldNames, fields);
+        if (col != null) {
+          for(int i=col.getId(); i <= col.getMaximumId(); ++i) {
+            results[i] = true;
+          }
+        }
+      }
+    }
+    return results;
+  }
+
+  private static TypeDescription findColumn(String columnName,
+                                            List<String> fieldNames,
+                                            List<TypeDescription> fields) {
+    int i = 0;
+    for(String fieldName: fieldNames) {
+      if (fieldName.equalsIgnoreCase(columnName)) {
+        return fields.get(i);
+      } else {
+        i += 1;
+      }
+    }
+    return null;
+  }
+
+  public static List<OrcProto.Type> getOrcTypes(TypeDescription typeDescr) {
+    List<OrcProto.Type> result = Lists.newArrayList();
+    appendOrcTypes(result, typeDescr);
+    return result;
+  }
+
+  private static void appendOrcTypes(List<OrcProto.Type> result, TypeDescription typeDescr) {
+    OrcProto.Type.Builder type = OrcProto.Type.newBuilder();
+    List<TypeDescription> children = typeDescr.getChildren();
+    switch (typeDescr.getCategory()) {
+    case BOOLEAN:
+      type.setKind(OrcProto.Type.Kind.BOOLEAN);
+      break;
+    case BYTE:
+      type.setKind(OrcProto.Type.Kind.BYTE);
+      break;
+    case SHORT:
+      type.setKind(OrcProto.Type.Kind.SHORT);
+      break;
+    case INT:
+      type.setKind(OrcProto.Type.Kind.INT);
+      break;
+    case LONG:
+      type.setKind(OrcProto.Type.Kind.LONG);
+      break;
+    case FLOAT:
+      type.setKind(OrcProto.Type.Kind.FLOAT);
+      break;
+    case DOUBLE:
+      type.setKind(OrcProto.Type.Kind.DOUBLE);
+      break;
+    case STRING:
+      type.setKind(OrcProto.Type.Kind.STRING);
+      break;
+    case CHAR:
+      type.setKind(OrcProto.Type.Kind.CHAR);
+      type.setMaximumLength(typeDescr.getMaxLength());
+      break;
+    case VARCHAR:
+      type.setKind(OrcProto.Type.Kind.VARCHAR);
+      type.setMaximumLength(typeDescr.getMaxLength());
+      break;
+    case BINARY:
+      type.setKind(OrcProto.Type.Kind.BINARY);
+      break;
+    case TIMESTAMP:
+      type.setKind(OrcProto.Type.Kind.TIMESTAMP);
+      break;
+    case DATE:
+      type.setKind(OrcProto.Type.Kind.DATE);
+      break;
+    case DECIMAL:
+      type.setKind(OrcProto.Type.Kind.DECIMAL);
+      type.setPrecision(typeDescr.getPrecision());
+      type.setScale(typeDescr.getScale());
+      break;
+    case LIST:
+      type.setKind(OrcProto.Type.Kind.LIST);
+      type.addSubtypes(children.get(0).getId());
+      break;
+    case MAP:
+      type.setKind(OrcProto.Type.Kind.MAP);
+      for(TypeDescription t: children) {
+        type.addSubtypes(t.getId());
+      }
+      break;
+    case STRUCT:
+      type.setKind(OrcProto.Type.Kind.STRUCT);
+      for(TypeDescription t: children) {
+        type.addSubtypes(t.getId());
+      }
+      for(String field: typeDescr.getFieldNames()) {
+        type.addFieldNames(field);
+      }
+      break;
+    case UNION:
+      type.setKind(OrcProto.Type.Kind.UNION);
+      for(TypeDescription t: children) {
+        type.addSubtypes(t.getId());
+      }
+      break;
+    default:
+      throw new IllegalArgumentException("Unknown category: " +
+          typeDescr.getCategory());
+    }
+    result.add(type.build());
+    if (children != null) {
+      for(TypeDescription child: children) {
+        appendOrcTypes(result, child);
+      }
+    }
+  }
+
+  /**
+   * NOTE: This method ignores the subtype numbers in the TypeDescription and rebuilds the subtype
+   * numbers based on the length of the result list being appended.
+   *
+   * @param result
+   * @param typeDescr
+   */
+  public static void appendOrcTypesRebuildSubtypes(List<OrcProto.Type> result,
+      TypeDescription typeDescr) {
+
+    int subtype = result.size();
+    OrcProto.Type.Builder type = OrcProto.Type.newBuilder();
+    boolean needsAdd = true;
+    List<TypeDescription> children = typeDescr.getChildren();
+    switch (typeDescr.getCategory()) {
+    case BOOLEAN:
+      type.setKind(OrcProto.Type.Kind.BOOLEAN);
+      break;
+    case BYTE:
+      type.setKind(OrcProto.Type.Kind.BYTE);
+      break;
+    case SHORT:
+      type.setKind(OrcProto.Type.Kind.SHORT);
+      break;
+    case INT:
+      type.setKind(OrcProto.Type.Kind.INT);
+      break;
+    case LONG:
+      type.setKind(OrcProto.Type.Kind.LONG);
+      break;
+    case FLOAT:
+      type.setKind(OrcProto.Type.Kind.FLOAT);
+      break;
+    case DOUBLE:
+      type.setKind(OrcProto.Type.Kind.DOUBLE);
+      break;
+    case STRING:
+      type.setKind(OrcProto.Type.Kind.STRING);
+      break;
+    case CHAR:
+      type.setKind(OrcProto.Type.Kind.CHAR);
+      type.setMaximumLength(typeDescr.getMaxLength());
+      break;
+    case VARCHAR:
+      type.setKind(OrcProto.Type.Kind.VARCHAR);
+      type.setMaximumLength(typeDescr.getMaxLength());
+      break;
+    case BINARY:
+      type.setKind(OrcProto.Type.Kind.BINARY);
+      break;
+    case TIMESTAMP:
+      type.setKind(OrcProto.Type.Kind.TIMESTAMP);
+      break;
+    case DATE:
+      type.setKind(OrcProto.Type.Kind.DATE);
+      break;
+    case DECIMAL:
+      type.setKind(OrcProto.Type.Kind.DECIMAL);
+      type.setPrecision(typeDescr.getPrecision());
+      type.setScale(typeDescr.getScale());
+      break;
+    case LIST:
+      type.setKind(OrcProto.Type.Kind.LIST);
+      type.addSubtypes(++subtype);
+      result.add(type.build());
+      needsAdd = false;
+      appendOrcTypesRebuildSubtypes(result, children.get(0));
+      break;
+    case MAP:
+      {
+        // Make room for MAP type.
+        result.add(null);
+
+        // Add MAP type pair in order to determine their subtype values.
+        appendOrcTypesRebuildSubtypes(result, children.get(0));
+        int subtype2 = result.size();
+        appendOrcTypesRebuildSubtypes(result, children.get(1));
+        type.setKind(OrcProto.Type.Kind.MAP);
+        type.addSubtypes(subtype + 1);
+        type.addSubtypes(subtype2);
+        result.set(subtype, type.build());
+        needsAdd = false;
+      }
+      break;
+    case STRUCT:
+      {
+        List<String> fieldNames = typeDescr.getFieldNames();
+
+        // Make room for STRUCT type.
+        result.add(null);
+
+        List<Integer> fieldSubtypes = new ArrayList<Integer>(fieldNames.size());
+        for(TypeDescription child: children) {
+          int fieldSubtype = result.size();
+          fieldSubtypes.add(fieldSubtype);
+          appendOrcTypesRebuildSubtypes(result, child);
+        }
+
+        type.setKind(OrcProto.Type.Kind.STRUCT);
+
+        for (int i = 0 ; i < fieldNames.size(); i++) {
+          type.addSubtypes(fieldSubtypes.get(i));
+          type.addFieldNames(fieldNames.get(i));
+        }
+        result.set(subtype, type.build());
+        needsAdd = false;
+      }
+      break;
+    case UNION:
+      {
+        // Make room for UNION type.
+        result.add(null);
+
+        List<Integer> unionSubtypes = new ArrayList<Integer>(children.size());
+        for(TypeDescription child: children) {
+          int unionSubtype = result.size();
+          unionSubtypes.add(unionSubtype);
+          appendOrcTypesRebuildSubtypes(result, child);
+        }
+
+        type.setKind(OrcProto.Type.Kind.UNION);
+        for (int i = 0 ; i < children.size(); i++) {
+          type.addSubtypes(unionSubtypes.get(i));
+        }
+        result.set(subtype, type.build());
+        needsAdd = false;
+      }
+      break;
+    default:
+      throw new IllegalArgumentException("Unknown category: " + typeDescr.getCategory());
+    }
+    if (needsAdd) {
+      result.add(type.build());
+    }
+  }
+
+  /**
+   * NOTE: This method ignores the subtype numbers in the OrcProto.Type and rebuilds the subtype
+   * numbers based on the length of the result list being appended.
+   *
+   * @param result
+   * @param types
+   * @param columnId
+   */
+  public static int appendOrcTypesRebuildSubtypes(List<OrcProto.Type> result,
+      List<OrcProto.Type> types, int columnId) {
+
+    OrcProto.Type oldType = types.get(columnId++);
+
+    int subtype = result.size();
+    OrcProto.Type.Builder builder = OrcProto.Type.newBuilder();
+    boolean needsAdd = true;
+    switch (oldType.getKind()) {
+    case BOOLEAN:
+      builder.setKind(OrcProto.Type.Kind.BOOLEAN);
+      break;
+    case BYTE:
+      builder.setKind(OrcProto.Type.Kind.BYTE);
+      break;
+    case SHORT:
+      builder.setKind(OrcProto.Type.Kind.SHORT);
+      break;
+    case INT:
+      builder.setKind(OrcProto.Type.Kind.INT);
+      break;
+    case LONG:
+      builder.setKind(OrcProto.Type.Kind.LONG);
+      break;
+    case FLOAT:
+      builder.setKind(OrcProto.Type.Kind.FLOAT);
+      break;
+    case DOUBLE:
+      builder.setKind(OrcProto.Type.Kind.DOUBLE);
+      break;
+    case STRING:
+      builder.setKind(OrcProto.Type.Kind.STRING);
+      break;
+    case CHAR:
+      builder.setKind(OrcProto.Type.Kind.CHAR);
+      builder.setMaximumLength(oldType.getMaximumLength());
+      break;
+    case VARCHAR:
+      builder.setKind(OrcProto.Type.Kind.VARCHAR);
+      builder.setMaximumLength(oldType.getMaximumLength());
+      break;
+    case BINARY:
+      builder.setKind(OrcProto.Type.Kind.BINARY);
+      break;
+    case TIMESTAMP:
+      builder.setKind(OrcProto.Type.Kind.TIMESTAMP);
+      break;
+    case DATE:
+      builder.setKind(OrcProto.Type.Kind.DATE);
+      break;
+    case DECIMAL:
+      builder.setKind(OrcProto.Type.Kind.DECIMAL);
+      builder.setPrecision(oldType.getPrecision());
+      builder.setScale(oldType.getScale());
+      break;
+    case LIST:
+      builder.setKind(OrcProto.Type.Kind.LIST);
+      builder.addSubtypes(++subtype);
+      result.add(builder.build());
+      needsAdd = false;
+      columnId = appendOrcTypesRebuildSubtypes(result, types, columnId);
+      break;
+    case MAP:
+      {
+        // Make room for MAP type.
+        result.add(null);
+
+        // Add MAP type pair in order to determine their subtype values.
+        columnId = appendOrcTypesRebuildSubtypes(result, types, columnId);
+        int subtype2 = result.size();
+        columnId = appendOrcTypesRebuildSubtypes(result, types, columnId);
+        builder.setKind(OrcProto.Type.Kind.MAP);
+        builder.addSubtypes(subtype + 1);
+        builder.addSubtypes(subtype2);
+        result.set(subtype, builder.build());
+        needsAdd = false;
+      }
+      break;
+    case STRUCT:
+      {
+        List<String> fieldNames = oldType.getFieldNamesList();
+
+        // Make room for STRUCT type.
+        result.add(null);
+
+        List<Integer> fieldSubtypes = new ArrayList<Integer>(fieldNames.size());
+        for(int i = 0 ; i < fieldNames.size(); i++) {
+          int fieldSubtype = result.size();
+          fieldSubtypes.add(fieldSubtype);
+          columnId = appendOrcTypesRebuildSubtypes(result, types, columnId);
+        }
+
+        builder.setKind(OrcProto.Type.Kind.STRUCT);
+
+        for (int i = 0 ; i < fieldNames.size(); i++) {
+          builder.addSubtypes(fieldSubtypes.get(i));
+          builder.addFieldNames(fieldNames.get(i));
+        }
+        result.set(subtype, builder.build());
+        needsAdd = false;
+      }
+      break;
+    case UNION:
+      {
+        int subtypeCount = oldType.getSubtypesCount();
+
+        // Make room for UNION type.
+        result.add(null);
+
+        List<Integer> unionSubtypes = new ArrayList<Integer>(subtypeCount);
+        for(int i = 0 ; i < subtypeCount; i++) {
+          int unionSubtype = result.size();
+          unionSubtypes.add(unionSubtype);
+          columnId = appendOrcTypesRebuildSubtypes(result, types, columnId);
+        }
+
+        builder.setKind(OrcProto.Type.Kind.UNION);
+        for (int i = 0 ; i < subtypeCount; i++) {
+          builder.addSubtypes(unionSubtypes.get(i));
+        }
+        result.set(subtype, builder.build());
+        needsAdd = false;
+      }
+      break;
+    default:
+      throw new IllegalArgumentException("Unknown category: " + oldType.getKind());
+    }
+    if (needsAdd) {
+      result.add(builder.build());
+    }
+    return columnId;
+  }
+
+  /**
+   * Translate the given rootColumn from the list of types to a TypeDescription.
+   * @param types all of the types
+   * @param rootColumn translate this type
+   * @return a new TypeDescription that matches the given rootColumn
+   */
+  public static
+        TypeDescription convertTypeFromProtobuf(List<OrcProto.Type> types,
+                                                int rootColumn) {
+    OrcProto.Type type = types.get(rootColumn);
+    switch (type.getKind()) {
+      case BOOLEAN:
+        return TypeDescription.createBoolean();
+      case BYTE:
+        return TypeDescription.createByte();
+      case SHORT:
+        return TypeDescription.createShort();
+      case INT:
+        return TypeDescription.createInt();
+      case LONG:
+        return TypeDescription.createLong();
+      case FLOAT:
+        return TypeDescription.createFloat();
+      case DOUBLE:
+        return TypeDescription.createDouble();
+      case STRING:
+        return TypeDescription.createString();
+      case CHAR:
+      case VARCHAR: {
+        TypeDescription result = type.getKind() == OrcProto.Type.Kind.CHAR ?
+            TypeDescription.createChar() : TypeDescription.createVarchar();
+        if (type.hasMaximumLength()) {
+          result.withMaxLength(type.getMaximumLength());
+        }
+        return result;
+      }
+      case BINARY:
+        return TypeDescription.createBinary();
+      case TIMESTAMP:
+        return TypeDescription.createTimestamp();
+      case DATE:
+        return TypeDescription.createDate();
+      case DECIMAL: {
+        TypeDescription result = TypeDescription.createDecimal();
+        if (type.hasScale()) {
+          result.withScale(type.getScale());
+        }
+        if (type.hasPrecision()) {
+          result.withPrecision(type.getPrecision());
+        }
+        return result;
+      }
+      case LIST:
+        return TypeDescription.createList(
+            convertTypeFromProtobuf(types, type.getSubtypes(0)));
+      case MAP:
+        return TypeDescription.createMap(
+            convertTypeFromProtobuf(types, type.getSubtypes(0)),
+            convertTypeFromProtobuf(types, type.getSubtypes(1)));
+      case STRUCT: {
+        TypeDescription result = TypeDescription.createStruct();
+        for(int f=0; f < type.getSubtypesCount(); ++f) {
+          result.addField(type.getFieldNames(f),
+              convertTypeFromProtobuf(types, type.getSubtypes(f)));
+        }
+        return result;
+      }
+      case UNION: {
+        TypeDescription result = TypeDescription.createUnion();
+        for(int f=0; f < type.getSubtypesCount(); ++f) {
+          result.addUnionChild(
+              convertTypeFromProtobuf(types, type.getSubtypes(f)));
+        }
+        return result;
+      }
+    }
+    throw new IllegalArgumentException("Unknown ORC type " + type.getKind());
+  }
+
+  public static List<StripeInformation> convertProtoStripesToStripes(
+      List<OrcProto.StripeInformation> stripes) {
+    List<StripeInformation> result = new ArrayList<StripeInformation>(stripes.size());
+    for (OrcProto.StripeInformation info : stripes) {
+      result.add(new ReaderImpl.StripeInformationImpl(info));
+    }
+    return result;
+  }
+
+  public static List<TypeDescription> setTypeBuilderFromSchema(
+      OrcProto.Type.Builder type, TypeDescription schema) {
+    List<TypeDescription> children = schema.getChildren();
+    switch (schema.getCategory()) {
+      case BOOLEAN:
+        type.setKind(OrcProto.Type.Kind.BOOLEAN);
+        break;
+      case BYTE:
+        type.setKind(OrcProto.Type.Kind.BYTE);
+        break;
+      case SHORT:
+        type.setKind(OrcProto.Type.Kind.SHORT);
+        break;
+      case INT:
+        type.setKind(OrcProto.Type.Kind.INT);
+        break;
+      case LONG:
+        type.setKind(OrcProto.Type.Kind.LONG);
+        break;
+      case FLOAT:
+        type.setKind(OrcProto.Type.Kind.FLOAT);
+        break;
+      case DOUBLE:
+        type.setKind(OrcProto.Type.Kind.DOUBLE);
+        break;
+      case STRING:
+        type.setKind(OrcProto.Type.Kind.STRING);
+        break;
+      case CHAR:
+        type.setKind(OrcProto.Type.Kind.CHAR);
+        type.setMaximumLength(schema.getMaxLength());
+        break;
+      case VARCHAR:
+        type.setKind(OrcProto.Type.Kind.VARCHAR);
+        type.setMaximumLength(schema.getMaxLength());
+        break;
+      case BINARY:
+        type.setKind(OrcProto.Type.Kind.BINARY);
+        break;
+      case TIMESTAMP:
+        type.setKind(OrcProto.Type.Kind.TIMESTAMP);
+        break;
+      case DATE:
+        type.setKind(OrcProto.Type.Kind.DATE);
+        break;
+      case DECIMAL:
+        type.setKind(OrcProto.Type.Kind.DECIMAL);
+        type.setPrecision(schema.getPrecision());
+        type.setScale(schema.getScale());
+        break;
+      case LIST:
+        type.setKind(OrcProto.Type.Kind.LIST);
+        type.addSubtypes(children.get(0).getId());
+        break;
+      case MAP:
+        type.setKind(OrcProto.Type.Kind.MAP);
+        for(TypeDescription t: children) {
+          type.addSubtypes(t.getId());
+        }
+        break;
+      case STRUCT:
+        type.setKind(OrcProto.Type.Kind.STRUCT);
+        for(TypeDescription t: children) {
+          type.addSubtypes(t.getId());
+        }
+        for(String field: schema.getFieldNames()) {
+          type.addFieldNames(field);
+        }
+        break;
+      case UNION:
+        type.setKind(OrcProto.Type.Kind.UNION);
+        for(TypeDescription t: children) {
+          type.addSubtypes(t.getId());
+        }
+        break;
+      default:
+        throw new IllegalArgumentException("Unknown category: " +
+          schema.getCategory());
+    }
+    return children;
+  }
+}
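
A small self-contained sketch of includeColumns with the schema shape described in its javadoc
above (the field names mirror that example; the class name is arbitrary):

import java.util.Arrays;
import org.apache.hive.orc.OrcUtils;
import org.apache.hive.orc.TypeDescription;

public class IncludeColumnsExample {
  public static void main(String[] args) {
    // struct<a:int,b:string,c:array<string>,d:int> -- the shape from the javadoc example.
    TypeDescription schema = TypeDescription.createStruct()
        .addField("a", TypeDescription.createInt())
        .addField("b", TypeDescription.createString())
        .addField("c", TypeDescription.createList(TypeDescription.createString()))
        .addField("d", TypeDescription.createInt());

    boolean[] included = OrcUtils.includeColumns("a,b,c", schema);
    // Flattened ids: 0=root struct, 1=a, 2=b, 3=c (list), 4=c's element, 5=d
    System.out.println(Arrays.toString(included)); // [false, true, true, true, true, false]
  }
}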

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/Reader.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/Reader.java b/orc/src/java/org/apache/hive/orc/Reader.java
new file mode 100644
index 0000000..61ce186
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/Reader.java
@@ -0,0 +1,375 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
+
+/**
+ * The interface for reading ORC files.
+ *
+ * One Reader can support multiple concurrent RecordReaders.
+ */
+public interface Reader {
+
+  /**
+   * Get the number of rows in the file.
+   * @return the number of rows
+   */
+  long getNumberOfRows();
+
+  /**
+   * Get the deserialized data size of the file
+   * @return raw data size
+   */
+  long getRawDataSize();
+
+  /**
+   * Get the deserialized data size of the specified columns
+   * @param colNames
+   * @return raw data size of columns
+   */
+  long getRawDataSizeOfColumns(List<String> colNames);
+
+  /**
+   * Get the deserialized data size of the specified column ids
+   * @param colIds - internal column ids (check orcfiledump for column ids)
+   * @return raw data size of columns
+   */
+  long getRawDataSizeFromColIndices(List<Integer> colIds);
+
+  /**
+   * Get the user metadata keys.
+   * @return the set of metadata keys
+   */
+  List<String> getMetadataKeys();
+
+  /**
+   * Get a user metadata value.
+   * @param key a key given by the user
+   * @return the bytes associated with the given key
+   */
+  ByteBuffer getMetadataValue(String key);
+
+  /**
+   * Did the user set the given metadata value?
+   * @param key the key to check
+   * @return true if the metadata value was set
+   */
+  boolean hasMetadataValue(String key);
+
+  /**
+   * Get the compression kind.
+   * @return the kind of compression in the file
+   */
+  CompressionKind getCompressionKind();
+
+  /**
+   * Get the buffer size for the compression.
+   * @return number of bytes to buffer for the compression codec.
+   */
+  int getCompressionSize();
+
+  /**
+   * Get the number of rows per entry in the row index.
+   * @return the number of rows per entry in the row index or 0 if there
+   * is no row index.
+   */
+  int getRowIndexStride();
+
+  /**
+   * Get the list of stripes.
+   * @return the information about the stripes in order
+   */
+  List<StripeInformation> getStripes();
+
+  /**
+   * Get the length of the file.
+   * @return the number of bytes in the file
+   */
+  long getContentLength();
+
+  /**
+   * Get the statistics about the columns in the file.
+   * @return the information about the column
+   */
+  ColumnStatistics[] getStatistics();
+
+  /**
+   * Get the type of rows in this ORC file.
+   */
+  TypeDescription getSchema();
+
+  /**
+   * Get the list of types contained in the file. The root type is the first
+   * type in the list.
+   * @return the list of flattened types
+   * @deprecated use getSchema instead
+   */
+  List<OrcProto.Type> getTypes();
+
+  /**
+   * Get the file format version.
+   */
+  OrcFile.Version getFileVersion();
+
+  /**
+   * Get the version of the writer of this file.
+   */
+  OrcFile.WriterVersion getWriterVersion();
+
+  /**
+   * Get the file tail (footer + postscript)
+   *
+   * @return - file tail
+   */
+  OrcProto.FileTail getFileTail();
+
+  /**
+   * Options for creating a RecordReader.
+   */
+  public static class Options {
+    private boolean[] include;
+    private long offset = 0;
+    private long length = Long.MAX_VALUE;
+    private SearchArgument sarg = null;
+    private String[] columnNames = null;
+    private Boolean useZeroCopy = null;
+    private Boolean skipCorruptRecords = null;
+    private TypeDescription schema = null;
+    private DataReader dataReader = null;
+
+    /**
+     * Set the list of columns to read.
+     * @param include a list of columns to read
+     * @return this
+     */
+    public Options include(boolean[] include) {
+      this.include = include;
+      return this;
+    }
+
+    /**
+     * Set the range of bytes to read
+     * @param offset the starting byte offset
+     * @param length the number of bytes to read
+     * @return this
+     */
+    public Options range(long offset, long length) {
+      this.offset = offset;
+      this.length = length;
+      return this;
+    }
+
+    /**
+     * Set the schema on read type description.
+     */
+    public Options schema(TypeDescription schema) {
+      this.schema = schema;
+      return this;
+    }
+
+    /**
+     * Set search argument for predicate push down.
+     * @param sarg the search argument
+     * @param columnNames the column names referenced by the search argument
+     * @return this
+     */
+    public Options searchArgument(SearchArgument sarg, String[] columnNames) {
+      this.sarg = sarg;
+      this.columnNames = columnNames;
+      return this;
+    }
+
+    /**
+     * Set whether to use zero copy from HDFS.
+     * @param value the new zero copy flag
+     * @return this
+     */
+    public Options useZeroCopy(boolean value) {
+      this.useZeroCopy = value;
+      return this;
+    }
+
+    public Options dataReader(DataReader value) {
+      this.dataReader = value;
+      return this;
+    }
+
+    /**
+     * Set whether to skip corrupt records.
+     * @param value the new skip corrupt records flag
+     * @return this
+     */
+    public Options skipCorruptRecords(boolean value) {
+      this.skipCorruptRecords = value;
+      return this;
+    }
+
+    public boolean[] getInclude() {
+      return include;
+    }
+
+    public long getOffset() {
+      return offset;
+    }
+
+    public long getLength() {
+      return length;
+    }
+
+    public TypeDescription getSchema() {
+      return schema;
+    }
+
+    public SearchArgument getSearchArgument() {
+      return sarg;
+    }
+
+    public String[] getColumnNames() {
+      return columnNames;
+    }
+
+    public long getMaxOffset() {
+      long result = offset + length;
+      if (result < 0) {
+        result = Long.MAX_VALUE;
+      }
+      return result;
+    }
+
+    public Boolean getUseZeroCopy() {
+      return useZeroCopy;
+    }
+
+    public Boolean getSkipCorruptRecords() {
+      return skipCorruptRecords;
+    }
+
+    public DataReader getDataReader() {
+      return dataReader;
+    }
+
+    public Options clone() {
+      Options result = new Options();
+      result.include = include;
+      result.offset = offset;
+      result.length = length;
+      result.sarg = sarg;
+      result.schema = schema;
+      result.columnNames = columnNames;
+      result.useZeroCopy = useZeroCopy;
+      result.skipCorruptRecords = skipCorruptRecords;
+      result.dataReader = dataReader == null ? null : dataReader.clone();
+      return result;
+    }
+
+    @Override
+    public String toString() {
+      StringBuilder buffer = new StringBuilder();
+      buffer.append("{include: ");
+      if (include == null) {
+        buffer.append("null");
+      } else {
+        buffer.append("[");
+        for(int i=0; i < include.length; ++i) {
+          if (i != 0) {
+            buffer.append(", ");
+          }
+          buffer.append(include[i]);
+        }
+        buffer.append("]");
+      }
+      buffer.append(", offset: ");
+      buffer.append(offset);
+      buffer.append(", length: ");
+      buffer.append(length);
+      if (sarg != null) {
+        buffer.append(", sarg: ");
+        buffer.append(sarg.toString());
+        buffer.append(", columns: [");
+        for(int i=0; i < columnNames.length; ++i) {
+          if (i != 0) {
+            buffer.append(", ");
+          }
+          buffer.append("'");
+          buffer.append(columnNames[i]);
+          buffer.append("'");
+        }
+        buffer.append("]");
+      }
+      if (schema != null) {
+        buffer.append(", schema: ");
+        schema.printToBuffer(buffer);
+      }
+      buffer.append("}");
+      return buffer.toString();
+    }
+  }
+
+  /**
+   * Create a RecordReader that reads everything with the default options.
+   * @return a new RecordReader
+   * @throws IOException
+   */
+  RecordReader rows() throws IOException;
+
+  /**
+   * Create a RecordReader that uses the options given.
+   * This method can't be named rows, because many callers used rows(null)
+   * before the rows() method was introduced.
+   * @param options the options to read with
+   * @return a new RecordReader
+   * @throws IOException
+   */
+  RecordReader rows(Options options) throws IOException;
+
+  /**
+   * @return the list of integers representing the version of the file, in order from major to minor.
+   */
+  List<Integer> getVersionList();
+
+  /**
+   * @return the size of the metadata, in bytes.
+   */
+  int getMetadataSize();
+
+  /**
+   * @return Stripe statistics, in original protobuf form.
+   */
+  List<OrcProto.StripeStatistics> getOrcProtoStripeStatistics();
+
+  /**
+   * @return Stripe statistics.
+   */
+  List<StripeStatistics> getStripeStatistics() throws IOException;
+
+  /**
+   * @return File statistics, in original protobuf form.
+   */
+  List<OrcProto.ColumnStatistics> getOrcProtoFileStatistics();
+
+  /**
+   * @return Serialized file metadata read from disk for the purposes of caching, etc.
+   */
+  ByteBuffer getSerializedFileFooter();
+}
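
A minimal usage sketch for the Options builder above (this appears to be the
org.apache.hive.orc.Reader interface introduced by this patch). The column
flags and byte range below are illustrative only, not taken from the patch:

    import org.apache.hive.orc.Reader;

    public class OptionsSketch {
      public static void main(String[] args) {
        // Fluent configuration: read the root struct plus columns 1 and 3,
        // over the whole file.
        Reader.Options opts = new Reader.Options()
            .include(new boolean[]{true, true, false, true})
            .range(0, Long.MAX_VALUE);
        // getMaxOffset() guards against overflow of offset + length.
        System.out.println(opts.getMaxOffset());   // prints Long.MAX_VALUE here
        System.out.println(opts);                  // uses the toString() above
      }
    }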

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/RecordReader.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/RecordReader.java b/orc/src/java/org/apache/hive/orc/RecordReader.java
new file mode 100644
index 0000000..f86fa0e
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/RecordReader.java
@@ -0,0 +1,64 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc;
+
+import java.io.IOException;
+
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+/**
+ * A row-by-row iterator for ORC files.
+ */
+public interface RecordReader {
+  /**
+   * Read the next row batch. The size of the batch to read cannot be
+   * controlled by the caller. Callers need to look at
+   * VectorizedRowBatch.size of the returned object to know how many rows
+   * were read.
+   * @param batch a row batch object to read into
+   * @return true if more rows were available to read
+   * @throws java.io.IOException
+   */
+  boolean nextBatch(VectorizedRowBatch batch) throws IOException;
+
+  /**
+   * Get the row number of the row that will be returned by the following
+   * call to nextBatch().
+   * @return the row number from 0 to the number of rows in the file
+   * @throws java.io.IOException
+   */
+  long getRowNumber() throws IOException;
+
+  /**
+   * Get the progress of the reader through the rows.
+   * @return a fraction between 0.0 and 1.0 of rows read
+   * @throws java.io.IOException
+   */
+  float getProgress() throws IOException;
+
+  /**
+   * Release the resources associated with this reader.
+   * @throws java.io.IOException
+   */
+  void close() throws IOException;
+
+  /**
+   * Seek to a particular row number.
+   */
+  void seekToRow(long rowCount) throws IOException;
+}
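
A hedged sketch of the read loop this interface implies: nextBatch() fills the
caller's batch, and the caller must consult VectorizedRowBatch.size because the
reader decides how many rows each call returns. The Reader argument and
TypeDescription.createRowBatch() are assumed from the surrounding API, not
shown in this hunk:

    import java.io.IOException;

    import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
    import org.apache.hive.orc.Reader;
    import org.apache.hive.orc.RecordReader;

    public class BatchLoopSketch {
      static long countRows(Reader reader) throws IOException {
        long total = 0;
        RecordReader rows = reader.rows();                        // default options
        VectorizedRowBatch batch = reader.getSchema().createRowBatch();
        while (rows.nextBatch(batch)) {                           // false => no more rows
          total += batch.size;                                    // rows read by this call
        }
        rows.close();
        return total;
      }
    }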

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/StringColumnStatistics.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/StringColumnStatistics.java b/orc/src/java/org/apache/hive/orc/StringColumnStatistics.java
new file mode 100644
index 0000000..8a81413
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/StringColumnStatistics.java
@@ -0,0 +1,41 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc;
+
+/**
+ * Statistics for string columns.
+ */
+public interface StringColumnStatistics extends ColumnStatistics {
+  /**
+   * Get the minimum string.
+   * @return the minimum
+   */
+  String getMinimum();
+
+  /**
+   * Get the maximum string.
+   * @return the maximum
+   */
+  String getMaximum();
+
+  /**
+   * Get the total length of all strings
+   * @return the sum (total length)
+   */
+  long getSum();
+}
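
String statistics are typically obtained by indexing Reader.getStatistics()
with a column id and down-casting; a small sketch under that assumption (the
column id is illustrative):

    import org.apache.hive.orc.ColumnStatistics;
    import org.apache.hive.orc.Reader;
    import org.apache.hive.orc.StringColumnStatistics;

    public class StringStatsSketch {
      static void printStringStats(Reader reader, int columnId) {
        ColumnStatistics[] stats = reader.getStatistics();  // one entry per flattened column
        if (stats[columnId] instanceof StringColumnStatistics) {
          StringColumnStatistics s = (StringColumnStatistics) stats[columnId];
          System.out.println("min=" + s.getMinimum() + " max=" + s.getMaximum()
              + " totalLength=" + s.getSum());
        }
      }
    }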


http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/impl/DataReaderProperties.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/DataReaderProperties.java b/orc/src/java/org/apache/orc/impl/DataReaderProperties.java
deleted file mode 100644
index 22301e8..0000000
--- a/orc/src/java/org/apache/orc/impl/DataReaderProperties.java
+++ /dev/null
@@ -1,124 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc.impl;
-
-import com.google.common.base.Preconditions;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.orc.CompressionKind;
-
-import javax.annotation.Nullable;
-
-public final class DataReaderProperties {
-
-  private final FileSystem fileSystem;
-  private final Path path;
-  private final CompressionKind compression;
-  private final boolean zeroCopy;
-  private final int typeCount;
-  private final int bufferSize;
-
-  private DataReaderProperties(Builder builder) {
-    this.fileSystem = builder.fileSystem;
-    this.path = builder.path;
-    this.compression = builder.compression;
-    this.zeroCopy = builder.zeroCopy;
-    this.typeCount = builder.typeCount;
-    this.bufferSize = builder.bufferSize;
-  }
-
-  public FileSystem getFileSystem() {
-    return fileSystem;
-  }
-
-  public Path getPath() {
-    return path;
-  }
-
-  public CompressionKind getCompression() {
-    return compression;
-  }
-
-  public boolean getZeroCopy() {
-    return zeroCopy;
-  }
-
-  public int getTypeCount() {
-    return typeCount;
-  }
-
-  public int getBufferSize() {
-    return bufferSize;
-  }
-
-  public static Builder builder() {
-    return new Builder();
-  }
-
-  public static class Builder {
-
-    private FileSystem fileSystem;
-    private Path path;
-    private CompressionKind compression;
-    private boolean zeroCopy;
-    private int typeCount;
-    private int bufferSize;
-
-    private Builder() {
-
-    }
-
-    public Builder withFileSystem(FileSystem fileSystem) {
-      this.fileSystem = fileSystem;
-      return this;
-    }
-
-    public Builder withPath(Path path) {
-      this.path = path;
-      return this;
-    }
-
-    public Builder withCompression(CompressionKind value) {
-      this.compression = value;
-      return this;
-    }
-
-    public Builder withZeroCopy(boolean zeroCopy) {
-      this.zeroCopy = zeroCopy;
-      return this;
-    }
-
-    public Builder withTypeCount(int value) {
-      this.typeCount = value;
-      return this;
-    }
-
-    public Builder withBufferSize(int value) {
-      this.bufferSize = value;
-      return this;
-    }
-
-    public DataReaderProperties build() {
-      Preconditions.checkNotNull(fileSystem);
-      Preconditions.checkNotNull(path);
-
-      return new DataReaderProperties(this);
-    }
-
-  }
-}
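
For reference, the builder being removed here (and presumably re-added under
the new package elsewhere in this patch) is used roughly as follows; the path,
compression kind, and sizes are illustrative:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.orc.CompressionKind;
    import org.apache.orc.impl.DataReaderProperties;

    public class DataReaderPropertiesSketch {
      public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.getLocal(new Configuration());
        DataReaderProperties props = DataReaderProperties.builder()
            .withFileSystem(fs)                        // required
            .withPath(new Path("/tmp/example.orc"))    // required, hypothetical path
            .withCompression(CompressionKind.ZLIB)
            .withZeroCopy(false)
            .withTypeCount(10)
            .withBufferSize(256 * 1024)
            .build();                                  // checks fileSystem and path
        System.out.println(props.getBufferSize());
      }
    }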

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/impl/DirectDecompressionCodec.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/DirectDecompressionCodec.java b/orc/src/java/org/apache/orc/impl/DirectDecompressionCodec.java
deleted file mode 100644
index 7e0110d..0000000
--- a/orc/src/java/org/apache/orc/impl/DirectDecompressionCodec.java
+++ /dev/null
@@ -1,28 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc.impl;
-
-import org.apache.orc.CompressionCodec;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-
-public interface DirectDecompressionCodec extends CompressionCodec {
-  public boolean isAvailable();
-  public void directDecompress(ByteBuffer in, ByteBuffer out) throws IOException;
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/impl/DynamicByteArray.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/DynamicByteArray.java b/orc/src/java/org/apache/orc/impl/DynamicByteArray.java
deleted file mode 100644
index 986c2ac..0000000
--- a/orc/src/java/org/apache/orc/impl/DynamicByteArray.java
+++ /dev/null
@@ -1,303 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc.impl;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.nio.ByteBuffer;
-
-import org.apache.hadoop.io.Text;
-
-/**
- * A class that is a growable array of bytes. Growth is managed in terms of
- * chunks that are allocated when needed.
- */
-public final class DynamicByteArray {
-  static final int DEFAULT_CHUNKSIZE = 32 * 1024;
-  static final int DEFAULT_NUM_CHUNKS = 128;
-
-  private final int chunkSize;        // our allocation sizes
-  private byte[][] data;              // the real data
-  private int length;                 // max set element index +1
-  private int initializedChunks = 0;  // the number of chunks created
-
-  public DynamicByteArray() {
-    this(DEFAULT_NUM_CHUNKS, DEFAULT_CHUNKSIZE);
-  }
-
-  public DynamicByteArray(int numChunks, int chunkSize) {
-    if (chunkSize == 0) {
-      throw new IllegalArgumentException("bad chunksize");
-    }
-    this.chunkSize = chunkSize;
-    data = new byte[numChunks][];
-  }
-
-  /**
-   * Ensure that the given index is valid.
-   */
-  private void grow(int chunkIndex) {
-    if (chunkIndex >= initializedChunks) {
-      if (chunkIndex >= data.length) {
-        int newSize = Math.max(chunkIndex + 1, 2 * data.length);
-        byte[][] newChunk = new byte[newSize][];
-        System.arraycopy(data, 0, newChunk, 0, data.length);
-        data = newChunk;
-      }
-      for(int i=initializedChunks; i <= chunkIndex; ++i) {
-        data[i] = new byte[chunkSize];
-      }
-      initializedChunks = chunkIndex + 1;
-    }
-  }
-
-  public byte get(int index) {
-    if (index >= length) {
-      throw new IndexOutOfBoundsException("Index " + index +
-                                            " is outside of 0.." +
-                                            (length - 1));
-    }
-    int i = index / chunkSize;
-    int j = index % chunkSize;
-    return data[i][j];
-  }
-
-  public void set(int index, byte value) {
-    int i = index / chunkSize;
-    int j = index % chunkSize;
-    grow(i);
-    if (index >= length) {
-      length = index + 1;
-    }
-    data[i][j] = value;
-  }
-
-  public int add(byte value) {
-    int i = length / chunkSize;
-    int j = length % chunkSize;
-    grow(i);
-    data[i][j] = value;
-    int result = length;
-    length += 1;
-    return result;
-  }
-
-  /**
-   * Copy a slice of a byte array into our buffer.
-   * @param value the array to copy from
-   * @param valueOffset the first location to copy from value
-   * @param valueLength the number of bytes to copy from value
-   * @return the offset of the start of the value
-   */
-  public int add(byte[] value, int valueOffset, int valueLength) {
-    int i = length / chunkSize;
-    int j = length % chunkSize;
-    grow((length + valueLength) / chunkSize);
-    int remaining = valueLength;
-    while (remaining > 0) {
-      int size = Math.min(remaining, chunkSize - j);
-      System.arraycopy(value, valueOffset, data[i], j, size);
-      remaining -= size;
-      valueOffset += size;
-      i += 1;
-      j = 0;
-    }
-    int result = length;
-    length += valueLength;
-    return result;
-  }
-
-  /**
-   * Read the entire stream into this array.
-   * @param in the stream to read from
-   * @throws IOException
-   */
-  public void readAll(InputStream in) throws IOException {
-    int currentChunk = length / chunkSize;
-    int currentOffset = length % chunkSize;
-    grow(currentChunk);
-    int currentLength = in.read(data[currentChunk], currentOffset,
-      chunkSize - currentOffset);
-    while (currentLength > 0) {
-      length += currentLength;
-      currentOffset = length % chunkSize;
-      if (currentOffset == 0) {
-        currentChunk = length / chunkSize;
-        grow(currentChunk);
-      }
-      currentLength = in.read(data[currentChunk], currentOffset,
-        chunkSize - currentOffset);
-    }
-  }
-
-  /**
-   * Byte compare a set of bytes against the bytes in this dynamic array.
-   * @param other source of the other bytes
-   * @param otherOffset start offset in the other array
-   * @param otherLength number of bytes in the other array
-   * @param ourOffset the offset in our array
-   * @param ourLength the number of bytes in our array
-   * @return negative for less, 0 for equal, positive for greater
-   */
-  public int compare(byte[] other, int otherOffset, int otherLength,
-                     int ourOffset, int ourLength) {
-    int currentChunk = ourOffset / chunkSize;
-    int currentOffset = ourOffset % chunkSize;
-    int maxLength = Math.min(otherLength, ourLength);
-    while (maxLength > 0 &&
-      other[otherOffset] == data[currentChunk][currentOffset]) {
-      otherOffset += 1;
-      currentOffset += 1;
-      if (currentOffset == chunkSize) {
-        currentChunk += 1;
-        currentOffset = 0;
-      }
-      maxLength -= 1;
-    }
-    if (maxLength == 0) {
-      return otherLength - ourLength;
-    }
-    int otherByte = 0xff & other[otherOffset];
-    int ourByte = 0xff & data[currentChunk][currentOffset];
-    return otherByte > ourByte ? 1 : -1;
-  }
-
-  /**
-   * Get the size of the array.
-   * @return the number of bytes in the array
-   */
-  public int size() {
-    return length;
-  }
-
-  /**
-   * Clear the array to its original pristine state.
-   */
-  public void clear() {
-    length = 0;
-    for(int i=0; i < data.length; ++i) {
-      data[i] = null;
-    }
-    initializedChunks = 0;
-  }
-
-  /**
-   * Set a text value from the bytes in this dynamic array.
-   * @param result the value to set
-   * @param offset the start of the bytes to copy
-   * @param length the number of bytes to copy
-   */
-  public void setText(Text result, int offset, int length) {
-    result.clear();
-    int currentChunk = offset / chunkSize;
-    int currentOffset = offset % chunkSize;
-    int currentLength = Math.min(length, chunkSize - currentOffset);
-    while (length > 0) {
-      result.append(data[currentChunk], currentOffset, currentLength);
-      length -= currentLength;
-      currentChunk += 1;
-      currentOffset = 0;
-      currentLength = Math.min(length, chunkSize - currentOffset);
-    }
-  }
-
-  /**
-   * Write out a range of this dynamic array to an output stream.
-   * @param out the stream to write to
-   * @param offset the first offset to write
-   * @param length the number of bytes to write
-   * @throws IOException
-   */
-  public void write(OutputStream out, int offset,
-                    int length) throws IOException {
-    int currentChunk = offset / chunkSize;
-    int currentOffset = offset % chunkSize;
-    while (length > 0) {
-      int currentLength = Math.min(length, chunkSize - currentOffset);
-      out.write(data[currentChunk], currentOffset, currentLength);
-      length -= currentLength;
-      currentChunk += 1;
-      currentOffset = 0;
-    }
-  }
-
-  @Override
-  public String toString() {
-    int i;
-    StringBuilder sb = new StringBuilder(length * 3);
-
-    sb.append('{');
-    int l = length - 1;
-    for (i=0; i<l; i++) {
-      sb.append(Integer.toHexString(get(i)));
-      sb.append(',');
-    }
-    sb.append(get(i));
-    sb.append('}');
-
-    return sb.toString();
-  }
-
-  public void setByteBuffer(ByteBuffer result, int offset, int length) {
-    result.clear();
-    int currentChunk = offset / chunkSize;
-    int currentOffset = offset % chunkSize;
-    int currentLength = Math.min(length, chunkSize - currentOffset);
-    while (length > 0) {
-      result.put(data[currentChunk], currentOffset, currentLength);
-      length -= currentLength;
-      currentChunk += 1;
-      currentOffset = 0;
-      currentLength = Math.min(length, chunkSize - currentOffset);
-    }
-  }
-
-  /**
-   * Gets all the bytes of the array.
-   *
-   * @return Bytes of the array
-   */
-  public byte[] get() {
-    byte[] result = null;
-    if (length > 0) {
-      int currentChunk = 0;
-      int currentOffset = 0;
-      int currentLength = Math.min(length, chunkSize);
-      int destOffset = 0;
-      result = new byte[length];
-      int totalLength = length;
-      while (totalLength > 0) {
-        System.arraycopy(data[currentChunk], currentOffset, result, destOffset, currentLength);
-        destOffset += currentLength;
-        totalLength -= currentLength;
-        currentChunk += 1;
-        currentOffset = 0;
-        currentLength = Math.min(totalLength, chunkSize - currentOffset);
-      }
-    }
-    return result;
-  }
-
-  /**
-   * Get the size of the buffers.
-   */
-  public long getSizeInBytes() {
-    return initializedChunks * chunkSize;
-  }
-}
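
A small sketch of how the chunked byte array above is used: add() returns the
start offset of the appended bytes, and setText() copies a slice back out. The
string literal is illustrative:

    import java.nio.charset.StandardCharsets;

    import org.apache.hadoop.io.Text;
    import org.apache.orc.impl.DynamicByteArray;

    public class DynamicByteArraySketch {
      public static void main(String[] args) {
        DynamicByteArray buffer = new DynamicByteArray();
        byte[] word = "hello".getBytes(StandardCharsets.UTF_8);
        int offset = buffer.add(word, 0, word.length);  // start offset of "hello"
        Text text = new Text();
        buffer.setText(text, offset, word.length);      // copies the slice into text
        System.out.println(text + " size=" + buffer.size());
      }
    }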

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/impl/DynamicIntArray.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/DynamicIntArray.java b/orc/src/java/org/apache/orc/impl/DynamicIntArray.java
deleted file mode 100644
index 3b2884b..0000000
--- a/orc/src/java/org/apache/orc/impl/DynamicIntArray.java
+++ /dev/null
@@ -1,142 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc.impl;
-
-/**
- * Dynamic int array that uses primitive types and chunks to avoid copying
- * large number of integers when it resizes.
- *
- * The motivation for this class is memory optimization, i.e. space efficient
- * storage of potentially huge arrays without good a-priori size guesses.
- *
-   * The API of this class is between a primitive array and an AbstractList. It's
- * not a Collection implementation because it handles primitive types, but the
- * API could be extended to support iterators and the like.
- *
- * NOTE: Like standard Collection implementations/arrays, this class is not
- * synchronized.
- */
-public final class DynamicIntArray {
-  static final int DEFAULT_CHUNKSIZE = 8 * 1024;
-  static final int INIT_CHUNKS = 128;
-
-  private final int chunkSize;       // our allocation size
-  private int[][] data;              // the real data
-  private int length;                // max set element index +1
-  private int initializedChunks = 0; // the number of created chunks
-
-  public DynamicIntArray() {
-    this(DEFAULT_CHUNKSIZE);
-  }
-
-  public DynamicIntArray(int chunkSize) {
-    this.chunkSize = chunkSize;
-
-    data = new int[INIT_CHUNKS][];
-  }
-
-  /**
-   * Ensure that the given index is valid.
-   */
-  private void grow(int chunkIndex) {
-    if (chunkIndex >= initializedChunks) {
-      if (chunkIndex >= data.length) {
-        int newSize = Math.max(chunkIndex + 1, 2 * data.length);
-        int[][] newChunk = new int[newSize][];
-        System.arraycopy(data, 0, newChunk, 0, data.length);
-        data = newChunk;
-      }
-      for (int i=initializedChunks; i <= chunkIndex; ++i) {
-        data[i] = new int[chunkSize];
-      }
-      initializedChunks = chunkIndex + 1;
-    }
-  }
-
-  public int get(int index) {
-    if (index >= length) {
-      throw new IndexOutOfBoundsException("Index " + index +
-                                            " is outside of 0.." +
-                                            (length - 1));
-    }
-    int i = index / chunkSize;
-    int j = index % chunkSize;
-    return data[i][j];
-  }
-
-  public void set(int index, int value) {
-    int i = index / chunkSize;
-    int j = index % chunkSize;
-    grow(i);
-    if (index >= length) {
-      length = index + 1;
-    }
-    data[i][j] = value;
-  }
-
-  public void increment(int index, int value) {
-    int i = index / chunkSize;
-    int j = index % chunkSize;
-    grow(i);
-    if (index >= length) {
-      length = index + 1;
-    }
-    data[i][j] += value;
-  }
-
-  public void add(int value) {
-    int i = length / chunkSize;
-    int j = length % chunkSize;
-    grow(i);
-    data[i][j] = value;
-    length += 1;
-  }
-
-  public int size() {
-    return length;
-  }
-
-  public void clear() {
-    length = 0;
-    for(int i=0; i < data.length; ++i) {
-      data[i] = null;
-    }
-    initializedChunks = 0;
-  }
-
-  public String toString() {
-    int i;
-    StringBuilder sb = new StringBuilder(length * 4);
-
-    sb.append('{');
-    int l = length - 1;
-    for (i=0; i<l; i++) {
-      sb.append(get(i));
-      sb.append(',');
-    }
-    sb.append(get(i));
-    sb.append('}');
-
-    return sb.toString();
-  }
-
-  public int getSizeInBytes() {
-    return 4 * initializedChunks * chunkSize;
-  }
-}
-
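
The int variant follows the same chunked-growth pattern; a usage sketch with
illustrative sizes:

    import org.apache.orc.impl.DynamicIntArray;

    public class DynamicIntArraySketch {
      public static void main(String[] args) {
        DynamicIntArray counts = new DynamicIntArray(1024);  // 1024-entry chunks
        for (int i = 0; i < 10000; i++) {
          counts.add(i);               // grows chunk by chunk without copying values
        }
        counts.increment(42, 100);     // counts[42] += 100
        System.out.println(counts.get(42) + " entries=" + counts.size()
            + " bytes=" + counts.getSizeInBytes());
      }
    }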

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/impl/HadoopShims.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/HadoopShims.java b/orc/src/java/org/apache/orc/impl/HadoopShims.java
deleted file mode 100644
index ef7d70f..0000000
--- a/orc/src/java/org/apache/orc/impl/HadoopShims.java
+++ /dev/null
@@ -1,143 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc.impl;
-
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.util.VersionInfo;
-
-import java.io.Closeable;
-import java.io.IOException;
-import java.io.InputStream;
-import java.nio.ByteBuffer;
-
-public interface HadoopShims {
-
-  enum DirectCompressionType {
-    NONE,
-    ZLIB_NOHEADER,
-    ZLIB,
-    SNAPPY,
-  }
-
-  interface DirectDecompressor {
-    void decompress(ByteBuffer var1, ByteBuffer var2) throws IOException;
-  }
-
-  /**
-   * Get a direct decompressor, if one is available for the given codec.
-   * @param codec the compression type to get a direct decompressor for
-   * @return the direct decompressor, or null if none is available
-   */
-  DirectDecompressor getDirectDecompressor(DirectCompressionType codec);
-
-  /**
-   * a hadoop.io ByteBufferPool shim.
-   */
-  public interface ByteBufferPoolShim {
-    /**
-     * Get a new ByteBuffer from the pool.  The pool can provide this from
-     * removing a buffer from its internal cache, or by allocating a
-     * new buffer.
-     *
-     * @param direct     Whether the buffer should be direct.
-     * @param length     The minimum length the buffer will have.
-     * @return           A new ByteBuffer. Its capacity can be less
-     *                   than what was requested, but must be at
-     *                   least 1 byte.
-     */
-    ByteBuffer getBuffer(boolean direct, int length);
-
-    /**
-     * Release a buffer back to the pool.
-     * The pool may choose to put this buffer into its cache/free it.
-     *
-     * @param buffer    a direct bytebuffer
-     */
-    void putBuffer(ByteBuffer buffer);
-  }
-
-  /**
-   * Provides an HDFS ZeroCopyReader shim.
-   * @param in FSDataInputStream to read from (where the cached/mmap buffers are tied to)
-   * @param pool ByteBufferPoolShim to allocate fallback buffers with
-   *
-   * @return returns null if not supported
-   */
-  public ZeroCopyReaderShim getZeroCopyReader(FSDataInputStream in, ByteBufferPoolShim pool) throws IOException;
-
-  public interface ZeroCopyReaderShim extends Closeable {
-    /**
-     * Get a ByteBuffer from the FSDataInputStream - this can be either a HeapByteBuffer or a MappedByteBuffer.
-     * Also advances the stream position by the number of bytes read. The data read can be smaller than maxLength.
-     *
-     * @return ByteBuffer read from the stream,
-     */
-    public ByteBuffer readBuffer(int maxLength, boolean verifyChecksums) throws IOException;
-    /**
-     * Release a ByteBuffer obtained from a previous readBuffer() call back to
-     * the pool so that it may be reused or freed.
-     *
-     */
-    public void releaseBuffer(ByteBuffer buffer);
-
-    /**
-     * Close the underlying stream.
-     * @throws IOException
-     */
-    public void close() throws IOException;
-  }
-  /**
-   * Read data into a Text object in the fastest way possible
-   */
-  public interface TextReaderShim {
-    /**
-     * @param txt the Text object to read into
-     * @param size the number of bytes to read
-     * @throws IOException
-     */
-    void read(Text txt, int size) throws IOException;
-  }
-
-  /**
-   * Wrap a TextReaderShim around an input stream. The reader shim will not
-   * buffer any reads from the underlying stream and will only consume bytes
-   * which are required for TextReaderShim.read() input.
-   */
-  public TextReaderShim getTextReaderShim(InputStream input) throws IOException;
-
-  class Factory {
-    private static HadoopShims SHIMS = null;
-
-    public static synchronized HadoopShims get() {
-      if (SHIMS == null) {
-        String[] versionParts = VersionInfo.getVersion().split("[.]");
-        int major = Integer.parseInt(versionParts[0]);
-        int minor = Integer.parseInt(versionParts[1]);
-        if (major < 2 || (major == 2 && minor < 3)) {
-          SHIMS = new HadoopShims_2_2();
-        } else {
-          SHIMS = new HadoopShimsCurrent();
-        }
-      }
-      return SHIMS;
-    }
-  }
-}
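
The Factory above picks a shim implementation from the Hadoop version string;
a sketch of requesting a direct zlib decompressor through it (availability
depends on the Hadoop version, hence the null check):

    import org.apache.orc.impl.HadoopShims;

    public class ShimsSketch {
      public static void main(String[] args) {
        // HadoopShims_2_2 for Hadoop < 2.3, HadoopShimsCurrent otherwise.
        HadoopShims shims = HadoopShims.Factory.get();
        HadoopShims.DirectDecompressor zlib =
            shims.getDirectDecompressor(HadoopShims.DirectCompressionType.ZLIB);
        System.out.println("direct zlib available: " + (zlib != null));
      }
    }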

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/impl/HadoopShimsCurrent.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/HadoopShimsCurrent.java b/orc/src/java/org/apache/orc/impl/HadoopShimsCurrent.java
deleted file mode 100644
index 5c53f74..0000000
--- a/orc/src/java/org/apache/orc/impl/HadoopShimsCurrent.java
+++ /dev/null
@@ -1,92 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc.impl;
-
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.compress.snappy.SnappyDecompressor;
-import org.apache.hadoop.io.compress.zlib.ZlibDecompressor;
-
-import java.io.DataInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.nio.ByteBuffer;
-
-/**
- * Shims for recent versions of Hadoop
- */
-public class HadoopShimsCurrent implements HadoopShims {
-
-  private static class DirectDecompressWrapper implements DirectDecompressor {
-    private final org.apache.hadoop.io.compress.DirectDecompressor root;
-
-    DirectDecompressWrapper(org.apache.hadoop.io.compress.DirectDecompressor root) {
-      this.root = root;
-    }
-
-    public void decompress(ByteBuffer input,
-                           ByteBuffer output) throws IOException {
-      root.decompress(input, output);
-    }
-  }
-
-  public DirectDecompressor getDirectDecompressor(
-      DirectCompressionType codec) {
-    switch (codec) {
-      case ZLIB:
-        return new DirectDecompressWrapper
-            (new ZlibDecompressor.ZlibDirectDecompressor());
-      case ZLIB_NOHEADER:
-        return new DirectDecompressWrapper
-            (new ZlibDecompressor.ZlibDirectDecompressor
-                (ZlibDecompressor.CompressionHeader.NO_HEADER, 0));
-      case SNAPPY:
-        return new DirectDecompressWrapper
-            (new SnappyDecompressor.SnappyDirectDecompressor());
-      default:
-        return null;
-    }
-  }
-
-  @Override
-  public ZeroCopyReaderShim getZeroCopyReader(FSDataInputStream in,
-                                              ByteBufferPoolShim pool
-                                              ) throws IOException {
-    return ZeroCopyShims.getZeroCopyReader(in, pool);
-  }
-
-  private final class FastTextReaderShim implements TextReaderShim {
-    private final DataInputStream din;
-
-    public FastTextReaderShim(InputStream in) {
-      this.din = new DataInputStream(in);
-    }
-
-    @Override
-    public void read(Text txt, int len) throws IOException {
-      txt.readWithKnownLength(din, len);
-    }
-  }
-
-  @Override
-  public TextReaderShim getTextReaderShim(InputStream in) throws IOException {
-    return new FastTextReaderShim(in);
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/impl/HadoopShims_2_2.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/HadoopShims_2_2.java b/orc/src/java/org/apache/orc/impl/HadoopShims_2_2.java
deleted file mode 100644
index 3f65e74..0000000
--- a/orc/src/java/org/apache/orc/impl/HadoopShims_2_2.java
+++ /dev/null
@@ -1,101 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc.impl;
-
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.io.Text;
-
-import java.io.EOFException;
-import java.io.IOException;
-import java.io.InputStream;
-import java.lang.reflect.Method;
-
-/**
- * Shims for versions of Hadoop up to and including 2.2.x
- */
-public class HadoopShims_2_2 implements HadoopShims {
-
-  final boolean zeroCopy;
-  final boolean fastRead;
-
-  HadoopShims_2_2() {
-    boolean zcr = false;
-    try {
-      Class.forName("org.apache.hadoop.fs.CacheFlag", false,
-        HadoopShims_2_2.class.getClassLoader());
-      zcr = true;
-    } catch (ClassNotFoundException ce) {
-    }
-    zeroCopy = zcr;
-    boolean fastRead = false;
-    if (zcr) {
-      for (Method m : Text.class.getMethods()) {
-        if ("readWithKnownLength".equals(m.getName())) {
-          fastRead = true;
-        }
-      }
-    }
-    this.fastRead = fastRead;
-  }
-
-  public DirectDecompressor getDirectDecompressor(
-      DirectCompressionType codec) {
-    return null;
-  }
-
-  @Override
-  public ZeroCopyReaderShim getZeroCopyReader(FSDataInputStream in,
-                                              ByteBufferPoolShim pool
-                                              ) throws IOException {
-    if(zeroCopy) {
-      return ZeroCopyShims.getZeroCopyReader(in, pool);
-    }
-    /* not supported */
-    return null;
-  }
-
-  private final class BasicTextReaderShim implements TextReaderShim {
-    private final InputStream in;
-
-    public BasicTextReaderShim(InputStream in) {
-      this.in = in;
-    }
-
-    @Override
-    public void read(Text txt, int len) throws IOException {
-      int offset = 0;
-      byte[] bytes = new byte[len];
-      while (len > 0) {
-        int written = in.read(bytes, offset, len);
-        if (written < 0) {
-          throw new EOFException("Can't finish read from " + in + " read "
-              + (offset) + " bytes out of " + bytes.length);
-        }
-        len -= written;
-        offset += written;
-      }
-      txt.set(bytes);
-    }
-  }
-
-  @Override
-  public TextReaderShim getTextReaderShim(InputStream in) throws IOException {
-    return new BasicTextReaderShim(in);
-  }
-}
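
The 2.2 shim's read() loops until exactly len bytes have arrived; the same
contract can be exercised through the interface. A sketch with an in-memory
stream (the payload is illustrative):

    import java.io.ByteArrayInputStream;
    import java.nio.charset.StandardCharsets;

    import org.apache.hadoop.io.Text;
    import org.apache.orc.impl.HadoopShims;

    public class TextShimSketch {
      public static void main(String[] args) throws Exception {
        byte[] payload = "orc".getBytes(StandardCharsets.UTF_8);
        HadoopShims.TextReaderShim shim = HadoopShims.Factory.get()
            .getTextReaderShim(new ByteArrayInputStream(payload));
        Text txt = new Text();
        shim.read(txt, payload.length);  // reads exactly payload.length bytes into txt
        System.out.println(txt);
      }
    }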

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/impl/InStream.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/InStream.java b/orc/src/java/org/apache/orc/impl/InStream.java
deleted file mode 100644
index 851f645..0000000
--- a/orc/src/java/org/apache/orc/impl/InStream.java
+++ /dev/null
@@ -1,498 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc.impl;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.nio.ByteBuffer;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.ListIterator;
-
-import org.apache.orc.CompressionCodec;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.apache.hadoop.hive.common.io.DiskRange;
-
-import com.google.common.annotations.VisibleForTesting;
-import com.google.protobuf.CodedInputStream;
-
-public abstract class InStream extends InputStream {
-
-  private static final Logger LOG = LoggerFactory.getLogger(InStream.class);
-  public static final int PROTOBUF_MESSAGE_MAX_LIMIT = 1024 << 20; // 1GB
-
-  protected final String name;
-  protected long length;
-
-  public InStream(String name, long length) {
-    this.name = name;
-    this.length = length;
-  }
-
-  public String getStreamName() {
-    return name;
-  }
-
-  public long getStreamLength() {
-    return length;
-  }
-
-  @Override
-  public abstract void close();
-
-  public static class UncompressedStream extends InStream {
-    private List<DiskRange> bytes;
-    private long length;
-    protected long currentOffset;
-    private ByteBuffer range;
-    private int currentRange;
-
-    public UncompressedStream(String name, List<DiskRange> input, long length) {
-      super(name, length);
-      reset(input, length);
-    }
-
-    protected void reset(List<DiskRange> input, long length) {
-      this.bytes = input;
-      this.length = length;
-      currentRange = 0;
-      currentOffset = 0;
-      range = null;
-    }
-
-    @Override
-    public int read() {
-      if (range == null || range.remaining() == 0) {
-        if (currentOffset == length) {
-          return -1;
-        }
-        seek(currentOffset);
-      }
-      currentOffset += 1;
-      return 0xff & range.get();
-    }
-
-    @Override
-    public int read(byte[] data, int offset, int length) {
-      if (range == null || range.remaining() == 0) {
-        if (currentOffset == this.length) {
-          return -1;
-        }
-        seek(currentOffset);
-      }
-      int actualLength = Math.min(length, range.remaining());
-      range.get(data, offset, actualLength);
-      currentOffset += actualLength;
-      return actualLength;
-    }
-
-    @Override
-    public int available() {
-      if (range != null && range.remaining() > 0) {
-        return range.remaining();
-      }
-      return (int) (length - currentOffset);
-    }
-
-    @Override
-    public void close() {
-      currentRange = bytes.size();
-      currentOffset = length;
-      // explicit de-ref of bytes[]
-      bytes.clear();
-    }
-
-    @Override
-    public void seek(PositionProvider index) throws IOException {
-      seek(index.getNext());
-    }
-
-    public void seek(long desired) {
-      if (desired == 0 && bytes.isEmpty()) {
-        logEmptySeek(name);
-        return;
-      }
-      int i = 0;
-      for (DiskRange curRange : bytes) {
-        if (desired == 0 && curRange.getData().remaining() == 0) {
-          logEmptySeek(name);
-          return;
-        }
-        if (curRange.getOffset() <= desired &&
-            (desired - curRange.getOffset()) < curRange.getLength()) {
-          currentOffset = desired;
-          currentRange = i;
-          this.range = curRange.getData().duplicate();
-          int pos = range.position();
-          pos += (int)(desired - curRange.getOffset()); // this is why we duplicate
-          this.range.position(pos);
-          return;
-        }
-        ++i;
-      }
-      // if they are seeking to the precise end, go ahead and let them go there
-      int segments = bytes.size();
-      if (segments != 0 && desired == bytes.get(segments - 1).getEnd()) {
-        currentOffset = desired;
-        currentRange = segments - 1;
-        DiskRange curRange = bytes.get(currentRange);
-        this.range = curRange.getData().duplicate();
-        int pos = range.position();
-        pos += (int)(desired - curRange.getOffset()); // this is why we duplicate
-        this.range.position(pos);
-        return;
-      }
-      throw new IllegalArgumentException("Seek in " + name + " to " +
-        desired + " is outside of the data");
-    }
-
-    @Override
-    public String toString() {
-      return "uncompressed stream " + name + " position: " + currentOffset +
-          " length: " + length + " range: " + currentRange +
-          " offset: " + (range == null ? 0 : range.position()) + " limit: " + (range == null ? 0 : range.limit());
-    }
-  }
-
-  private static ByteBuffer allocateBuffer(int size, boolean isDirect) {
-    // TODO: use the same pool as the ORC readers
-    if (isDirect) {
-      return ByteBuffer.allocateDirect(size);
-    } else {
-      return ByteBuffer.allocate(size);
-    }
-  }
-
-  private static class CompressedStream extends InStream {
-    private final List<DiskRange> bytes;
-    private final int bufferSize;
-    private ByteBuffer uncompressed;
-    private final CompressionCodec codec;
-    private ByteBuffer compressed;
-    private long currentOffset;
-    private int currentRange;
-    private boolean isUncompressedOriginal;
-
-    public CompressedStream(String name, List<DiskRange> input, long length,
-                            CompressionCodec codec, int bufferSize) {
-      super(name, length);
-      this.bytes = input;
-      this.codec = codec;
-      this.bufferSize = bufferSize;
-      currentOffset = 0;
-      currentRange = 0;
-    }
-
-    private void allocateForUncompressed(int size, boolean isDirect) {
-      uncompressed = allocateBuffer(size, isDirect);
-    }
-
-    private void readHeader() throws IOException {
-      if (compressed == null || compressed.remaining() <= 0) {
-        seek(currentOffset);
-      }
-      if (compressed.remaining() > OutStream.HEADER_SIZE) {
-        int b0 = compressed.get() & 0xff;
-        int b1 = compressed.get() & 0xff;
-        int b2 = compressed.get() & 0xff;
-        boolean isOriginal = (b0 & 0x01) == 1;
-        int chunkLength = (b2 << 15) | (b1 << 7) | (b0 >> 1);
-
-        if (chunkLength > bufferSize) {
-          throw new IllegalArgumentException("Buffer size too small. size = " +
-              bufferSize + " needed = " + chunkLength);
-        }
-        // read 3 bytes, which should be equal to OutStream.HEADER_SIZE always
-        assert OutStream.HEADER_SIZE == 3 : "The Orc HEADER_SIZE must be the same in OutStream and InStream";
-        currentOffset += OutStream.HEADER_SIZE;
-
-        ByteBuffer slice = this.slice(chunkLength);
-
-        if (isOriginal) {
-          uncompressed = slice;
-          isUncompressedOriginal = true;
-        } else {
-          if (isUncompressedOriginal) {
-            allocateForUncompressed(bufferSize, slice.isDirect());
-            isUncompressedOriginal = false;
-          } else if (uncompressed == null) {
-            allocateForUncompressed(bufferSize, slice.isDirect());
-          } else {
-            uncompressed.clear();
-          }
-          codec.decompress(slice, uncompressed);
-         }
-      } else {
-        throw new IllegalStateException("Can't read header at " + this);
-      }
-    }
-
-    @Override
-    public int read() throws IOException {
-      if (uncompressed == null || uncompressed.remaining() == 0) {
-        if (currentOffset == length) {
-          return -1;
-        }
-        readHeader();
-      }
-      return 0xff & uncompressed.get();
-    }
-
-    @Override
-    public int read(byte[] data, int offset, int length) throws IOException {
-      if (uncompressed == null || uncompressed.remaining() == 0) {
-        if (currentOffset == this.length) {
-          return -1;
-        }
-        readHeader();
-      }
-      int actualLength = Math.min(length, uncompressed.remaining());
-      uncompressed.get(data, offset, actualLength);
-      return actualLength;
-    }
-
-    @Override
-    public int available() throws IOException {
-      if (uncompressed == null || uncompressed.remaining() == 0) {
-        if (currentOffset == length) {
-          return 0;
-        }
-        readHeader();
-      }
-      return uncompressed.remaining();
-    }
-
-    @Override
-    public void close() {
-      uncompressed = null;
-      compressed = null;
-      currentRange = bytes.size();
-      currentOffset = length;
-      bytes.clear();
-    }
-
-    @Override
-    public void seek(PositionProvider index) throws IOException {
-      seek(index.getNext());
-      long uncompressedBytes = index.getNext();
-      if (uncompressedBytes != 0) {
-        readHeader();
-        uncompressed.position(uncompressed.position() +
-                              (int) uncompressedBytes);
-      } else if (uncompressed != null) {
-        // mark the uncompressed buffer as done
-        uncompressed.position(uncompressed.limit());
-      }
-    }
-
-    /* slices a read only contiguous buffer of chunkLength */
-    private ByteBuffer slice(int chunkLength) throws IOException {
-      int len = chunkLength;
-      final long oldOffset = currentOffset;
-      ByteBuffer slice;
-      if (compressed.remaining() >= len) {
-        slice = compressed.slice();
-        // simple case
-        slice.limit(len);
-        currentOffset += len;
-        compressed.position(compressed.position() + len);
-        return slice;
-      } else if (currentRange >= (bytes.size() - 1)) {
-        // nothing has been modified yet
-        throw new IOException("EOF in " + this + " while trying to read " +
-            chunkLength + " bytes");
-      }
-
-      if (LOG.isDebugEnabled()) {
-        LOG.debug(String.format(
-            "Crossing into next BufferChunk because compressed only has %d bytes (needs %d)",
-            compressed.remaining(), len));
-      }
-
-      // we need to consolidate 2 or more buffers into 1
-      // first copy out compressed buffers
-      ByteBuffer copy = allocateBuffer(chunkLength, compressed.isDirect());
-      currentOffset += compressed.remaining();
-      len -= compressed.remaining();
-      copy.put(compressed);
-      ListIterator<DiskRange> iter = bytes.listIterator(currentRange);
-
-      while (len > 0 && iter.hasNext()) {
-        ++currentRange;
-        if (LOG.isDebugEnabled()) {
-          LOG.debug(String.format("Read slow-path, >1 cross block reads with %s", this.toString()));
-        }
-        DiskRange range = iter.next();
-        compressed = range.getData().duplicate();
-        if (compressed.remaining() >= len) {
-          slice = compressed.slice();
-          slice.limit(len);
-          copy.put(slice);
-          currentOffset += len;
-          compressed.position(compressed.position() + len);
-          return copy;
-        }
-        currentOffset += compressed.remaining();
-        len -= compressed.remaining();
-        copy.put(compressed);
-      }
-
-      // restore offsets for exception clarity
-      seek(oldOffset);
-      throw new IOException("EOF in " + this + " while trying to read " +
-          chunkLength + " bytes");
-    }
-
-    private void seek(long desired) throws IOException {
-      if (desired == 0 && bytes.isEmpty()) {
-        logEmptySeek(name);
-        return;
-      }
-      int i = 0;
-      for (DiskRange range : bytes) {
-        if (range.getOffset() <= desired && desired < range.getEnd()) {
-          currentRange = i;
-          compressed = range.getData().duplicate();
-          int pos = compressed.position();
-          pos += (int)(desired - range.getOffset());
-          compressed.position(pos);
-          currentOffset = desired;
-          return;
-        }
-        ++i;
-      }
-      // if they are seeking to the precise end, go ahead and let them go there
-      int segments = bytes.size();
-      if (segments != 0 && desired == bytes.get(segments - 1).getEnd()) {
-        DiskRange range = bytes.get(segments - 1);
-        currentRange = segments - 1;
-        compressed = range.getData().duplicate();
-        compressed.position(compressed.limit());
-        currentOffset = desired;
-        return;
-      }
-      throw new IOException("Seek outside of data in " + this + " to " + desired);
-    }
-
-    private String rangeString() {
-      StringBuilder builder = new StringBuilder();
-      int i = 0;
-      for (DiskRange range : bytes) {
-        if (i != 0) {
-          builder.append("; ");
-        }
-        builder.append(" range " + i + " = " + range.getOffset()
-            + " to " + (range.getEnd() - range.getOffset()));
-        ++i;
-      }
-      return builder.toString();
-    }
-
-    @Override
-    public String toString() {
-      return "compressed stream " + name + " position: " + currentOffset +
-          " length: " + length + " range: " + currentRange +
-          " offset: " + (compressed == null ? 0 : compressed.position()) + " limit: " + (compressed == null ? 0 : compressed.limit()) +
-          rangeString() +
-          (uncompressed == null ? "" :
-              " uncompressed: " + uncompressed.position() + " to " +
-                  uncompressed.limit());
-    }
-  }
-
-  public abstract void seek(PositionProvider index) throws IOException;
-
-  private static void logEmptySeek(String name) {
-    if (LOG.isWarnEnabled()) {
-      LOG.warn("Attempting seek into empty stream (" + name + ") Skipping stream.");
-    }
-  }
-
-  /**
-   * Create an input stream from a list of buffers.
-   * @param streamName the name of the stream
-   * @param buffers the list of ranges of bytes for the stream
-   * @param offsets a list of offsets (the same length as input) that must
-   *                contain the first offset of each set of bytes in input
-   * @param length the length in bytes of the stream
-   * @param codec the compression codec
-   * @param bufferSize the compression buffer size
-   * @return an input stream
-   * @throws IOException
-   */
-  @VisibleForTesting
-  @Deprecated
-  public static InStream create(String streamName,
-                                ByteBuffer[] buffers,
-                                long[] offsets,
-                                long length,
-                                CompressionCodec codec,
-                                int bufferSize) throws IOException {
-    List<DiskRange> input = new ArrayList<DiskRange>(buffers.length);
-    for (int i = 0; i < buffers.length; ++i) {
-      input.add(new BufferChunk(buffers[i], offsets[i]));
-    }
-    return create(streamName, input, length, codec, bufferSize);
-  }
-
-  /**
-   * Create an input stream from a list of disk ranges with data.
-   * @param name the name of the stream
-   * @param input the list of ranges of bytes for the stream; from disk or cache
-   * @param length the length in bytes of the stream
-   * @param codec the compression codec
-   * @param bufferSize the compression buffer size
-   * @return an input stream
-   * @throws IOException
-   */
-  public static InStream create(String name,
-                                List<DiskRange> input,
-                                long length,
-                                CompressionCodec codec,
-                                int bufferSize) throws IOException {
-    if (codec == null) {
-      return new UncompressedStream(name, input, length);
-    } else {
-      return new CompressedStream(name, input, length, codec, bufferSize);
-    }
-  }
-
-  /**
-   * Creates coded input stream (used for protobuf message parsing) with higher message size limit.
-   *
-   * @param name       the name of the stream
-   * @param input      the list of ranges of bytes for the stream; from disk or cache
-   * @param length     the length in bytes of the stream
-   * @param codec      the compression codec
-   * @param bufferSize the compression buffer size
-   * @return coded input stream
-   * @throws IOException
-   */
-  public static CodedInputStream createCodedInputStream(
-      String name,
-      List<DiskRange> input,
-      long length,
-      CompressionCodec codec,
-      int bufferSize) throws IOException {
-    InStream inStream = create(name, input, length, codec, bufferSize);
-    CodedInputStream codedInputStream = CodedInputStream.newInstance(inStream);
-    codedInputStream.setSizeLimit(PROTOBUF_MESSAGE_MAX_LIMIT);
-    return codedInputStream;
-  }
-}
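
For orientation, the seek() method above boils down to a linear scan that picks the DiskRange whose [offset, end) interval contains the desired position, with one special case that allows seeking to the exact end of the last range. Below is a minimal standalone sketch of that lookup; SimpleRange is a hypothetical stand-in for DiskRange and is not ORC API.

    import java.util.Arrays;
    import java.util.List;

    public class SeekSketch {
      // Hypothetical stand-in for org.apache.hadoop.hive.common.io.DiskRange.
      static final class SimpleRange {
        final long offset;  // first byte covered by this range
        final long end;     // one past the last byte covered
        SimpleRange(long offset, long end) { this.offset = offset; this.end = end; }
      }

      // Returns the index of the range containing 'desired', or -1 if none does.
      static int findRange(List<SimpleRange> ranges, long desired) {
        int i = 0;
        for (SimpleRange r : ranges) {
          if (r.offset <= desired && desired < r.end) {
            return i;
          }
          ++i;
        }
        // like seek(), allow a seek to the precise end of the last range
        if (!ranges.isEmpty() && desired == ranges.get(ranges.size() - 1).end) {
          return ranges.size() - 1;
        }
        return -1;  // seek() throws "Seek outside of data" in this case
      }

      public static void main(String[] args) {
        List<SimpleRange> ranges = Arrays.asList(
            new SimpleRange(0, 100), new SimpleRange(100, 250));
        System.out.println(findRange(ranges, 150));  // 1
        System.out.println(findRange(ranges, 250));  // 1 (exact end of last range)
        System.out.println(findRange(ranges, 300));  // -1
      }
    }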

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/impl/IntegerReader.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/IntegerReader.java b/orc/src/java/org/apache/orc/impl/IntegerReader.java
deleted file mode 100644
index 3e64d54..0000000
--- a/orc/src/java/org/apache/orc/impl/IntegerReader.java
+++ /dev/null
@@ -1,82 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc.impl;
-
-import java.io.IOException;
-
-import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
-
-/**
- * Interface for reading integers.
- */
-public interface IntegerReader {
-
-  /**
-   * Seek to the position provided by index.
-   * @param index
-   * @throws IOException
-   */
-  void seek(PositionProvider index) throws IOException;
-
-  /**
-   * Skip the specified number of values.
-   * @param numValues
-   * @throws IOException
-   */
-  void skip(long numValues) throws IOException;
-
-  /**
-   * Check if there are any more values left.
-   * @return
-   * @throws IOException
-   */
-  boolean hasNext() throws IOException;
-
-  /**
-   * Return the next available value.
-   * @return
-   * @throws IOException
-   */
-  long next() throws IOException;
-
-  /**
-   * Return the next available vector for values.
-   * @param column the column being read
-   * @param data the vector to read into
-   * @param length the number of numbers to read
-   * @throws IOException
-   */
-   void nextVector(ColumnVector column,
-                   long[] data,
-                   int length
-                   ) throws IOException;
-
-  /**
-   * Return the next available vector for values. Does not change the
-   * value of column.isRepeating.
-   * @param column the column being read
-   * @param data the vector to read into
-   * @param length the number of numbers to read
-   * @throws IOException
-   */
-  void nextVector(ColumnVector column,
-                  int[] data,
-                  int length
-                  ) throws IOException;
-}
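
As a usage note, the scalar half of this interface (hasNext/next/skip/seek) is what the tree readers drive value by value, while nextVector fills a whole batch. Below is a small caller-side sketch that drains any IntegerReader implementation using only the methods declared above; the helper name is made up.

    import java.io.IOException;
    import java.util.Arrays;
    import org.apache.orc.impl.IntegerReader;

    final class IntegerReaderSketch {
      // Read up to 'max' values from the reader using the scalar contract.
      static long[] readUpTo(IntegerReader reader, int max) throws IOException {
        long[] values = new long[max];
        int n = 0;
        while (n < max && reader.hasNext()) {
          values[n++] = reader.next();
        }
        return Arrays.copyOf(values, n);
      }
    }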

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/impl/IntegerWriter.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/IntegerWriter.java b/orc/src/java/org/apache/orc/impl/IntegerWriter.java
deleted file mode 100644
index 419054f..0000000
--- a/orc/src/java/org/apache/orc/impl/IntegerWriter.java
+++ /dev/null
@@ -1,47 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc.impl;
-
-import java.io.IOException;
-
-/**
- * Interface for writing integers.
- */
-public interface IntegerWriter {
-
-  /**
-   * Get position from the stream.
-   * @param recorder
-   * @throws IOException
-   */
-  void getPosition(PositionRecorder recorder) throws IOException;
-
-  /**
-   * Write the integer value
-   * @param value
-   * @throws IOException
-   */
-  void write(long value) throws IOException;
-
-  /**
-   * Flush the buffer
-   * @throws IOException
-   */
-  void flush() throws IOException;
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/impl/MemoryManager.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/MemoryManager.java b/orc/src/java/org/apache/orc/impl/MemoryManager.java
deleted file mode 100644
index 757c0b4..0000000
--- a/orc/src/java/org/apache/orc/impl/MemoryManager.java
+++ /dev/null
@@ -1,214 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc.impl;
-
-import org.apache.orc.OrcConf;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-
-import com.google.common.base.Preconditions;
-
-import java.io.IOException;
-import java.lang.management.ManagementFactory;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.concurrent.locks.ReentrantLock;
-
-/**
- * Implements a memory manager that keeps a global context of how many ORC
- * writers there are and manages the memory between them. For use cases with
- * dynamic partitions, it is easy to end up with many writers in the same task.
- * By scaling down the size of each allocation when the total grows too large,
- * we try to keep the task from running out of memory.
- * 
- * This class is not thread safe, but is re-entrant - ensure creation and all
- * invocations are triggered from the same thread.
- */
-public class MemoryManager {
-
-  private static final Logger LOG = LoggerFactory.getLogger(MemoryManager.class);
-
-  /**
-   * How often should we check the memory sizes? Measured in rows added
-   * to all of the writers.
-   */
-  private static final int ROWS_BETWEEN_CHECKS = 5000;
-  private final long totalMemoryPool;
-  private final Map<Path, WriterInfo> writerList =
-      new HashMap<Path, WriterInfo>();
-  private long totalAllocation = 0;
-  private double currentScale = 1;
-  private int rowsAddedSinceCheck = 0;
-  private final OwnedLock ownerLock = new OwnedLock();
-
-  @SuppressWarnings("serial")
-  private static class OwnedLock extends ReentrantLock {
-    public Thread getOwner() {
-      return super.getOwner();
-    }
-  }
-
-  private static class WriterInfo {
-    long allocation;
-    Callback callback;
-    WriterInfo(long allocation, Callback callback) {
-      this.allocation = allocation;
-      this.callback = callback;
-    }
-  }
-
-  public interface Callback {
-    /**
-     * The writer needs to check its memory usage
-     * @param newScale the current scale factor for memory allocations
-     * @return true if the writer was over the limit
-     * @throws IOException
-     */
-    boolean checkMemory(double newScale) throws IOException;
-  }
-
-  /**
-   * Create the memory manager.
-   * @param conf use the configuration to find the maximum size of the memory
-   *             pool.
-   */
-  public MemoryManager(Configuration conf) {
-    double maxLoad = OrcConf.MEMORY_POOL.getDouble(conf);
-    totalMemoryPool = Math.round(ManagementFactory.getMemoryMXBean().
-        getHeapMemoryUsage().getMax() * maxLoad);
-    ownerLock.lock();
-  }
-
-  /**
-   * Lightweight thread-safety check for multi-threaded access patterns
-   */
-  private void checkOwner() {
-    if (!ownerLock.isHeldByCurrentThread()) {
-      LOG.warn("Owner thread expected {}, got {}",
-          ownerLock.getOwner(), Thread.currentThread());
-    }
-  }
-
-  /**
-   * Add a new writer's memory allocation to the pool. We use the path
-   * as a unique key to ensure that we don't get duplicates.
-   * @param path the file that is being written
-   * @param requestedAllocation the requested buffer size
-   */
-  public void addWriter(Path path, long requestedAllocation,
-                              Callback callback) throws IOException {
-    checkOwner();
-    WriterInfo oldVal = writerList.get(path);
-    // this should always be null, but we handle the case where the memory
-    // manager wasn't told that a writer was no longer in use and the task
-    // starts writing to the same path.
-    if (oldVal == null) {
-      oldVal = new WriterInfo(requestedAllocation, callback);
-      writerList.put(path, oldVal);
-      totalAllocation += requestedAllocation;
-    } else {
-      // handle a new writer that is writing to the same path
-      totalAllocation += requestedAllocation - oldVal.allocation;
-      oldVal.allocation = requestedAllocation;
-      oldVal.callback = callback;
-    }
-    updateScale(true);
-  }
-
-  /**
-   * Remove the given writer from the pool.
-   * @param path the file that has been closed
-   */
-  public void removeWriter(Path path) throws IOException {
-    checkOwner();
-    WriterInfo val = writerList.get(path);
-    if (val != null) {
-      writerList.remove(path);
-      totalAllocation -= val.allocation;
-      if (writerList.isEmpty()) {
-        rowsAddedSinceCheck = 0;
-      }
-      updateScale(false);
-    }
-    if(writerList.isEmpty()) {
-      rowsAddedSinceCheck = 0;
-    }
-  }
-
-  /**
-   * Get the total pool size that is available for ORC writers.
-   * @return the number of bytes in the pool
-   */
-  public long getTotalMemoryPool() {
-    return totalMemoryPool;
-  }
-
-  /**
-   * The scaling factor for each allocation to ensure that the pool isn't
-   * oversubscribed.
-   * @return a fraction between 0.0 and 1.0 of the requested size that is
-   * available for each writer.
-   */
-  public double getAllocationScale() {
-    return currentScale;
-  }
-
-  /**
-   * Give the memory manager an opportunity for doing a memory check.
-   * @param rows number of rows added
-   * @throws IOException
-   */
-  public void addedRow(int rows) throws IOException {
-    rowsAddedSinceCheck += rows;
-    if (rowsAddedSinceCheck >= ROWS_BETWEEN_CHECKS) {
-      notifyWriters();
-    }
-  }
-
-  /**
-   * Notify all of the writers that they should check their memory usage.
-   * @throws IOException
-   */
-  public void notifyWriters() throws IOException {
-    checkOwner();
-    LOG.debug("Notifying writers after " + rowsAddedSinceCheck);
-    for(WriterInfo writer: writerList.values()) {
-      boolean flushed = writer.callback.checkMemory(currentScale);
-      if (LOG.isDebugEnabled() && flushed) {
-        LOG.debug("flushed " + writer.toString());
-      }
-    }
-    rowsAddedSinceCheck = 0;
-  }
-
-  /**
-   * Update the currentScale based on the current allocation and pool size.
-   * @param isAllocate is this an allocation?
-   */
-  private void updateScale(boolean isAllocate) throws IOException {
-    if (totalAllocation <= totalMemoryPool) {
-      currentScale = 1;
-    } else {
-      currentScale = (double) totalMemoryPool / totalAllocation;
-    }
-  }
-}
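
The core of updateScale() is simply min(1, pool / totalAllocation): once writers collectively request more than the pool, every writer's budget is scaled down proportionally. Below is a standalone sketch with illustrative numbers; the 512 MB pool and 256 MB requests are made up for the example.

    public class ScaleSketch {
      static double allocationScale(long totalMemoryPool, long totalAllocation) {
        return totalAllocation <= totalMemoryPool
            ? 1.0
            : (double) totalMemoryPool / totalAllocation;
      }

      public static void main(String[] args) {
        long pool = 512L << 20;          // a 512 MB pool
        long perWriter = 256L << 20;     // each writer asks for 256 MB
        long requested = 3 * perWriter;  // three concurrent writers
        double scale = allocationScale(pool, requested);
        System.out.printf("scale = %.3f, per-writer budget = %d MB%n",
            scale, (long) (perWriter * scale) >> 20);
        // prints scale = 0.667 and a budget of roughly 170 MB per writer
      }
    }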

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/impl/OrcAcidUtils.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/OrcAcidUtils.java b/orc/src/java/org/apache/orc/impl/OrcAcidUtils.java
deleted file mode 100644
index 7ca9e1d..0000000
--- a/orc/src/java/org/apache/orc/impl/OrcAcidUtils.java
+++ /dev/null
@@ -1,88 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * <p/>
- * http://www.apache.org/licenses/LICENSE-2.0
- * <p/>
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc.impl;
-
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.orc.Reader;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.nio.charset.CharacterCodingException;
-import java.nio.charset.Charset;
-import java.nio.charset.CharsetDecoder;
-
-public class OrcAcidUtils {
-  public static final String ACID_STATS = "hive.acid.stats";
-  public static final String DELTA_SIDE_FILE_SUFFIX = "_flush_length";
-
-  /**
-   * Get the filename of the ORC ACID side file that contains the lengths
-   * of the intermediate footers.
-   * @param main the main ORC filename
-   * @return the name of the side file
-   */
-  public static Path getSideFile(Path main) {
-    return new Path(main + DELTA_SIDE_FILE_SUFFIX);
-  }
-
-  /**
-   * Read the side file to get the last flush length.
-   * @param fs the file system to use
-   * @param deltaFile the path of the delta file
-   * @return the maximum size of the file to use
-   * @throws IOException
-   */
-  public static long getLastFlushLength(FileSystem fs,
-                                        Path deltaFile) throws IOException {
-    Path lengths = getSideFile(deltaFile);
-    long result = Long.MAX_VALUE;
-    if(!fs.exists(lengths)) {
-      return result;
-    }
-    try (FSDataInputStream stream = fs.open(lengths)) {
-      result = -1;
-      while (stream.available() > 0) {
-        result = stream.readLong();
-      }
-      return result;
-    } catch (IOException ioe) {
-      return result;
-    }
-  }
-
-  private static final Charset utf8 = Charset.forName("UTF-8");
-  private static final CharsetDecoder utf8Decoder = utf8.newDecoder();
-
-  public static AcidStats parseAcidStats(Reader reader) {
-    if (reader.hasMetadataValue(ACID_STATS)) {
-      try {
-        ByteBuffer val = reader.getMetadataValue(ACID_STATS).duplicate();
-        return new AcidStats(utf8Decoder.decode(val).toString());
-      } catch (CharacterCodingException e) {
-        throw new IllegalArgumentException("Bad string encoding for " +
-            ACID_STATS, e);
-      }
-    } else {
-      return null;
-    }
-  }
-
-}
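
The side file read by getLastFlushLength() above is just a sequence of 8-byte longs, and the last complete value is taken as the usable length of the still-open delta file. Below is a self-contained sketch of that convention; the flushed lengths are made up.

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.DataInputStream;
    import java.io.DataOutputStream;
    import java.io.IOException;

    public class SideFileSketch {
      public static void main(String[] args) throws IOException {
        // pretend three intermediate footers were flushed at these lengths
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        try (DataOutputStream out = new DataOutputStream(bytes)) {
          out.writeLong(1000L);
          out.writeLong(5000L);
          out.writeLong(12345L);
        }
        long last = -1;
        try (DataInputStream in =
                 new DataInputStream(new ByteArrayInputStream(bytes.toByteArray()))) {
          while (in.available() >= Long.BYTES) {
            last = in.readLong();
          }
        }
        System.out.println(last);  // 12345 -- a reader stops at this offset
      }
    }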

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/impl/OrcIndex.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/OrcIndex.java b/orc/src/java/org/apache/orc/impl/OrcIndex.java
deleted file mode 100644
index 50a15f2..0000000
--- a/orc/src/java/org/apache/orc/impl/OrcIndex.java
+++ /dev/null
@@ -1,43 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc.impl;
-
-import org.apache.orc.OrcProto;
-
-public final class OrcIndex {
-  OrcProto.RowIndex[] rowGroupIndex;
-  OrcProto.BloomFilterIndex[] bloomFilterIndex;
-
-  public OrcIndex(OrcProto.RowIndex[] rgIndex, OrcProto.BloomFilterIndex[] bfIndex) {
-    this.rowGroupIndex = rgIndex;
-    this.bloomFilterIndex = bfIndex;
-  }
-
-  public OrcProto.RowIndex[] getRowGroupIndex() {
-    return rowGroupIndex;
-  }
-
-  public OrcProto.BloomFilterIndex[] getBloomFilterIndex() {
-    return bloomFilterIndex;
-  }
-
-  public void setRowGroupIndex(OrcProto.RowIndex[] rowGroupIndex) {
-    this.rowGroupIndex = rowGroupIndex;
-  }
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/impl/OrcTail.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/OrcTail.java b/orc/src/java/org/apache/orc/impl/OrcTail.java
deleted file mode 100644
index f095603..0000000
--- a/orc/src/java/org/apache/orc/impl/OrcTail.java
+++ /dev/null
@@ -1,140 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * http://www.apache.org/licenses/LICENSE-2.0
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc.impl;
-
-import static org.apache.orc.impl.ReaderImpl.extractMetadata;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.orc.CompressionCodec;
-import org.apache.orc.CompressionKind;
-import org.apache.orc.OrcFile;
-import org.apache.orc.OrcProto;
-import org.apache.orc.StripeInformation;
-import org.apache.orc.StripeStatistics;
-
-// TODO: Make OrcTail implement FileMetadata or Reader interface
-public final class OrcTail {
-  // postscript + footer - Serialized in OrcSplit
-  private final OrcProto.FileTail fileTail;
-  // serialized representation of metadata, footer and postscript
-  private final ByteBuffer serializedTail;
-  // used to invalidate cache entries
-  private final long fileModificationTime;
-  // lazily deserialized
-  private OrcProto.Metadata metadata;
-
-  public OrcTail(OrcProto.FileTail fileTail, ByteBuffer serializedTail) {
-    this(fileTail, serializedTail, -1);
-  }
-
-  public OrcTail(OrcProto.FileTail fileTail, ByteBuffer serializedTail, long fileModificationTime) {
-    this.fileTail = fileTail;
-    this.serializedTail = serializedTail;
-    this.fileModificationTime = fileModificationTime;
-    this.metadata = null;
-  }
-
-  public ByteBuffer getSerializedTail() {
-    return serializedTail;
-  }
-
-  public long getFileModificationTime() {
-    return fileModificationTime;
-  }
-
-  public OrcProto.Footer getFooter() {
-    return fileTail.getFooter();
-  }
-
-  public OrcProto.PostScript getPostScript() {
-    return fileTail.getPostscript();
-  }
-
-  public OrcFile.WriterVersion getWriterVersion() {
-    OrcProto.PostScript ps = fileTail.getPostscript();
-    return (ps.hasWriterVersion()
-        ? OrcFile.WriterVersion.from(ps.getWriterVersion()) : OrcFile.WriterVersion.ORIGINAL);
-  }
-
-  public List<StripeInformation> getStripes() {
-    List<StripeInformation> result = new ArrayList<>(fileTail.getFooter().getStripesCount());
-    for (OrcProto.StripeInformation stripeProto : fileTail.getFooter().getStripesList()) {
-      result.add(new ReaderImpl.StripeInformationImpl(stripeProto));
-    }
-    return result;
-  }
-
-  public CompressionKind getCompressionKind() {
-    return CompressionKind.valueOf(fileTail.getPostscript().getCompression().name());
-  }
-
-  public CompressionCodec getCompressionCodec() {
-    return PhysicalFsWriter.createCodec(getCompressionKind());
-  }
-
-  public int getCompressionBufferSize() {
-    return (int) fileTail.getPostscript().getCompressionBlockSize();
-  }
-
-  public List<StripeStatistics> getStripeStatistics() throws IOException {
-    List<StripeStatistics> result = new ArrayList<>();
-    List<OrcProto.StripeStatistics> ssProto = getStripeStatisticsProto();
-    if (ssProto != null) {
-      for (OrcProto.StripeStatistics ss : ssProto) {
-        result.add(new StripeStatistics(ss.getColStatsList()));
-      }
-    }
-    return result;
-  }
-
-  public List<OrcProto.StripeStatistics> getStripeStatisticsProto() throws IOException {
-    if (serializedTail == null) return null;
-    if (metadata == null) {
-      metadata = extractMetadata(serializedTail, 0,
-          (int) fileTail.getPostscript().getMetadataLength(),
-          getCompressionCodec(), getCompressionBufferSize());
-      // clear does not clear the contents but sets position to 0 and limit = capacity
-      serializedTail.clear();
-    }
-    return metadata.getStripeStatsList();
-  }
-
-  public int getMetadataSize() {
-    return (int) getPostScript().getMetadataLength();
-  }
-
-  public List<OrcProto.Type> getTypes() {
-    return getFooter().getTypesList();
-  }
-
-  public OrcProto.FileTail getFileTail() {
-    return fileTail;
-  }
-
-  public OrcProto.FileTail getMinimalFileTail() {
-    OrcProto.FileTail.Builder fileTailBuilder = OrcProto.FileTail.newBuilder(fileTail);
-    OrcProto.Footer.Builder footerBuilder = OrcProto.Footer.newBuilder(fileTail.getFooter());
-    footerBuilder.clearStatistics();
-    fileTailBuilder.setFooter(footerBuilder.build());
-    OrcProto.FileTail result = fileTailBuilder.build();
-    return result;
-  }
-}
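
getStripeStatisticsProto() above follows a simple lazy-decode pattern: keep the serialized bytes around, decode them at most once, then serve the cached object on later calls. Below is a generic sketch of that pattern only; decode() and the String result are hypothetical stand-ins for extractMetadata() and OrcProto.Metadata.

    import java.nio.ByteBuffer;
    import java.nio.charset.StandardCharsets;

    final class LazyDecodeSketch {
      private final ByteBuffer serialized;   // kept for later re-serialization
      private String decoded;                // stand-in for the parsed metadata

      LazyDecodeSketch(ByteBuffer serialized) { this.serialized = serialized; }

      String metadata() {
        if (decoded == null && serialized != null) {
          decoded = decode(serialized.duplicate());   // decode once, cache the result
        }
        return decoded;
      }

      private static String decode(ByteBuffer buf) {
        byte[] bytes = new byte[buf.remaining()];
        buf.get(bytes);
        return new String(bytes, StandardCharsets.UTF_8);
      }
    }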

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/impl/OutStream.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/OutStream.java b/orc/src/java/org/apache/orc/impl/OutStream.java
deleted file mode 100644
index 81662cc..0000000
--- a/orc/src/java/org/apache/orc/impl/OutStream.java
+++ /dev/null
@@ -1,289 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc.impl;
-
-import org.apache.orc.CompressionCodec;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-
-public class OutStream extends PositionedOutputStream {
-
-  public interface OutputReceiver {
-    /**
-     * Output the given buffer to the final destination
-     * @param buffer the buffer to output
-     * @throws IOException
-     */
-    void output(ByteBuffer buffer) throws IOException;
-  }
-
-  public static final int HEADER_SIZE = 3;
-  private final String name;
-  private final OutputReceiver receiver;
-  // if enabled the stream will be suppressed when writing stripe
-  private boolean suppress;
-
-  /**
-   * Stores the uncompressed bytes that have been serialized, but not
-   * compressed yet. When this fills, we compress the entire buffer.
-   */
-  private ByteBuffer current = null;
-
-  /**
-   * Stores the compressed bytes until we have a full buffer and then outputs
-   * them to the receiver. If no compression is being done, this (and overflow)
-   * will always be null and the current buffer will be sent directly to the
-   * receiver.
-   */
-  private ByteBuffer compressed = null;
-
-  /**
-   * Since the compressed buffer may start with contents from previous
-   * compression blocks, we allocate an overflow buffer so that the
-   * output of the codec can be split between the two buffers. After the
-   * compressed buffer is sent to the receiver, the overflow buffer becomes
-   * the new compressed buffer.
-   */
-  private ByteBuffer overflow = null;
-  private final int bufferSize;
-  private final CompressionCodec codec;
-  private long compressedBytes = 0;
-  private long uncompressedBytes = 0;
-
-  public OutStream(String name,
-                   int bufferSize,
-                   CompressionCodec codec,
-                   OutputReceiver receiver) throws IOException {
-    this.name = name;
-    this.bufferSize = bufferSize;
-    this.codec = codec;
-    this.receiver = receiver;
-    this.suppress = false;
-  }
-
-  public void clear() throws IOException {
-    flush();
-    suppress = false;
-  }
-
-  /**
-   * Write the length of the compressed bytes. Life is much easier if the
-   * header is constant length, so just use 3 bytes. Considering most of the
-   * codecs want between 32k (snappy) and 256k (lzo, zlib), 3 bytes should
-   * be plenty. We also use the low bit for whether it is the original or
-   * compressed bytes.
-   * @param buffer the buffer to write the header to
-   * @param position the position in the buffer to write at
-   * @param val the size in the file
-   * @param original is it uncompressed
-   */
-  private static void writeHeader(ByteBuffer buffer,
-                                  int position,
-                                  int val,
-                                  boolean original) {
-    buffer.put(position, (byte) ((val << 1) + (original ? 1 : 0)));
-    buffer.put(position + 1, (byte) (val >> 7));
-    buffer.put(position + 2, (byte) (val >> 15));
-  }
-
-  private void getNewInputBuffer() throws IOException {
-    if (codec == null) {
-      current = ByteBuffer.allocate(bufferSize);
-    } else {
-      current = ByteBuffer.allocate(bufferSize + HEADER_SIZE);
-      writeHeader(current, 0, bufferSize, true);
-      current.position(HEADER_SIZE);
-    }
-  }
-
-  /**
-   * Allocate a new output buffer if we are compressing.
-   */
-  private ByteBuffer getNewOutputBuffer() throws IOException {
-    return ByteBuffer.allocate(bufferSize + HEADER_SIZE);
-  }
-
-  private void flip() throws IOException {
-    current.limit(current.position());
-    current.position(codec == null ? 0 : HEADER_SIZE);
-  }
-
-  @Override
-  public void write(int i) throws IOException {
-    if (current == null) {
-      getNewInputBuffer();
-    }
-    if (current.remaining() < 1) {
-      spill();
-    }
-    uncompressedBytes += 1;
-    current.put((byte) i);
-  }
-
-  @Override
-  public void write(byte[] bytes, int offset, int length) throws IOException {
-    if (current == null) {
-      getNewInputBuffer();
-    }
-    int remaining = Math.min(current.remaining(), length);
-    current.put(bytes, offset, remaining);
-    uncompressedBytes += remaining;
-    length -= remaining;
-    while (length != 0) {
-      spill();
-      offset += remaining;
-      remaining = Math.min(current.remaining(), length);
-      current.put(bytes, offset, remaining);
-      uncompressedBytes += remaining;
-      length -= remaining;
-    }
-  }
-
-  private void spill() throws java.io.IOException {
-    // if there isn't anything in the current buffer, don't spill
-    if (current == null ||
-        current.position() == (codec == null ? 0 : HEADER_SIZE)) {
-      return;
-    }
-    flip();
-    if (codec == null) {
-      receiver.output(current);
-      getNewInputBuffer();
-    } else {
-      if (compressed == null) {
-        compressed = getNewOutputBuffer();
-      } else if (overflow == null) {
-        overflow = getNewOutputBuffer();
-      }
-      int sizePosn = compressed.position();
-      compressed.position(compressed.position() + HEADER_SIZE);
-      if (codec.compress(current, compressed, overflow)) {
-        uncompressedBytes = 0;
-        // move position back to after the header
-        current.position(HEADER_SIZE);
-        current.limit(current.capacity());
-        // find the total bytes in the chunk
-        int totalBytes = compressed.position() - sizePosn - HEADER_SIZE;
-        if (overflow != null) {
-          totalBytes += overflow.position();
-        }
-        compressedBytes += totalBytes + HEADER_SIZE;
-        writeHeader(compressed, sizePosn, totalBytes, false);
-        // if we have less than the next header left, spill it.
-        if (compressed.remaining() < HEADER_SIZE) {
-          compressed.flip();
-          receiver.output(compressed);
-          compressed = overflow;
-          overflow = null;
-        }
-      } else {
-        compressedBytes += uncompressedBytes + HEADER_SIZE;
-        uncompressedBytes = 0;
-        // we are using the original, but need to spill the current
-        // compressed buffer first. So back up to where we started,
-        // flip it and add it to done.
-        if (sizePosn != 0) {
-          compressed.position(sizePosn);
-          compressed.flip();
-          receiver.output(compressed);
-          compressed = null;
-          // if we have an overflow, clear it and make it the new compressed
-          // buffer
-          if (overflow != null) {
-            overflow.clear();
-            compressed = overflow;
-            overflow = null;
-          }
-        } else {
-          compressed.clear();
-          if (overflow != null) {
-            overflow.clear();
-          }
-        }
-
-        // now add the current buffer into the done list and get a new one.
-        current.position(0);
-        // update the header with the current length
-        writeHeader(current, 0, current.limit() - HEADER_SIZE, true);
-        receiver.output(current);
-        getNewInputBuffer();
-      }
-    }
-  }
-
-  @Override
-  public void getPosition(PositionRecorder recorder) throws IOException {
-    if (codec == null) {
-      recorder.addPosition(uncompressedBytes);
-    } else {
-      recorder.addPosition(compressedBytes);
-      recorder.addPosition(uncompressedBytes);
-    }
-  }
-
-  @Override
-  public void flush() throws IOException {
-    spill();
-    if (compressed != null && compressed.position() != 0) {
-      compressed.flip();
-      receiver.output(compressed);
-    }
-    compressed = null;
-    uncompressedBytes = 0;
-    compressedBytes = 0;
-    overflow = null;
-    current = null;
-  }
-
-  @Override
-  public String toString() {
-    return name;
-  }
-
-  @Override
-  public long getBufferSize() {
-    long result = 0;
-    if (current != null) {
-      result += current.capacity();
-    }
-    if (compressed != null) {
-      result += compressed.capacity();
-    }
-    if (overflow != null) {
-      result += overflow.capacity();
-    }
-    return result;
-  }
-
-  /**
-   * Set suppress flag
-   */
-  public void suppress() {
-    suppress = true;
-  }
-
-  /**
-   * Returns the state of suppress flag
-   * @return value of suppress flag
-   */
-  public boolean isSuppressed() {
-    return suppress;
-  }
-}
-
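
The 3-byte chunk header written by writeHeader() above packs the chunk length into bits 1..23 and uses bit 0 to flag an "original" (stored uncompressed) chunk. Below is a worked round-trip that reuses that encoding together with a hand-written decoder for illustration.

    import java.nio.ByteBuffer;

    public class HeaderSketch {
      static void writeHeader(ByteBuffer buffer, int position, int val, boolean original) {
        buffer.put(position, (byte) ((val << 1) + (original ? 1 : 0)));
        buffer.put(position + 1, (byte) (val >> 7));
        buffer.put(position + 2, (byte) (val >> 15));
      }

      public static void main(String[] args) {
        ByteBuffer buf = ByteBuffer.allocate(3);
        writeHeader(buf, 0, 100_000, false);   // a 100,000 byte compressed chunk

        int b0 = buf.get(0) & 0xff, b1 = buf.get(1) & 0xff, b2 = buf.get(2) & 0xff;
        boolean original = (b0 & 1) != 0;       // low bit: original vs. compressed
        int length = (b0 >>> 1) | (b1 << 7) | (b2 << 15);
        System.out.println(original + " " + length);   // prints "false 100000"
      }
    }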


http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/impl/RecordReaderImpl.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/RecordReaderImpl.java b/orc/src/java/org/apache/orc/impl/RecordReaderImpl.java
deleted file mode 100644
index 820674b..0000000
--- a/orc/src/java/org/apache/orc/impl/RecordReaderImpl.java
+++ /dev/null
@@ -1,1238 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc.impl;
-
-import java.io.IOException;
-import java.math.BigDecimal;
-import java.sql.Date;
-import java.sql.Timestamp;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.orc.BooleanColumnStatistics;
-import org.apache.orc.Reader;
-import org.apache.orc.RecordReader;
-import org.apache.orc.TypeDescription;
-import org.apache.orc.ColumnStatistics;
-import org.apache.orc.CompressionCodec;
-import org.apache.orc.DataReader;
-import org.apache.orc.DateColumnStatistics;
-import org.apache.orc.DecimalColumnStatistics;
-import org.apache.orc.DoubleColumnStatistics;
-import org.apache.orc.IntegerColumnStatistics;
-import org.apache.orc.OrcConf;
-import org.apache.orc.StringColumnStatistics;
-import org.apache.orc.StripeInformation;
-import org.apache.orc.TimestampColumnStatistics;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.common.io.DiskRange;
-import org.apache.hadoop.hive.common.io.DiskRangeList;
-import org.apache.hadoop.hive.common.io.DiskRangeList.CreateHelper;
-import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.orc.BloomFilterIO;
-import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
-import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
-import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
-import org.apache.hadoop.hive.serde2.io.DateWritable;
-import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
-import org.apache.hadoop.hive.ql.util.TimestampUtils;
-import org.apache.hadoop.io.Text;
-import org.apache.orc.OrcProto;
-
-public class RecordReaderImpl implements RecordReader {
-  static final Logger LOG = LoggerFactory.getLogger(RecordReaderImpl.class);
-  private static final boolean isLogDebugEnabled = LOG.isDebugEnabled();
-  private static final Object UNKNOWN_VALUE = new Object();
-  protected final Path path;
-  private final long firstRow;
-  private final List<StripeInformation> stripes =
-      new ArrayList<StripeInformation>();
-  private OrcProto.StripeFooter stripeFooter;
-  private final long totalRowCount;
-  private final CompressionCodec codec;
-  protected final TypeDescription schema;
-  private final List<OrcProto.Type> types;
-  private final int bufferSize;
-  private final SchemaEvolution evolution;
-  // the columns included from the file, indexed by the file's column ids.
-  private final boolean[] included;
-  private final long rowIndexStride;
-  private long rowInStripe = 0;
-  private int currentStripe = -1;
-  private long rowBaseInStripe = 0;
-  private long rowCountInStripe = 0;
-  private final Map<StreamName, InStream> streams =
-      new HashMap<StreamName, InStream>();
-  DiskRangeList bufferChunks = null;
-  private final TreeReaderFactory.TreeReader reader;
-  private final OrcProto.RowIndex[] indexes;
-  private final OrcProto.BloomFilterIndex[] bloomFilterIndices;
-  private final SargApplier sargApp;
-  // an array indicating which row groups aren't skipped
-  private boolean[] includedRowGroups = null;
-  private final DataReader dataReader;
-
-  /**
-   * Find the given column name in the reader schema and return its file column id.
-   *
-   * @param evolution the mapping from reader to file schema
-   * @param columnName  the column name to look for
-   * @return the file column id or -1 if the column wasn't found
-   */
-  static int findColumns(SchemaEvolution evolution,
-                         String columnName) {
-    TypeDescription readerSchema = evolution.getReaderBaseSchema();
-    List<String> fieldNames = readerSchema.getFieldNames();
-    List<TypeDescription> children = readerSchema.getChildren();
-    for (int i = 0; i < fieldNames.size(); ++i) {
-      if (columnName.equals(fieldNames.get(i))) {
-        TypeDescription result = evolution.getFileType(children.get(i).getId());
-        return result == null ? -1 : result.getId();
-      }
-    }
-    return -1;
-  }
-
-  /**
-   * Find the mapping from predicate leaves to columns.
-   * @param sargLeaves the search argument that we need to map
-   * @param evolution the mapping from reader to file schema
-   * @return an array mapping the sarg leaves to file column ids
-   */
-  public static int[] mapSargColumnsToOrcInternalColIdx(List<PredicateLeaf> sargLeaves,
-                             SchemaEvolution evolution) {
-    int[] result = new int[sargLeaves.size()];
-    Arrays.fill(result, -1);
-    for(int i=0; i < result.length; ++i) {
-      String colName = sargLeaves.get(i).getColumnName();
-      result[i] = findColumns(evolution, colName);
-    }
-    return result;
-  }
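
// Sketch of the mapping built by the two methods above: each predicate leaf
// names a top-level column, and names that cannot be resolved map to -1 so
// predicate pushdown simply ignores them. The Map below is a made-up
// stand-in for SchemaEvolution; this is an illustration, not ORC API.
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

final class SargMappingSketch {
  static int[] mapColumns(List<String> leafColumnNames, Map<String, Integer> fileIds) {
    int[] result = new int[leafColumnNames.size()];
    Arrays.fill(result, -1);
    for (int i = 0; i < result.length; ++i) {
      result[i] = fileIds.getOrDefault(leafColumnNames.get(i), -1);
    }
    return result;
  }

  public static void main(String[] args) {
    Map<String, Integer> fileIds = new HashMap<>();
    fileIds.put("id", 1);      // hypothetical schema; column ids are made up
    fileIds.put("name", 2);
    System.out.println(Arrays.toString(
        mapColumns(Arrays.asList("name", "missing_col"), fileIds)));  // [2, -1]
  }
}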
-
-  protected RecordReaderImpl(ReaderImpl fileReader,
-                             Reader.Options options) throws IOException {
-    boolean[] readerIncluded = options.getInclude();
-    if (options.getSchema() == null) {
-      if (LOG.isInfoEnabled()) {
-        LOG.info("Reader schema not provided -- using file schema " +
-            fileReader.getSchema());
-      }
-      evolution = new SchemaEvolution(fileReader.getSchema(), readerIncluded);
-    } else {
-
-      // Now that we are creating a record reader for a file, validate that the schema to read
-      // is compatible with the file schema.
-      //
-      evolution = new SchemaEvolution(fileReader.getSchema(),
-          options.getSchema(), readerIncluded);
-      if (LOG.isDebugEnabled() && evolution.hasConversion()) {
-        LOG.debug("ORC file " + fileReader.path.toString() +
-            " has data type conversion --\n" +
-            "reader schema: " + options.getSchema().toString() + "\n" +
-            "file schema:   " + fileReader.getSchema());
-      }
-    }
-    this.schema = evolution.getReaderSchema();
-    this.path = fileReader.path;
-    this.codec = fileReader.codec;
-    this.types = fileReader.types;
-    this.bufferSize = fileReader.bufferSize;
-    this.rowIndexStride = fileReader.rowIndexStride;
-    SearchArgument sarg = options.getSearchArgument();
-    if (sarg != null && rowIndexStride != 0) {
-      sargApp = new SargApplier(sarg, options.getColumnNames(), rowIndexStride,
-          evolution);
-    } else {
-      sargApp = null;
-    }
-    long rows = 0;
-    long skippedRows = 0;
-    long offset = options.getOffset();
-    long maxOffset = options.getMaxOffset();
-    for(StripeInformation stripe: fileReader.getStripes()) {
-      long stripeStart = stripe.getOffset();
-      if (offset > stripeStart) {
-        skippedRows += stripe.getNumberOfRows();
-      } else if (stripeStart < maxOffset) {
-        this.stripes.add(stripe);
-        rows += stripe.getNumberOfRows();
-      }
-    }
-
-    Boolean zeroCopy = options.getUseZeroCopy();
-    if (zeroCopy == null) {
-      zeroCopy = OrcConf.USE_ZEROCOPY.getBoolean(fileReader.conf);
-    }
-    if (options.getDataReader() != null) {
-      this.dataReader = options.getDataReader();
-    } else {
-      this.dataReader = RecordReaderUtils.createDefaultDataReader(
-          DataReaderProperties.builder()
-              .withBufferSize(bufferSize)
-              .withCompression(fileReader.compressionKind)
-              .withFileSystem(fileReader.fileSystem)
-              .withPath(fileReader.path)
-              .withTypeCount(types.size())
-              .withZeroCopy(zeroCopy)
-              .build());
-    }
-    this.dataReader.open();
-
-    firstRow = skippedRows;
-    totalRowCount = rows;
-    Boolean skipCorrupt = options.getSkipCorruptRecords();
-    if (skipCorrupt == null) {
-      skipCorrupt = OrcConf.SKIP_CORRUPT_DATA.getBoolean(fileReader.conf);
-    }
-
-    reader = TreeReaderFactory.createTreeReader(evolution.getReaderSchema(),
-        evolution, readerIncluded, skipCorrupt);
-    indexes = new OrcProto.RowIndex[types.size()];
-    bloomFilterIndices = new OrcProto.BloomFilterIndex[types.size()];
-    this.included = evolution.getFileIncluded();
-    advanceToNextRow(reader, 0L, true);
-  }
-
-  public static final class PositionProviderImpl implements PositionProvider {
-    private final OrcProto.RowIndexEntry entry;
-    private int index;
-
-    public PositionProviderImpl(OrcProto.RowIndexEntry entry) {
-      this(entry, 0);
-    }
-
-    public PositionProviderImpl(OrcProto.RowIndexEntry entry, int startPos) {
-      this.entry = entry;
-      this.index = startPos;
-    }
-
-    @Override
-    public long getNext() {
-      return entry.getPositions(index++);
-    }
-
-    @Override
-    public String toString() {
-      return "{" + entry.getPositionsList() + "; " + index + "}";
-    }
-  }
-
-  public OrcProto.StripeFooter readStripeFooter(StripeInformation stripe
-                                                ) throws IOException {
-    return dataReader.readStripeFooter(stripe);
-  }
-
-  enum Location {
-    BEFORE, MIN, MIDDLE, MAX, AFTER
-  }
-
-  /**
-   * Given a point and min and max, determine if the point is before, at the
-   * min, in the middle, at the max, or after the range.
-   * @param point the point to test
-   * @param min the minimum point
-   * @param max the maximum point
-   * @param <T> the type of the comparison
-   * @return the location of the point
-   */
-  static <T> Location compareToRange(Comparable<T> point, T min, T max) {
-    int minCompare = point.compareTo(min);
-    if (minCompare < 0) {
-      return Location.BEFORE;
-    } else if (minCompare == 0) {
-      return Location.MIN;
-    }
-    int maxCompare = point.compareTo(max);
-    if (maxCompare > 0) {
-      return Location.AFTER;
-    } else if (maxCompare == 0) {
-      return Location.MAX;
-    }
-    return Location.MIDDLE;
-  }
-
-  /**
-   * Get the maximum value out of an index entry.
-   * @param index
-   *          the index entry
-   * @return the object for the maximum value or null if there isn't one
-   */
-  static Object getMax(ColumnStatistics index) {
-    if (index instanceof IntegerColumnStatistics) {
-      return ((IntegerColumnStatistics) index).getMaximum();
-    } else if (index instanceof DoubleColumnStatistics) {
-      return ((DoubleColumnStatistics) index).getMaximum();
-    } else if (index instanceof StringColumnStatistics) {
-      return ((StringColumnStatistics) index).getMaximum();
-    } else if (index instanceof DateColumnStatistics) {
-      return ((DateColumnStatistics) index).getMaximum();
-    } else if (index instanceof DecimalColumnStatistics) {
-      return ((DecimalColumnStatistics) index).getMaximum();
-    } else if (index instanceof TimestampColumnStatistics) {
-      return ((TimestampColumnStatistics) index).getMaximum();
-    } else if (index instanceof BooleanColumnStatistics) {
-      if (((BooleanColumnStatistics)index).getTrueCount()!=0) {
-        return Boolean.TRUE;
-      } else {
-        return Boolean.FALSE;
-      }
-    } else {
-      return null;
-    }
-  }
-
-  /**
-   * Get the minimum value out of an index entry.
-   * @param index
-   *          the index entry
-   * @return the object for the minimum value or null if there isn't one
-   */
-  static Object getMin(ColumnStatistics index) {
-    if (index instanceof IntegerColumnStatistics) {
-      return ((IntegerColumnStatistics) index).getMinimum();
-    } else if (index instanceof DoubleColumnStatistics) {
-      return ((DoubleColumnStatistics) index).getMinimum();
-    } else if (index instanceof StringColumnStatistics) {
-      return ((StringColumnStatistics) index).getMinimum();
-    } else if (index instanceof DateColumnStatistics) {
-      return ((DateColumnStatistics) index).getMinimum();
-    } else if (index instanceof DecimalColumnStatistics) {
-      return ((DecimalColumnStatistics) index).getMinimum();
-    } else if (index instanceof TimestampColumnStatistics) {
-      return ((TimestampColumnStatistics) index).getMinimum();
-    } else if (index instanceof BooleanColumnStatistics) {
-      if (((BooleanColumnStatistics)index).getFalseCount()!=0) {
-        return Boolean.FALSE;
-      } else {
-        return Boolean.TRUE;
-      }
-    } else {
-      return UNKNOWN_VALUE; // null is not safe here
-    }
-  }
-
-  /**
-   * Evaluate a predicate with respect to the statistics from the column
-   * that is referenced in the predicate.
-   * @param statsProto the statistics for the column mentioned in the predicate
-   * @param predicate the leaf predicate we need to evaluate
-   * @param bloomFilter
-   * @return the set of truth values that may be returned for the given
-   *   predicate.
-   */
-  static TruthValue evaluatePredicateProto(OrcProto.ColumnStatistics statsProto,
-      PredicateLeaf predicate, OrcProto.BloomFilter bloomFilter) {
-    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(statsProto);
-    Object minValue = getMin(cs);
-    Object maxValue = getMax(cs);
-    BloomFilterIO bf = null;
-    if (bloomFilter != null) {
-      bf = new BloomFilterIO(bloomFilter);
-    }
-    return evaluatePredicateRange(predicate, minValue, maxValue, cs.hasNull(), bf);
-  }
-
-  /**
-   * Evaluate a predicate with respect to the statistics from the column
-   * that is referenced in the predicate.
-   * @param stats the statistics for the column mentioned in the predicate
-   * @param predicate the leaf predicate we need to evaluate
-   * @return the set of truth values that may be returned for the given
-   *   predicate.
-   */
-  public static TruthValue evaluatePredicate(ColumnStatistics stats,
-                                             PredicateLeaf predicate,
-                                             BloomFilterIO bloomFilter) {
-    Object minValue = getMin(stats);
-    Object maxValue = getMax(stats);
-    return evaluatePredicateRange(predicate, minValue, maxValue, stats.hasNull(), bloomFilter);
-  }
-
-  static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object min,
-      Object max, boolean hasNull, BloomFilterIO bloomFilter) {
-    // if we didn't have any values, everything must have been null
-    if (min == null) {
-      if (predicate.getOperator() == PredicateLeaf.Operator.IS_NULL) {
-        return TruthValue.YES;
-      } else {
-        return TruthValue.NULL;
-      }
-    } else if (min == UNKNOWN_VALUE) {
-      return TruthValue.YES_NO_NULL;
-    }
-
-    // TODO: Enabling PPD for timestamp requires ORC-101 and ORC-135
-    if (min != null && min instanceof Timestamp) {
-      if (LOG.isDebugEnabled()) {
-        LOG.debug("Not using predication pushdown on {} because it doesn't " +
-          "include ORC-135.", predicate.getColumnName());
-      }
-      return TruthValue.YES_NO_NULL;
-    }
-
-    TruthValue result;
-    Object baseObj = predicate.getLiteral();
-    try {
-      // Predicate object and stats objects are converted to the type of the predicate object.
-      Object minValue = getBaseObjectForComparison(predicate.getType(), min);
-      Object maxValue = getBaseObjectForComparison(predicate.getType(), max);
-      Object predObj = getBaseObjectForComparison(predicate.getType(), baseObj);
-
-      result = evaluatePredicateMinMax(predicate, predObj, minValue, maxValue, hasNull);
-      if (shouldEvaluateBloomFilter(predicate, result, bloomFilter)) {
-        result = evaluatePredicateBloomFilter(predicate, predObj, bloomFilter, hasNull);
-      }
-      // in case failed conversion, return the default YES_NO_NULL truth value
-    } catch (Exception e) {
-      if (LOG.isDebugEnabled()) {
-        final String statsType = min == null ?
-            (max == null ? "null" : max.getClass().getSimpleName()) :
-            min.getClass().getSimpleName();
-        final String predicateType = baseObj == null ? "null" : baseObj.getClass().getSimpleName();
-        final String reason = e.getClass().getSimpleName() + " when evaluating predicate." +
-            " Skipping ORC PPD." +
-            " Exception: " + e.getMessage() +
-            " StatsType: " + statsType +
-            " PredicateType: " + predicateType;
-        LOG.debug(reason);
-      }
-      if (predicate.getOperator().equals(PredicateLeaf.Operator.NULL_SAFE_EQUALS) || !hasNull) {
-        result = TruthValue.YES_NO;
-      } else {
-        result = TruthValue.YES_NO_NULL;
-      }
-    }
-    return result;
-  }
-
-  private static boolean shouldEvaluateBloomFilter(PredicateLeaf predicate,
-      TruthValue result, BloomFilterIO bloomFilter) {
-    // evaluate bloom filter only when
-    // 1) Bloom filter is available
-    // 2) Min/Max evaluation yield YES or MAYBE
-    // 3) Predicate is EQUALS or IN list
-    if (bloomFilter != null
-        && result != TruthValue.NO_NULL && result != TruthValue.NO
-        && (predicate.getOperator().equals(PredicateLeaf.Operator.EQUALS)
-            || predicate.getOperator().equals(PredicateLeaf.Operator.NULL_SAFE_EQUALS)
-            || predicate.getOperator().equals(PredicateLeaf.Operator.IN))) {
-      return true;
-    }
-    return false;
-  }
-
-  private static TruthValue evaluatePredicateMinMax(PredicateLeaf predicate, Object predObj,
-      Object minValue,
-      Object maxValue,
-      boolean hasNull) {
-    Location loc;
-
-    switch (predicate.getOperator()) {
-      case NULL_SAFE_EQUALS:
-        loc = compareToRange((Comparable) predObj, minValue, maxValue);
-        if (loc == Location.BEFORE || loc == Location.AFTER) {
-          return TruthValue.NO;
-        } else {
-          return TruthValue.YES_NO;
-        }
-      case EQUALS:
-        loc = compareToRange((Comparable) predObj, minValue, maxValue);
-        if (minValue.equals(maxValue) && loc == Location.MIN) {
-          return hasNull ? TruthValue.YES_NULL : TruthValue.YES;
-        } else if (loc == Location.BEFORE || loc == Location.AFTER) {
-          return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
-        } else {
-          return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
-        }
-      case LESS_THAN:
-        loc = compareToRange((Comparable) predObj, minValue, maxValue);
-        if (loc == Location.AFTER) {
-          return hasNull ? TruthValue.YES_NULL : TruthValue.YES;
-        } else if (loc == Location.BEFORE || loc == Location.MIN) {
-          return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
-        } else {
-          return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
-        }
-      case LESS_THAN_EQUALS:
-        loc = compareToRange((Comparable) predObj, minValue, maxValue);
-        if (loc == Location.AFTER || loc == Location.MAX) {
-          return hasNull ? TruthValue.YES_NULL : TruthValue.YES;
-        } else if (loc == Location.BEFORE) {
-          return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
-        } else {
-          return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
-        }
-      case IN:
-        if (minValue.equals(maxValue)) {
-          // for a single value, look through to see if that value is in the
-          // set
-          for (Object arg : predicate.getLiteralList()) {
-            predObj = getBaseObjectForComparison(predicate.getType(), arg);
-            loc = compareToRange((Comparable) predObj, minValue, maxValue);
-            if (loc == Location.MIN) {
-              return hasNull ? TruthValue.YES_NULL : TruthValue.YES;
-            }
-          }
-          return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
-        } else {
-          // are all of the values outside of the range?
-          for (Object arg : predicate.getLiteralList()) {
-            predObj = getBaseObjectForComparison(predicate.getType(), arg);
-            loc = compareToRange((Comparable) predObj, minValue, maxValue);
-            if (loc == Location.MIN || loc == Location.MIDDLE ||
-                loc == Location.MAX) {
-              return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
-            }
-          }
-          return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
-        }
-      case BETWEEN:
-        List<Object> args = predicate.getLiteralList();
-        Object predObj1 = getBaseObjectForComparison(predicate.getType(), args.get(0));
-
-        loc = compareToRange((Comparable) predObj1, minValue, maxValue);
-        if (loc == Location.BEFORE || loc == Location.MIN) {
-          Object predObj2 = getBaseObjectForComparison(predicate.getType(), args.get(1));
-
-          Location loc2 = compareToRange((Comparable) predObj2, minValue, maxValue);
-          if (loc2 == Location.AFTER || loc2 == Location.MAX) {
-            return hasNull ? TruthValue.YES_NULL : TruthValue.YES;
-          } else if (loc2 == Location.BEFORE) {
-            return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
-          } else {
-            return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
-          }
-        } else if (loc == Location.AFTER) {
-          return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
-        } else {
-          return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
-        }
-      case IS_NULL:
-        // min = null condition above handles the all-nulls YES case
-        return hasNull ? TruthValue.YES_NO : TruthValue.NO;
-      default:
-        return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
-    }
-  }
-
-  private static TruthValue evaluatePredicateBloomFilter(PredicateLeaf predicate,
-      final Object predObj, BloomFilterIO bloomFilter, boolean hasNull) {
-    switch (predicate.getOperator()) {
-      case NULL_SAFE_EQUALS:
-        // null safe equals does not return *_NULL variant. So set hasNull to false
-        return checkInBloomFilter(bloomFilter, predObj, false);
-      case EQUALS:
-        return checkInBloomFilter(bloomFilter, predObj, hasNull);
-      case IN:
-        for (Object arg : predicate.getLiteralList()) {
-          // if at least one value in the IN list exists in the bloom filter, qualify the row group/stripe
-          Object predObjItem = getBaseObjectForComparison(predicate.getType(), arg);
-          TruthValue result = checkInBloomFilter(bloomFilter, predObjItem, hasNull);
-          if (result == TruthValue.YES_NO_NULL || result == TruthValue.YES_NO) {
-            return result;
-          }
-        }
-        return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
-      default:
-        return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
-    }
-  }
-
-  private static TruthValue checkInBloomFilter(BloomFilterIO bf, Object predObj, boolean hasNull) {
-    TruthValue result = hasNull ? TruthValue.NO_NULL : TruthValue.NO;
-
-    if (predObj instanceof Long) {
-      if (bf.testLong(((Long) predObj).longValue())) {
-        result = TruthValue.YES_NO_NULL;
-      }
-    } else if (predObj instanceof Double) {
-      if (bf.testDouble(((Double) predObj).doubleValue())) {
-        result = TruthValue.YES_NO_NULL;
-      }
-    } else if (predObj instanceof String || predObj instanceof Text ||
-        predObj instanceof HiveDecimalWritable ||
-        predObj instanceof BigDecimal) {
-      if (bf.testString(predObj.toString())) {
-        result = TruthValue.YES_NO_NULL;
-      }
-    } else if (predObj instanceof Timestamp) {
-      if (bf.testLong(((Timestamp) predObj).getTime())) {
-        result = TruthValue.YES_NO_NULL;
-      }
-    } else if (predObj instanceof Date) {
-      if (bf.testLong(DateWritable.dateToDays((Date) predObj))) {
-        result = TruthValue.YES_NO_NULL;
-      }
-    } else {
-      // if the predicate object is null and if hasNull says there are no nulls then return NO
-      if (predObj == null && !hasNull) {
-        result = TruthValue.NO;
-      } else {
-        result = TruthValue.YES_NO_NULL;
-      }
-    }
-
-    if (result == TruthValue.YES_NO_NULL && !hasNull) {
-      result = TruthValue.YES_NO;
-    }
-
-    if (LOG.isDebugEnabled()) {
-      LOG.debug("Bloom filter evaluation: " + result.toString());
-    }
-
-    return result;
-  }
-
-  private static Object getBaseObjectForComparison(PredicateLeaf.Type type, Object obj) {
-    if (obj == null) {
-      return null;
-    }
-    switch (type) {
-      case BOOLEAN:
-        if (obj instanceof Boolean) {
-          return obj;
-        } else {
-          // will only be true if the string conversion yields "true"; all other values are
-          // considered false
-          return Boolean.valueOf(obj.toString());
-        }
-      case DATE:
-        if (obj instanceof Date) {
-          return obj;
-        } else if (obj instanceof String) {
-          return Date.valueOf((String) obj);
-        } else if (obj instanceof Timestamp) {
-          return DateWritable.timeToDate(((Timestamp) obj).getTime() / 1000L);
-        }
-        // otherwise the value is a string; prevent the comparison to numbers (are they days/seconds/milliseconds?)
-        break;
-      case DECIMAL:
-        if (obj instanceof Boolean) {
-          return new HiveDecimalWritable(((Boolean) obj).booleanValue() ?
-              HiveDecimal.ONE : HiveDecimal.ZERO);
-        } else if (obj instanceof Integer) {
-          return new HiveDecimalWritable(((Integer) obj).intValue());
-        } else if (obj instanceof Long) {
-          return new HiveDecimalWritable(((Long) obj));
-        } else if (obj instanceof Float || obj instanceof Double ||
-            obj instanceof String) {
-          return new HiveDecimalWritable(obj.toString());
-        } else if (obj instanceof BigDecimal) {
-          return new HiveDecimalWritable(HiveDecimal.create((BigDecimal) obj));
-        } else if (obj instanceof HiveDecimal) {
-          return new HiveDecimalWritable((HiveDecimal) obj);
-        } else if (obj instanceof HiveDecimalWritable) {
-          return obj;
-        } else if (obj instanceof Timestamp) {
-          return new HiveDecimalWritable(Double.toString(
-              TimestampUtils.getDouble((Timestamp) obj)));
-        }
-        break;
-      case FLOAT:
-        if (obj instanceof Number) {
-          // widening conversion
-          return ((Number) obj).doubleValue();
-        } else if (obj instanceof HiveDecimal) {
-          return ((HiveDecimal) obj).doubleValue();
-        } else if (obj instanceof String) {
-          return Double.valueOf(obj.toString());
-        } else if (obj instanceof Timestamp) {
-          return TimestampUtils.getDouble((Timestamp) obj);
-        } else if (obj instanceof BigDecimal) {
-          return ((BigDecimal) obj).doubleValue();
-        }
-        break;
-      case LONG:
-        if (obj instanceof Number) {
-          // widening conversion
-          return ((Number) obj).longValue();
-        } else if (obj instanceof HiveDecimal) {
-          return ((HiveDecimal) obj).longValue();
-        } else if (obj instanceof String) {
-          return Long.valueOf(obj.toString());
-        }
-        break;
-      case STRING:
-        if (obj != null) {
-          return (obj.toString());
-        }
-        break;
-      case TIMESTAMP:
-        if (obj instanceof Timestamp) {
-          return obj;
-        } else if (obj instanceof Integer) {
-          return new Timestamp(((Number) obj).longValue());
-        } else if (obj instanceof Float) {
-          return TimestampUtils.doubleToTimestamp(((Float) obj).doubleValue());
-        } else if (obj instanceof Double) {
-          return TimestampUtils.doubleToTimestamp(((Double) obj).doubleValue());
-        } else if (obj instanceof HiveDecimal) {
-          return TimestampUtils.decimalToTimestamp((HiveDecimal) obj);
-        } else if (obj instanceof HiveDecimalWritable) {
-          return TimestampUtils.decimalToTimestamp(((HiveDecimalWritable) obj).getHiveDecimal());
-        } else if (obj instanceof Date) {
-          return new Timestamp(((Date) obj).getTime());
-        }
-        // float/double conversion to timestamp is interpreted as seconds whereas integer conversion
-        // to timestamp is interpreted as milliseconds by default. The integer to timestamp casting
-        // is also config driven. The filter operator changes its promotion based on config:
-        // "int.timestamp.conversion.in.seconds". Disable PPD for integer cases.
-        break;
-      default:
-        break;
-    }
-
-    throw new IllegalArgumentException(String.format(
-        "ORC SARGS could not convert from %s to %s", obj == null ? "(null)" : obj.getClass()
-            .getSimpleName(), type));
-  }
-
-  public static class SargApplier {
-    public final static boolean[] READ_ALL_RGS = null;
-    public final static boolean[] READ_NO_RGS = new boolean[0];
-
-    private final SearchArgument sarg;
-    private final List<PredicateLeaf> sargLeaves;
-    private final int[] filterColumns;
-    private final long rowIndexStride;
-    // same columns as filterColumns above, but represented as boolean flags indexed by column id
-    private final boolean[] sargColumns;
-    private SchemaEvolution evolution;
-
-    public SargApplier(SearchArgument sarg, String[] columnNames,
-                       long rowIndexStride,
-                       SchemaEvolution evolution) {
-      this.sarg = sarg;
-      sargLeaves = sarg.getLeaves();
-      filterColumns = mapSargColumnsToOrcInternalColIdx(sargLeaves, evolution);
-      this.rowIndexStride = rowIndexStride;
-      // included will not be null; the reader options fill the array with true values if it is null
-      sargColumns = new boolean[evolution.getFileIncluded().length];
-      for (int i : filterColumns) {
-        // filter columns may have an index of -1, which can happen for a partition column in the SARG.
-        if (i > 0) {
-          sargColumns[i] = true;
-        }
-      }
-      this.evolution = evolution;
-    }
-
-    /**
-     * Pick the row groups that we need to load from the current stripe.
-     *
-     * @return an array with a boolean for each row group or null if all of the
-     * row groups must be read.
-     * @throws IOException
-     */
-    public boolean[] pickRowGroups(StripeInformation stripe, OrcProto.RowIndex[] indexes,
-        OrcProto.BloomFilterIndex[] bloomFilterIndices, boolean returnNone) throws IOException {
-      long rowsInStripe = stripe.getNumberOfRows();
-      int groupsInStripe = (int) ((rowsInStripe + rowIndexStride - 1) / rowIndexStride);
-      boolean[] result = new boolean[groupsInStripe]; // TODO: avoid alloc?
-      TruthValue[] leafValues = new TruthValue[sargLeaves.size()];
-      boolean hasSelected = false, hasSkipped = false;
-      for (int rowGroup = 0; rowGroup < result.length; ++rowGroup) {
-        for (int pred = 0; pred < leafValues.length; ++pred) {
-          int columnIx = filterColumns[pred];
-          if (columnIx != -1) {
-            if (indexes[columnIx] == null) {
-              throw new AssertionError("Index is not populated for " + columnIx);
-            }
-            OrcProto.RowIndexEntry entry = indexes[columnIx].getEntry(rowGroup);
-            if (entry == null) {
-              throw new AssertionError("RG is not populated for " + columnIx + " rg " + rowGroup);
-            }
-            OrcProto.ColumnStatistics stats = entry.getStatistics();
-            OrcProto.BloomFilter bf = null;
-            if (bloomFilterIndices != null && bloomFilterIndices[columnIx] != null) {
-              bf = bloomFilterIndices[columnIx].getBloomFilter(rowGroup);
-            }
-            if (evolution != null && evolution.isPPDSafeConversion(columnIx)) {
-              leafValues[pred] = evaluatePredicateProto(stats, sargLeaves.get(pred), bf);
-            } else {
-              leafValues[pred] = TruthValue.YES_NO_NULL;
-            }
-            if (LOG.isTraceEnabled()) {
-              LOG.trace("Stats = " + stats);
-              LOG.trace("Setting " + sargLeaves.get(pred) + " to " + leafValues[pred]);
-            }
-          } else {
-            // the column is a virtual column
-            leafValues[pred] = TruthValue.YES_NO_NULL;
-          }
-        }
-        result[rowGroup] = sarg.evaluate(leafValues).isNeeded();
-        hasSelected = hasSelected || result[rowGroup];
-        hasSkipped = hasSkipped || (!result[rowGroup]);
-        if (LOG.isDebugEnabled()) {
-          LOG.debug("Row group " + (rowIndexStride * rowGroup) + " to " +
-              (rowIndexStride * (rowGroup + 1) - 1) + " is " +
-              (result[rowGroup] ? "" : "not ") + "included.");
-        }
-      }
-
-      return hasSkipped ? ((hasSelected || !returnNone) ? result : READ_NO_RGS) : READ_ALL_RGS;
-    }
-  }
-
-  /**
-   * Pick the row groups that we need to load from the current stripe.
-   *
-   * @return an array with a boolean for each row group or null if all of the
-   * row groups must be read.
-   * @throws IOException
-   */
-  protected boolean[] pickRowGroups() throws IOException {
-    // if we don't have a sarg or indexes, we read everything
-    if (sargApp == null) {
-      return null;
-    }
-    readRowIndex(currentStripe, included, sargApp.sargColumns);
-    return sargApp.pickRowGroups(stripes.get(currentStripe), indexes, bloomFilterIndices, false);
-  }
-
-  private void clearStreams() {
-    // explicit close of all streams to de-ref ByteBuffers
-    for (InStream is : streams.values()) {
-      is.close();
-    }
-    if (bufferChunks != null) {
-      if (dataReader.isTrackingDiskRanges()) {
-        for (DiskRangeList range = bufferChunks; range != null; range = range.next) {
-          if (!(range instanceof BufferChunk)) {
-            continue;
-          }
-          dataReader.releaseBuffer(((BufferChunk) range).getChunk());
-        }
-      }
-    }
-    bufferChunks = null;
-    streams.clear();
-  }
-
-  /**
-   * Read the current stripe into memory.
-   *
-   * @throws IOException
-   */
-  private void readStripe() throws IOException {
-    StripeInformation stripe = beginReadStripe();
-    includedRowGroups = pickRowGroups();
-
-    // move forward to the first unskipped row
-    if (includedRowGroups != null) {
-      while (rowInStripe < rowCountInStripe &&
-          !includedRowGroups[(int) (rowInStripe / rowIndexStride)]) {
-        rowInStripe = Math.min(rowCountInStripe, rowInStripe + rowIndexStride);
-      }
-    }
-
-    // if we haven't skipped the whole stripe, read the data
-    if (rowInStripe < rowCountInStripe) {
-      // if we aren't projecting columns or filtering rows, just read it all
-      if (included == null && includedRowGroups == null) {
-        readAllDataStreams(stripe);
-      } else {
-        readPartialDataStreams(stripe);
-      }
-      reader.startStripe(streams, stripeFooter);
-      // if we skipped the first row group, move the pointers forward
-      if (rowInStripe != 0) {
-        seekToRowEntry(reader, (int) (rowInStripe / rowIndexStride));
-      }
-    }
-  }
-
-  private StripeInformation beginReadStripe() throws IOException {
-    StripeInformation stripe = stripes.get(currentStripe);
-    stripeFooter = readStripeFooter(stripe);
-    clearStreams();
-    // setup the position in the stripe
-    rowCountInStripe = stripe.getNumberOfRows();
-    rowInStripe = 0;
-    rowBaseInStripe = 0;
-    for (int i = 0; i < currentStripe; ++i) {
-      rowBaseInStripe += stripes.get(i).getNumberOfRows();
-    }
-    // reset all of the indexes
-    for (int i = 0; i < indexes.length; ++i) {
-      indexes[i] = null;
-    }
-    return stripe;
-  }
-
-  private void readAllDataStreams(StripeInformation stripe) throws IOException {
-    long start = stripe.getIndexLength();
-    long end = start + stripe.getDataLength();
-    // explicitly trigger 1 big read
-    DiskRangeList toRead = new DiskRangeList(start, end);
-    bufferChunks = dataReader.readFileData(toRead, stripe.getOffset(), false);
-    List<OrcProto.Stream> streamDescriptions = stripeFooter.getStreamsList();
-    createStreams(streamDescriptions, bufferChunks, null, codec, bufferSize, streams);
-  }
-
-  /**
-   * Plan the ranges of the file that we need to read given the list of
-   * columns and row groups.
-   *
-   * @param streamList        the list of streams available
-   * @param indexes           the indexes that have been loaded
-   * @param includedColumns   which columns are needed
-   * @param includedRowGroups which row groups are needed
-   * @param isCompressed      does the file have generic compression
-   * @param encodings         the encodings for each column
-   * @param types             the types of the columns
-   * @param compressionSize   the compression block size
-   * @return the list of disk ranges that will be loaded
-   */
-  static DiskRangeList planReadPartialDataStreams(
-      List<OrcProto.Stream> streamList,
-      OrcProto.RowIndex[] indexes,
-      boolean[] includedColumns,
-      boolean[] includedRowGroups,
-      boolean isCompressed,
-      List<OrcProto.ColumnEncoding> encodings,
-      List<OrcProto.Type> types,
-      int compressionSize,
-      boolean doMergeBuffers) {
-    long offset = 0;
-    // figure out which columns have a present stream
-    boolean[] hasNull = RecordReaderUtils.findPresentStreamsByColumn(streamList, types);
-    CreateHelper list = new CreateHelper();
-    for (OrcProto.Stream stream : streamList) {
-      long length = stream.getLength();
-      int column = stream.getColumn();
-      OrcProto.Stream.Kind streamKind = stream.getKind();
-      // since stream kind is optional, first check if it exists
-      if (stream.hasKind() &&
-          (StreamName.getArea(streamKind) == StreamName.Area.DATA) &&
-          (column < includedColumns.length && includedColumns[column])) {
-        // if we aren't filtering or it is a dictionary, load it.
-        if (includedRowGroups == null
-            || RecordReaderUtils.isDictionary(streamKind, encodings.get(column))) {
-          RecordReaderUtils.addEntireStreamToRanges(offset, length, list, doMergeBuffers);
-        } else {
-          RecordReaderUtils.addRgFilteredStreamToRanges(stream, includedRowGroups,
-              isCompressed, indexes[column], encodings.get(column), types.get(column),
-              compressionSize, hasNull[column], offset, length, list, doMergeBuffers);
-        }
-      }
-      offset += length;
-    }
-    return list.extract();
-  }
-
-  void createStreams(List<OrcProto.Stream> streamDescriptions,
-      DiskRangeList ranges,
-      boolean[] includeColumn,
-      CompressionCodec codec,
-      int bufferSize,
-      Map<StreamName, InStream> streams) throws IOException {
-    long streamOffset = 0;
-    for (OrcProto.Stream streamDesc : streamDescriptions) {
-      int column = streamDesc.getColumn();
-      if ((includeColumn != null &&
-          (column < included.length && !includeColumn[column])) ||
-          streamDesc.hasKind() &&
-              (StreamName.getArea(streamDesc.getKind()) != StreamName.Area.DATA)) {
-        streamOffset += streamDesc.getLength();
-        continue;
-      }
-      List<DiskRange> buffers = RecordReaderUtils.getStreamBuffers(
-          ranges, streamOffset, streamDesc.getLength());
-      StreamName name = new StreamName(column, streamDesc.getKind());
-      streams.put(name, InStream.create(name.toString(), buffers,
-          streamDesc.getLength(), codec, bufferSize));
-      streamOffset += streamDesc.getLength();
-    }
-  }
-
-  private void readPartialDataStreams(StripeInformation stripe) throws IOException {
-    List<OrcProto.Stream> streamList = stripeFooter.getStreamsList();
-    DiskRangeList toRead = planReadPartialDataStreams(streamList,
-        indexes, included, includedRowGroups, codec != null,
-        stripeFooter.getColumnsList(), types, bufferSize, true);
-    if (LOG.isDebugEnabled()) {
-      LOG.debug("chunks = " + RecordReaderUtils.stringifyDiskRanges(toRead));
-    }
-    bufferChunks = dataReader.readFileData(toRead, stripe.getOffset(), false);
-    if (LOG.isDebugEnabled()) {
-      LOG.debug("merge = " + RecordReaderUtils.stringifyDiskRanges(bufferChunks));
-    }
-
-    createStreams(streamList, bufferChunks, included, codec, bufferSize, streams);
-  }
-
-  /**
-   * Read the next stripe until we find a row that we don't skip.
-   *
-   * @throws IOException
-   */
-  private void advanceStripe() throws IOException {
-    rowInStripe = rowCountInStripe;
-    while (rowInStripe >= rowCountInStripe &&
-        currentStripe < stripes.size() - 1) {
-      currentStripe += 1;
-      readStripe();
-    }
-  }
-
-  /**
-   * Skip over rows that we aren't selecting, so that the next row is
-   * one that we will read.
-   *
-   * @param nextRow the row we want to go to
-   * @throws IOException
-   */
-  private boolean advanceToNextRow(
-      TreeReaderFactory.TreeReader reader, long nextRow, boolean canAdvanceStripe)
-      throws IOException {
-    long nextRowInStripe = nextRow - rowBaseInStripe;
-    // check for row skipping
-    if (rowIndexStride != 0 &&
-        includedRowGroups != null &&
-        nextRowInStripe < rowCountInStripe) {
-      int rowGroup = (int) (nextRowInStripe / rowIndexStride);
-      if (!includedRowGroups[rowGroup]) {
-        while (rowGroup < includedRowGroups.length && !includedRowGroups[rowGroup]) {
-          rowGroup += 1;
-        }
-        if (rowGroup >= includedRowGroups.length) {
-          if (canAdvanceStripe) {
-            advanceStripe();
-          }
-          return canAdvanceStripe;
-        }
-        nextRowInStripe = Math.min(rowCountInStripe, rowGroup * rowIndexStride);
-      }
-    }
-    if (nextRowInStripe >= rowCountInStripe) {
-      if (canAdvanceStripe) {
-        advanceStripe();
-      }
-      return canAdvanceStripe;
-    }
-    if (nextRowInStripe != rowInStripe) {
-      if (rowIndexStride != 0) {
-        int rowGroup = (int) (nextRowInStripe / rowIndexStride);
-        seekToRowEntry(reader, rowGroup);
-        reader.skipRows(nextRowInStripe - rowGroup * rowIndexStride);
-      } else {
-        reader.skipRows(nextRowInStripe - rowInStripe);
-      }
-      rowInStripe = nextRowInStripe;
-    }
-    return true;
-  }
-
-  @Override
-  public boolean nextBatch(VectorizedRowBatch batch) throws IOException {
-    try {
-      if (rowInStripe >= rowCountInStripe) {
-        currentStripe += 1;
-        if (currentStripe >= stripes.size()) {
-          batch.size = 0;
-          return false;
-        }
-        readStripe();
-      }
-
-      int batchSize = computeBatchSize(batch.getMaxSize());
-
-      rowInStripe += batchSize;
-      reader.setVectorColumnCount(batch.getDataColumnCount());
-      reader.nextBatch(batch, batchSize);
-      batch.selectedInUse = false;
-      batch.size = batchSize;
-      advanceToNextRow(reader, rowInStripe + rowBaseInStripe, true);
-      return batch.size != 0;
-    } catch (IOException e) {
-      // Rethrow exception with file name in log message
-      throw new IOException("Error reading file: " + path, e);
-    }
-  }
-
-  private int computeBatchSize(long targetBatchSize) {
-    final int batchSize;
-    // In case of PPD, the batch size should be aware of row group boundaries. If only a subset of
-    // row groups is selected, the marker position is set to the end of that range (the subset of
-    // row groups within the stripe). Computing the batch size from the marker position ensures
-    // that it respects row group boundaries and will not overflow when reading rows.
-    // An illustration of this case is at https://issues.apache.org/jira/browse/HIVE-6287
-    if (rowIndexStride != 0 && includedRowGroups != null && rowInStripe < rowCountInStripe) {
-      int startRowGroup = (int) (rowInStripe / rowIndexStride);
-      if (!includedRowGroups[startRowGroup]) {
-        while (startRowGroup < includedRowGroups.length && !includedRowGroups[startRowGroup]) {
-          startRowGroup += 1;
-        }
-      }
-
-      int endRowGroup = startRowGroup;
-      while (endRowGroup < includedRowGroups.length && includedRowGroups[endRowGroup]) {
-        endRowGroup += 1;
-      }
-
-      final long markerPosition =
-          (endRowGroup * rowIndexStride) < rowCountInStripe ? (endRowGroup * rowIndexStride)
-              : rowCountInStripe;
-      batchSize = (int) Math.min(targetBatchSize, (markerPosition - rowInStripe));
-
-      if (isLogDebugEnabled && batchSize < targetBatchSize) {
-        LOG.debug("markerPosition: " + markerPosition + " batchSize: " + batchSize);
-      }
-    } else {
-      batchSize = (int) Math.min(targetBatchSize, (rowCountInStripe - rowInStripe));
-    }
-    return batchSize;
-  }
-
-  @Override
-  public void close() throws IOException {
-    clearStreams();
-    dataReader.close();
-  }
-
-  @Override
-  public long getRowNumber() {
-    return rowInStripe + rowBaseInStripe + firstRow;
-  }
-
-  /**
-   * Return the fraction of rows that have been read from the selected
-   * section of the file.
-   *
-   * @return fraction between 0.0 and 1.0 of rows consumed
-   */
-  @Override
-  public float getProgress() {
-    return ((float) rowBaseInStripe + rowInStripe) / totalRowCount;
-  }
-
-  private int findStripe(long rowNumber) {
-    for (int i = 0; i < stripes.size(); i++) {
-      StripeInformation stripe = stripes.get(i);
-      if (stripe.getNumberOfRows() > rowNumber) {
-        return i;
-      }
-      rowNumber -= stripe.getNumberOfRows();
-    }
-    throw new IllegalArgumentException("Seek after the end of reader range");
-  }
-
-  public OrcIndex readRowIndex(int stripeIndex, boolean[] included,
-                               boolean[] sargColumns) throws IOException {
-    return readRowIndex(stripeIndex, included, null, null, sargColumns);
-  }
-
-  public OrcIndex readRowIndex(int stripeIndex, boolean[] included,
-                               OrcProto.RowIndex[] indexes,
-                               OrcProto.BloomFilterIndex[] bloomFilterIndex,
-                               boolean[] sargColumns) throws IOException {
-    StripeInformation stripe = stripes.get(stripeIndex);
-    OrcProto.StripeFooter stripeFooter = null;
-    // if this is the current stripe, use the cached objects.
-    if (stripeIndex == currentStripe) {
-      stripeFooter = this.stripeFooter;
-      indexes = indexes == null ? this.indexes : indexes;
-      bloomFilterIndex = bloomFilterIndex == null ? this.bloomFilterIndices : bloomFilterIndex;
-      sargColumns = sargColumns == null ?
-          (sargApp == null ? null : sargApp.sargColumns) : sargColumns;
-    }
-    return dataReader.readRowIndex(stripe, stripeFooter, included, indexes, sargColumns,
-        bloomFilterIndex);
-  }
-
-  private void seekToRowEntry(TreeReaderFactory.TreeReader reader, int rowEntry)
-      throws IOException {
-    PositionProvider[] index = new PositionProvider[indexes.length];
-    for (int i = 0; i < indexes.length; ++i) {
-      if (indexes[i] != null) {
-        index[i] = new PositionProviderImpl(indexes[i].getEntry(rowEntry));
-      }
-    }
-    reader.seek(index);
-  }
-
-  @Override
-  public void seekToRow(long rowNumber) throws IOException {
-    if (rowNumber < 0) {
-      throw new IllegalArgumentException("Seek to a negative row number " +
-          rowNumber);
-    } else if (rowNumber < firstRow) {
-      throw new IllegalArgumentException("Seek before reader range " +
-          rowNumber);
-    }
-    // convert to our internal form (rows from the beginning of slice)
-    rowNumber -= firstRow;
-
-    // move to the right stripe
-    int rightStripe = findStripe(rowNumber);
-    if (rightStripe != currentStripe) {
-      currentStripe = rightStripe;
-      readStripe();
-    }
-    readRowIndex(currentStripe, included, sargApp == null ? null : sargApp.sargColumns);
-
-    // if we aren't at the right row yet, advance within the stripe.
-    advanceToNextRow(reader, rowNumber, true);
-  }
-
-  private static final String TRANSLATED_SARG_SEPARATOR = "_";
-  public static String encodeTranslatedSargColumn(int rootColumn, Integer indexInSourceTable) {
-    return rootColumn + TRANSLATED_SARG_SEPARATOR
-        + ((indexInSourceTable == null) ? -1 : indexInSourceTable);
-  }
-
-  public static int[] mapTranslatedSargColumns(
-      List<OrcProto.Type> types, List<PredicateLeaf> sargLeaves) {
-    int[] result = new int[sargLeaves.size()];
-    OrcProto.Type lastRoot = null; // Root will be the same for everyone as of now.
-    String lastRootStr = null;
-    for (int i = 0; i < result.length; ++i) {
-      String[] rootAndIndex = sargLeaves.get(i).getColumnName().split(TRANSLATED_SARG_SEPARATOR);
-      assert rootAndIndex.length == 2;
-      String rootStr = rootAndIndex[0], indexStr = rootAndIndex[1];
-      int index = Integer.parseInt(indexStr);
-      // First, check if the column even maps to anything.
-      if (index == -1) {
-        result[i] = -1;
-        continue;
-      }
-      assert index >= 0;
-      // Then, find the root type if needed.
-      if (!rootStr.equals(lastRootStr)) {
-        lastRoot = types.get(Integer.parseInt(rootStr));
-        lastRootStr = rootStr;
-      }
-      // Subtypes of the root type correspond, in order, to the columns in the table schema
-      // (disregarding schema evolution, which doesn't presently work). Get the index of the
-      // corresponding subtype.
-      result[i] = lastRoot.getSubtypes(index);
-    }
-    return result;
-  }
-}
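
For readers following the SARG evaluation above, here is a minimal, self-contained Java sketch of the range-based pruning idea: a predicate constant is located against a row group's [min, max] statistics and the result decides whether the group can be skipped. The enum names mirror the Location and TruthValue values in the deleted code; the class and method names (RangePruningSketch, locate, lessThan) are illustrative only and are not Hive APIs, and the *_NULL truth variants are omitted for brevity.

public class RangePruningSketch {
  enum Location { BEFORE, MIN, MIDDLE, MAX, AFTER }
  enum Truth { YES, NO, YES_NO }   // *_NULL variants omitted

  // locate a constant against the row group's [min, max] statistics
  static <T extends Comparable<T>> Location locate(T value, T min, T max) {
    if (value.compareTo(min) < 0) return Location.BEFORE;
    if (value.compareTo(min) == 0) return Location.MIN;
    if (value.compareTo(max) == 0) return Location.MAX;
    if (value.compareTo(max) > 0) return Location.AFTER;
    return Location.MIDDLE;
  }

  // LESS_THAN follows the same decision table as the deleted code:
  // skip the group only when the constant is <= min, and the whole group
  // matches only when the constant is beyond max.
  static Truth lessThan(long constant, long min, long max) {
    Location loc = locate(constant, min, max);
    if (loc == Location.AFTER) return Truth.YES;
    if (loc == Location.BEFORE || loc == Location.MIN) return Truth.NO;
    return Truth.YES_NO;
  }

  public static void main(String[] args) {
    // row group statistics: min = 10, max = 20
    System.out.println(lessThan(5, 10, 20));   // NO      -> row group skipped
    System.out.println(lessThan(15, 10, 20));  // YES_NO  -> row group must be read
    System.out.println(lessThan(25, 10, 20));  // YES     -> every row matches
  }
}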

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/impl/RecordReaderUtils.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/RecordReaderUtils.java b/orc/src/java/org/apache/orc/impl/RecordReaderUtils.java
deleted file mode 100644
index 6100d50..0000000
--- a/orc/src/java/org/apache/orc/impl/RecordReaderUtils.java
+++ /dev/null
@@ -1,578 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc.impl;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-import java.util.TreeMap;
-
-import com.google.common.collect.Lists;
-import org.apache.commons.lang.builder.HashCodeBuilder;
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.common.io.DiskRange;
-import org.apache.hadoop.hive.common.io.DiskRangeList;
-import org.apache.hadoop.hive.common.io.DiskRangeList.CreateHelper;
-import org.apache.hadoop.hive.common.io.DiskRangeList.MutateHelper;
-import org.apache.orc.CompressionCodec;
-import org.apache.orc.DataReader;
-import org.apache.orc.OrcProto;
-
-import com.google.common.collect.ComparisonChain;
-import org.apache.orc.StripeInformation;
-
-/**
- * Stateless methods shared between RecordReaderImpl and EncodedReaderImpl.
- */
-public class RecordReaderUtils {
-  private static final HadoopShims SHIMS = HadoopShims.Factory.get();
-
-  private static class DefaultDataReader implements DataReader {
-    private FSDataInputStream file = null;
-    private final ByteBufferAllocatorPool pool;
-    private HadoopShims.ZeroCopyReaderShim zcr = null;
-    private final FileSystem fs;
-    private final Path path;
-    private final boolean useZeroCopy;
-    private final CompressionCodec codec;
-    private final int bufferSize;
-    private final int typeCount;
-
-    private DefaultDataReader(DefaultDataReader other) {
-      this.pool = other.pool;
-      this.bufferSize = other.bufferSize;
-      this.typeCount = other.typeCount;
-      this.fs = other.fs;
-      this.path = other.path;
-      this.useZeroCopy = other.useZeroCopy;
-      this.codec = other.codec;
-    }
-
-    private DefaultDataReader(DataReaderProperties properties) {
-      this.fs = properties.getFileSystem();
-      this.path = properties.getPath();
-      this.useZeroCopy = properties.getZeroCopy();
-      this.codec = PhysicalFsWriter.createCodec(properties.getCompression());
-      this.bufferSize = properties.getBufferSize();
-      this.typeCount = properties.getTypeCount();
-      if (useZeroCopy) {
-        this.pool = new ByteBufferAllocatorPool();
-      } else {
-        this.pool = null;
-      }
-    }
-
-    @Override
-    public void open() throws IOException {
-      this.file = fs.open(path);
-      if (useZeroCopy) {
-        zcr = RecordReaderUtils.createZeroCopyShim(file, codec, pool);
-      } else {
-        zcr = null;
-      }
-    }
-
-    @Override
-    public OrcIndex readRowIndex(StripeInformation stripe,
-                                 OrcProto.StripeFooter footer,
-                                 boolean[] included,
-                                 OrcProto.RowIndex[] indexes,
-                                 boolean[] sargColumns,
-                                 OrcProto.BloomFilterIndex[] bloomFilterIndices
-                                 ) throws IOException {
-      if (file == null) {
-        open();
-      }
-      if (footer == null) {
-        footer = readStripeFooter(stripe);
-      }
-      if (indexes == null) {
-        indexes = new OrcProto.RowIndex[typeCount];
-      }
-      if (bloomFilterIndices == null) {
-        bloomFilterIndices = new OrcProto.BloomFilterIndex[typeCount];
-      }
-      long offset = stripe.getOffset();
-      List<OrcProto.Stream> streams = footer.getStreamsList();
-      for (int i = 0; i < streams.size(); i++) {
-        OrcProto.Stream stream = streams.get(i);
-        OrcProto.Stream nextStream = null;
-        if (i < streams.size() - 1) {
-          nextStream = streams.get(i+1);
-        }
-        int col = stream.getColumn();
-        int len = (int) stream.getLength();
-        // row index streams and bloom filters are interleaved; check whether the sarg column has a
-        // bloom filter and combine the I/O to read the row index and bloom filter for that column together
-        if (stream.hasKind() && (stream.getKind() == OrcProto.Stream.Kind.ROW_INDEX)) {
-          boolean readBloomFilter = false;
-          if (sargColumns != null && sargColumns[col] &&
-              nextStream.getKind() == OrcProto.Stream.Kind.BLOOM_FILTER) {
-            len += nextStream.getLength();
-            i += 1;
-            readBloomFilter = true;
-          }
-          if ((included == null || included[col]) && indexes[col] == null) {
-            byte[] buffer = new byte[len];
-            file.readFully(offset, buffer, 0, buffer.length);
-            ByteBuffer bb = ByteBuffer.wrap(buffer);
-            indexes[col] = OrcProto.RowIndex.parseFrom(InStream.create("index",
-                Lists.<DiskRange>newArrayList(new BufferChunk(bb, 0)), stream.getLength(),
-                codec, bufferSize));
-            if (readBloomFilter) {
-              bb.position((int) stream.getLength());
-              bloomFilterIndices[col] = OrcProto.BloomFilterIndex.parseFrom(InStream.create(
-                  "bloom_filter", Lists.<DiskRange>newArrayList(new BufferChunk(bb, 0)),
-                  nextStream.getLength(), codec, bufferSize));
-            }
-          }
-        }
-        offset += len;
-      }
-
-      OrcIndex index = new OrcIndex(indexes, bloomFilterIndices);
-      return index;
-    }
-
-    @Override
-    public OrcProto.StripeFooter readStripeFooter(StripeInformation stripe) throws IOException {
-      if (file == null) {
-        open();
-      }
-      long offset = stripe.getOffset() + stripe.getIndexLength() + stripe.getDataLength();
-      int tailLength = (int) stripe.getFooterLength();
-
-      // read the footer
-      ByteBuffer tailBuf = ByteBuffer.allocate(tailLength);
-      file.readFully(offset, tailBuf.array(), tailBuf.arrayOffset(), tailLength);
-      return OrcProto.StripeFooter.parseFrom(InStream.createCodedInputStream("footer",
-          Lists.<DiskRange>newArrayList(new BufferChunk(tailBuf, 0)),
-          tailLength, codec, bufferSize));
-    }
-
-    @Override
-    public DiskRangeList readFileData(
-        DiskRangeList range, long baseOffset, boolean doForceDirect) throws IOException {
-      return RecordReaderUtils.readDiskRanges(file, zcr, baseOffset, range, doForceDirect);
-    }
-
-    @Override
-    public void close() throws IOException {
-      if (pool != null) {
-        pool.clear();
-      }
-      // close both zcr and file
-      try (HadoopShims.ZeroCopyReaderShim myZcr = zcr) {
-        if (file != null) {
-          file.close();
-        }
-      }
-    }
-
-    @Override
-    public boolean isTrackingDiskRanges() {
-      return zcr != null;
-    }
-
-    @Override
-    public void releaseBuffer(ByteBuffer buffer) {
-      zcr.releaseBuffer(buffer);
-    }
-
-    @Override
-    public DataReader clone() {
-      return new DefaultDataReader(this);
-    }
-
-  }
-
-  public static DataReader createDefaultDataReader(DataReaderProperties properties) {
-    return new DefaultDataReader(properties);
-  }
-
-  public static boolean[] findPresentStreamsByColumn(
-      List<OrcProto.Stream> streamList, List<OrcProto.Type> types) {
-    boolean[] hasNull = new boolean[types.size()];
-    for(OrcProto.Stream stream: streamList) {
-      if (stream.hasKind() && (stream.getKind() == OrcProto.Stream.Kind.PRESENT)) {
-        hasNull[stream.getColumn()] = true;
-      }
-    }
-    return hasNull;
-  }
-
-  /**
-   * Does region A overlap region B? The end points are inclusive on both sides.
-   * @param leftA A's left point
-   * @param rightA A's right point
-   * @param leftB B's left point
-   * @param rightB B's right point
-   * @return Does region A overlap region B?
-   */
-  static boolean overlap(long leftA, long rightA, long leftB, long rightB) {
-    if (leftA <= leftB) {
-      return rightA >= leftB;
-    }
-    return rightB >= leftA;
-  }
-
-  public static void addEntireStreamToRanges(
-      long offset, long length, CreateHelper list, boolean doMergeBuffers) {
-    list.addOrMerge(offset, offset + length, doMergeBuffers, false);
-  }
-
-  public static void addRgFilteredStreamToRanges(OrcProto.Stream stream,
-      boolean[] includedRowGroups, boolean isCompressed, OrcProto.RowIndex index,
-      OrcProto.ColumnEncoding encoding, OrcProto.Type type, int compressionSize, boolean hasNull,
-      long offset, long length, CreateHelper list, boolean doMergeBuffers) {
-    for (int group = 0; group < includedRowGroups.length; ++group) {
-      if (!includedRowGroups[group]) continue;
-      int posn = getIndexPosition(
-          encoding.getKind(), type.getKind(), stream.getKind(), isCompressed, hasNull);
-      long start = index.getEntry(group).getPositions(posn);
-      final long nextGroupOffset;
-      boolean isLast = group == (includedRowGroups.length - 1);
-      nextGroupOffset = isLast ? length : index.getEntry(group + 1).getPositions(posn);
-
-      start += offset;
-      long end = offset + estimateRgEndOffset(
-          isCompressed, isLast, nextGroupOffset, length, compressionSize);
-      list.addOrMerge(start, end, doMergeBuffers, true);
-    }
-  }
-
-  public static long estimateRgEndOffset(boolean isCompressed, boolean isLast,
-      long nextGroupOffset, long streamLength, int bufferSize) {
-    // figure out the worst-case last location:
-    // if adjacent groups have the same compressed block offset, then stretch the slop
-    // by a factor of 2 to safely accommodate the next compression block
-    // (one for the current compression block and another for the next compression block).
-    long slop = isCompressed ? 2 * (OutStream.HEADER_SIZE + bufferSize) : WORST_UNCOMPRESSED_SLOP;
-    return isLast ? streamLength : Math.min(streamLength, nextGroupOffset + slop);
-  }
-
-  private static final int BYTE_STREAM_POSITIONS = 1;
-  private static final int RUN_LENGTH_BYTE_POSITIONS = BYTE_STREAM_POSITIONS + 1;
-  private static final int BITFIELD_POSITIONS = RUN_LENGTH_BYTE_POSITIONS + 1;
-  private static final int RUN_LENGTH_INT_POSITIONS = BYTE_STREAM_POSITIONS + 1;
-
-  /**
-   * Get the offset within the column's index positions at which the given
-   * stream's positions start.
-   * @param columnEncoding the encoding of the column
-   * @param columnType the type of the column
-   * @param streamType the kind of the stream
-   * @param isCompressed is the file compressed
-   * @param hasNulls does the column have a PRESENT stream?
-   * @return the offset of the given stream within the index positions
-   */
-  public static int getIndexPosition(OrcProto.ColumnEncoding.Kind columnEncoding,
-                              OrcProto.Type.Kind columnType,
-                              OrcProto.Stream.Kind streamType,
-                              boolean isCompressed,
-                              boolean hasNulls) {
-    if (streamType == OrcProto.Stream.Kind.PRESENT) {
-      return 0;
-    }
-    int compressionValue = isCompressed ? 1 : 0;
-    int base = hasNulls ? (BITFIELD_POSITIONS + compressionValue) : 0;
-    switch (columnType) {
-      case BOOLEAN:
-      case BYTE:
-      case SHORT:
-      case INT:
-      case LONG:
-      case FLOAT:
-      case DOUBLE:
-      case DATE:
-      case STRUCT:
-      case MAP:
-      case LIST:
-      case UNION:
-        return base;
-      case CHAR:
-      case VARCHAR:
-      case STRING:
-        if (columnEncoding == OrcProto.ColumnEncoding.Kind.DICTIONARY ||
-            columnEncoding == OrcProto.ColumnEncoding.Kind.DICTIONARY_V2) {
-          return base;
-        } else {
-          if (streamType == OrcProto.Stream.Kind.DATA) {
-            return base;
-          } else {
-            return base + BYTE_STREAM_POSITIONS + compressionValue;
-          }
-        }
-      case BINARY:
-        if (streamType == OrcProto.Stream.Kind.DATA) {
-          return base;
-        }
-        return base + BYTE_STREAM_POSITIONS + compressionValue;
-      case DECIMAL:
-        if (streamType == OrcProto.Stream.Kind.DATA) {
-          return base;
-        }
-        return base + BYTE_STREAM_POSITIONS + compressionValue;
-      case TIMESTAMP:
-        if (streamType == OrcProto.Stream.Kind.DATA) {
-          return base;
-        }
-        return base + RUN_LENGTH_INT_POSITIONS + compressionValue;
-      default:
-        throw new IllegalArgumentException("Unknown type " + columnType);
-    }
-  }
-
-  // for uncompressed streams, the worst-case overlap with the following set
-  // of rows (a long vint literal group).
-  static final int WORST_UNCOMPRESSED_SLOP = 2 + 8 * 512;
-
-  /**
-   * Is this stream part of a dictionary?
-   * @return is this part of a dictionary?
-   */
-  public static boolean isDictionary(OrcProto.Stream.Kind kind,
-                              OrcProto.ColumnEncoding encoding) {
-    assert kind != OrcProto.Stream.Kind.DICTIONARY_COUNT;
-    OrcProto.ColumnEncoding.Kind encodingKind = encoding.getKind();
-    return kind == OrcProto.Stream.Kind.DICTIONARY_DATA ||
-      (kind == OrcProto.Stream.Kind.LENGTH &&
-       (encodingKind == OrcProto.ColumnEncoding.Kind.DICTIONARY ||
-        encodingKind == OrcProto.ColumnEncoding.Kind.DICTIONARY_V2));
-  }
-
-  /**
-   * Build a string representation of a list of disk ranges.
-   * @param range ranges to stringify
-   * @return the resulting string
-   */
-  public static String stringifyDiskRanges(DiskRangeList range) {
-    StringBuilder buffer = new StringBuilder();
-    buffer.append("[");
-    boolean isFirst = true;
-    while (range != null) {
-      if (!isFirst) {
-        buffer.append(", {");
-      } else {
-        buffer.append("{");
-      }
-      isFirst = false;
-      buffer.append(range.toString());
-      buffer.append("}");
-      range = range.next;
-    }
-    buffer.append("]");
-    return buffer.toString();
-  }
-
-  /**
-   * Read the list of ranges from the file.
-   * @param file the file to read
-   * @param base the base of the stripe
-   * @param range the disk ranges within the stripe to read
-   * @return the bytes read for each disk range, which is the same length as
-   *    ranges
-   * @throws IOException
-   */
-  static DiskRangeList readDiskRanges(FSDataInputStream file,
-                                      HadoopShims.ZeroCopyReaderShim zcr,
-                                 long base,
-                                 DiskRangeList range,
-                                 boolean doForceDirect) throws IOException {
-    if (range == null) return null;
-    DiskRangeList prev = range.prev;
-    if (prev == null) {
-      prev = new MutateHelper(range);
-    }
-    while (range != null) {
-      if (range.hasData()) {
-        range = range.next;
-        continue;
-      }
-      int len = (int) (range.getEnd() - range.getOffset());
-      long off = range.getOffset();
-      if (zcr != null) {
-        file.seek(base + off);
-        boolean hasReplaced = false;
-        while (len > 0) {
-          ByteBuffer partial = zcr.readBuffer(len, false);
-          BufferChunk bc = new BufferChunk(partial, off);
-          if (!hasReplaced) {
-            range.replaceSelfWith(bc);
-            hasReplaced = true;
-          } else {
-            range.insertAfter(bc);
-          }
-          range = bc;
-          int read = partial.remaining();
-          len -= read;
-          off += read;
-        }
-      } else {
-        // Don't use HDFS ByteBuffer API because it has no readFully, and is buggy and pointless.
-        byte[] buffer = new byte[len];
-        file.readFully((base + off), buffer, 0, buffer.length);
-        ByteBuffer bb = null;
-        if (doForceDirect) {
-          bb = ByteBuffer.allocateDirect(len);
-          bb.put(buffer);
-          bb.position(0);
-          bb.limit(len);
-        } else {
-          bb = ByteBuffer.wrap(buffer);
-        }
-        range = range.replaceSelfWith(new BufferChunk(bb, range.getOffset()));
-      }
-      range = range.next;
-    }
-    return prev.next;
-  }
-
-
-  static List<DiskRange> getStreamBuffers(DiskRangeList range, long offset, long length) {
-    // This assumes sorted ranges (as do many other parts of the ORC code).
-    ArrayList<DiskRange> buffers = new ArrayList<DiskRange>();
-    if (length == 0) return buffers;
-    long streamEnd = offset + length;
-    boolean inRange = false;
-    while (range != null) {
-      if (!inRange) {
-        if (range.getEnd() <= offset) {
-          range = range.next;
-          continue; // Skip until we are in range.
-        }
-        inRange = true;
-        if (range.getOffset() < offset) {
-          // Partial first buffer, add a slice of it.
-          buffers.add(range.sliceAndShift(offset, Math.min(streamEnd, range.getEnd()), -offset));
-          if (range.getEnd() >= streamEnd) break; // Partial first buffer is also partial last buffer.
-          range = range.next;
-          continue;
-        }
-      } else if (range.getOffset() >= streamEnd) {
-        break;
-      }
-      if (range.getEnd() > streamEnd) {
-        // Partial last buffer (may also be the first buffer), add a slice of it.
-        buffers.add(range.sliceAndShift(range.getOffset(), streamEnd, -offset));
-        break;
-      }
-      // Buffer that belongs entirely to one stream.
-      // TODO: ideally we would want to reuse the object and remove it from the list, but we cannot
-      //       because bufferChunks is also used by clearStreams for zcr. Create a useless dup.
-      buffers.add(range.sliceAndShift(range.getOffset(), range.getEnd(), -offset));
-      if (range.getEnd() == streamEnd) break;
-      range = range.next;
-    }
-    return buffers;
-  }
-
-  static HadoopShims.ZeroCopyReaderShim createZeroCopyShim(FSDataInputStream file,
-      CompressionCodec codec, ByteBufferAllocatorPool pool) throws IOException {
-    if ((codec == null || ((codec instanceof DirectDecompressionCodec)
-            && ((DirectDecompressionCodec) codec).isAvailable()))) {
-      /* codec is null or is available */
-      return SHIMS.getZeroCopyReader(file, pool);
-    }
-    return null;
-  }
-
-  // this is an implementation copied from ElasticByteBufferPool in hadoop-2,
-  // which lacks a clear()/clean() operation
-  public final static class ByteBufferAllocatorPool implements HadoopShims.ByteBufferPoolShim {
-    private static final class Key implements Comparable<Key> {
-      private final int capacity;
-      private final long insertionGeneration;
-
-      Key(int capacity, long insertionGeneration) {
-        this.capacity = capacity;
-        this.insertionGeneration = insertionGeneration;
-      }
-
-      @Override
-      public int compareTo(Key other) {
-        return ComparisonChain.start().compare(capacity, other.capacity)
-            .compare(insertionGeneration, other.insertionGeneration).result();
-      }
-
-      @Override
-      public boolean equals(Object rhs) {
-        if (rhs == null) {
-          return false;
-        }
-        try {
-          Key o = (Key) rhs;
-          return (compareTo(o) == 0);
-        } catch (ClassCastException e) {
-          return false;
-        }
-      }
-
-      @Override
-      public int hashCode() {
-        return new HashCodeBuilder().append(capacity).append(insertionGeneration)
-            .toHashCode();
-      }
-    }
-
-    private final TreeMap<Key, ByteBuffer> buffers = new TreeMap<Key, ByteBuffer>();
-
-    private final TreeMap<Key, ByteBuffer> directBuffers = new TreeMap<Key, ByteBuffer>();
-
-    private long currentGeneration = 0;
-
-    private final TreeMap<Key, ByteBuffer> getBufferTree(boolean direct) {
-      return direct ? directBuffers : buffers;
-    }
-
-    public void clear() {
-      buffers.clear();
-      directBuffers.clear();
-    }
-
-    @Override
-    public ByteBuffer getBuffer(boolean direct, int length) {
-      TreeMap<Key, ByteBuffer> tree = getBufferTree(direct);
-      Map.Entry<Key, ByteBuffer> entry = tree.ceilingEntry(new Key(length, 0));
-      if (entry == null) {
-        return direct ? ByteBuffer.allocateDirect(length) : ByteBuffer
-            .allocate(length);
-      }
-      tree.remove(entry.getKey());
-      return entry.getValue();
-    }
-
-    @Override
-    public void putBuffer(ByteBuffer buffer) {
-      TreeMap<Key, ByteBuffer> tree = getBufferTree(buffer.isDirect());
-      while (true) {
-        Key key = new Key(buffer.capacity(), currentGeneration++);
-        if (!tree.containsKey(key)) {
-          tree.put(key, buffer);
-          return;
-        }
-        // Buffers are indexed by (capacity, generation).
-        // If our key is not unique on the first try, we try again
-      }
-    }
-  }
-}
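
As a worked illustration of the slop arithmetic in estimateRgEndOffset above, the standalone sketch below restates the same formula with made-up numbers; the HEADER_SIZE value of 3 is an assumption standing in for OutStream.HEADER_SIZE, and the class and method names here are illustrative rather than Hive API.

public class SlopSketch {
  static final int HEADER_SIZE = 3;                        // assumed value of OutStream.HEADER_SIZE
  static final int WORST_UNCOMPRESSED_SLOP = 2 + 8 * 512;  // 4098, as in RecordReaderUtils

  // same arithmetic as estimateRgEndOffset: read up to the next group's start
  // position plus a slop large enough to cover the compression block that the
  // position may point into (two blocks when compressed, a worst-case vint run otherwise)
  static long estimateEnd(boolean isCompressed, boolean isLast,
                          long nextGroupOffset, long streamLength, int bufferSize) {
    long slop = isCompressed ? 2 * (HEADER_SIZE + bufferSize) : WORST_UNCOMPRESSED_SLOP;
    return isLast ? streamLength : Math.min(streamLength, nextGroupOffset + slop);
  }

  public static void main(String[] args) {
    // compressed stream with 256 KiB buffers: slop = 2 * (3 + 262144) = 524294 bytes
    System.out.println(estimateEnd(true, false, 1000000L, 10000000L, 262144));  // 1524294
    // uncompressed stream: fixed slop of 4098 bytes
    System.out.println(estimateEnd(false, false, 1000000L, 10000000L, 262144)); // 1004098
  }
}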

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/impl/RedBlackTree.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/RedBlackTree.java b/orc/src/java/org/apache/orc/impl/RedBlackTree.java
deleted file mode 100644
index 41aa4b9..0000000
--- a/orc/src/java/org/apache/orc/impl/RedBlackTree.java
+++ /dev/null
@@ -1,311 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc.impl;
-
-import org.apache.orc.impl.DynamicIntArray;
-
-/**
- * A memory-efficient red-black tree that does not allocate any objects per
- * element. This class is abstract and assumes that the child class
- * handles the key and comparisons with the key.
- */
-abstract class RedBlackTree {
-  public static final int NULL = -1;
-
-  // Various values controlling the offset of the data within the array.
-  private static final int LEFT_OFFSET = 0;
-  private static final int RIGHT_OFFSET = 1;
-  private static final int ELEMENT_SIZE = 2;
-
-  protected int size = 0;
-  private final DynamicIntArray data;
-  protected int root = NULL;
-  protected int lastAdd = 0;
-  private boolean wasAdd = false;
-
-  /**
-   * Create a set with the given initial capacity.
-   */
-  public RedBlackTree(int initialCapacity) {
-    data = new DynamicIntArray(initialCapacity * ELEMENT_SIZE);
-  }
-
-  /**
-   * Insert a new node into the data array, growing the array as necessary.
-   *
-   * @return Returns the position of the new node.
-   */
-  private int insert(int left, int right, boolean isRed) {
-    int position = size;
-    size += 1;
-    setLeft(position, left, isRed);
-    setRight(position, right);
-    return position;
-  }
-
-  /**
-   * Compare the value at the given position to the new value.
-   * @return 0 if the values are the same, -1 if the new value is smaller and
-   *         1 if the new value is larger.
-   */
-  protected abstract int compareValue(int position);
-
-  /**
-   * Is the given node red as opposed to black? To prevent having an extra word
-   * in the data array, we just use the low bit of the left child index.
-   */
-  protected boolean isRed(int position) {
-    return position != NULL &&
-        (data.get(position * ELEMENT_SIZE + LEFT_OFFSET) & 1) == 1;
-  }
-
-  /**
-   * Set the red bit true or false.
-   */
-  private void setRed(int position, boolean isRed) {
-    int offset = position * ELEMENT_SIZE + LEFT_OFFSET;
-    if (isRed) {
-      data.set(offset, data.get(offset) | 1);
-    } else {
-      data.set(offset, data.get(offset) & ~1);
-    }
-  }
-
-  /**
-   * Get the left field of the given position.
-   */
-  protected int getLeft(int position) {
-    return data.get(position * ELEMENT_SIZE + LEFT_OFFSET) >> 1;
-  }
-
-  /**
-   * Get the right field of the given position.
-   */
-  protected int getRight(int position) {
-    return data.get(position * ELEMENT_SIZE + RIGHT_OFFSET);
-  }
-
-  /**
-   * Set the left field of the given position.
-   * Note that we are storing the node color in the low bit of the left pointer.
-   */
-  private void setLeft(int position, int left) {
-    int offset = position * ELEMENT_SIZE + LEFT_OFFSET;
-    data.set(offset, (left << 1) | (data.get(offset) & 1));
-  }
-
-  /**
-   * Set the left field of the given position.
-   * Note that we are storing the node color in the low bit of the left pointer.
-   */
-  private void setLeft(int position, int left, boolean isRed) {
-    int offset = position * ELEMENT_SIZE + LEFT_OFFSET;
-    data.set(offset, (left << 1) | (isRed ? 1 : 0));
-  }
-
-  /**
-   * Set the right field of the given position.
-   */
-  private void setRight(int position, int right) {
-    data.set(position * ELEMENT_SIZE + RIGHT_OFFSET, right);
-  }
-
-  /**
-   * Insert or find a given key in the tree and rebalance the tree correctly.
-   * Rebalancing restores the red-black aspect of the tree to maintain the
-   * invariants:
-   *   1. If a node is red, both of its children are black.
-   *   2. Each child of a node has the same black height (the number of black
-   *      nodes between it and the leaves of the tree).
-   *
-   * Inserted nodes are at the leaves and are red, therefore there is at most a
-   * violation of rule 1 at the node we just put in. Instead of always keeping
-   * the parents, this routine passes the context down.
-   *
-   * The fix is broken down into 6 cases (1.{1,2,3} and 2.{1,2,3} that are
-   * left-right mirror images of each other). See Algorithms by Cormen,
-   * Leiserson, and Rivest for the explanation of the subcases.
-   *
-   * @param node The node that we are fixing right now.
-   * @param fromLeft Did we come down from the left?
-   * @param parent The node's parent
-   * @param grandparent Parent's parent
-   * @param greatGrandparent Grandparent's parent
-   * @return Does parent also need to be checked and/or fixed?
-   */
-  private boolean add(int node, boolean fromLeft, int parent,
-                      int grandparent, int greatGrandparent) {
-    if (node == NULL) {
-      if (root == NULL) {
-        lastAdd = insert(NULL, NULL, false);
-        root = lastAdd;
-        wasAdd = true;
-        return false;
-      } else {
-        lastAdd = insert(NULL, NULL, true);
-        node = lastAdd;
-        wasAdd = true;
-        // connect the new node into the tree
-        if (fromLeft) {
-          setLeft(parent, node);
-        } else {
-          setRight(parent, node);
-        }
-      }
-    } else {
-      int compare = compareValue(node);
-      boolean keepGoing;
-
-      // Recurse down to find where the node needs to be added
-      if (compare < 0) {
-        keepGoing = add(getLeft(node), true, node, parent, grandparent);
-      } else if (compare > 0) {
-        keepGoing = add(getRight(node), false, node, parent, grandparent);
-      } else {
-        lastAdd = node;
-        wasAdd = false;
-        return false;
-      }
-
-      // we don't need to fix the root (because it is always set to black)
-      if (node == root || !keepGoing) {
-        return false;
-      }
-    }
-
-
-    // Do we need to fix this node? Only if there are two reds right under each
-    // other.
-    if (isRed(node) && isRed(parent)) {
-      if (parent == getLeft(grandparent)) {
-        int uncle = getRight(grandparent);
-        if (isRed(uncle)) {
-          // case 1.1
-          setRed(parent, false);
-          setRed(uncle, false);
-          setRed(grandparent, true);
-          return true;
-        } else {
-          if (node == getRight(parent)) {
-            // case 1.2
-            // swap node and parent
-            int tmp = node;
-            node = parent;
-            parent = tmp;
-            // left-rotate on node
-            setLeft(grandparent, parent);
-            setRight(node, getLeft(parent));
-            setLeft(parent, node);
-          }
-
-          // case 1.2 and 1.3
-          setRed(parent, false);
-          setRed(grandparent, true);
-
-          // right-rotate on grandparent
-          if (greatGrandparent == NULL) {
-            root = parent;
-          } else if (getLeft(greatGrandparent) == grandparent) {
-            setLeft(greatGrandparent, parent);
-          } else {
-            setRight(greatGrandparent, parent);
-          }
-          setLeft(grandparent, getRight(parent));
-          setRight(parent, grandparent);
-          return false;
-        }
-      } else {
-        int uncle = getLeft(grandparent);
-        if (isRed(uncle)) {
-          // case 2.1
-          setRed(parent, false);
-          setRed(uncle, false);
-          setRed(grandparent, true);
-          return true;
-        } else {
-          if (node == getLeft(parent)) {
-            // case 2.2
-            // swap node and parent
-            int tmp = node;
-            node = parent;
-            parent = tmp;
-            // right-rotate on node
-            setRight(grandparent, parent);
-            setLeft(node, getRight(parent));
-            setRight(parent, node);
-          }
-          // case 2.2 and 2.3
-          setRed(parent, false);
-          setRed(grandparent, true);
-          // left-rotate on grandparent
-          if (greatGrandparent == NULL) {
-            root = parent;
-          } else if (getRight(greatGrandparent) == grandparent) {
-            setRight(greatGrandparent, parent);
-          } else {
-            setLeft(greatGrandparent, parent);
-          }
-          setRight(grandparent, getLeft(parent));
-          setLeft(parent, grandparent);
-          return false;
-        }
-      }
-    } else {
-      return true;
-    }
-  }
-
-  /**
-   * Add the new key to the tree.
-   * @return true if the element is a new one.
-   */
-  protected boolean add() {
-    add(root, false, NULL, NULL, NULL);
-    if (wasAdd) {
-      setRed(root, false);
-      return true;
-    } else {
-      return false;
-    }
-  }
-
-  /**
-   * Get the number of elements in the set.
-   */
-  public int size() {
-    return size;
-  }
-
-  /**
-   * Reset the table to empty.
-   */
-  public void clear() {
-    root = NULL;
-    size = 0;
-    data.clear();
-  }
-
-  /**
-   * Get the buffer size in bytes.
-   */
-  public long getSizeInBytes() {
-    return data.getSizeInBytes();
-  }
-}
-
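The RedBlackTree code removed above keeps each node's color in the low bit of its left-child field, which is why getLeft() shifts the stored value right by one while setLeft() shifts the new pointer left by one and ors the color bit back in. A minimal standalone sketch of that packing convention (the class and method names below are illustrative, not part of the ORC code base):

// A sketch of packing a child index and a color bit into one int, mirroring
// the convention in the deleted RedBlackTree code. Names are illustrative.
public class ColorBitPackingSketch {

  // Pack: child index in the upper bits, color flag in the low bit.
  static int pack(int left, boolean isRed) {
    return (left << 1) | (isRed ? 1 : 0);
  }

  // Recover the child index by shifting the color bit away.
  static int left(int packed) {
    return packed >> 1;
  }

  // Recover the color from the low bit.
  static boolean isRed(int packed) {
    return (packed & 1) != 0;
  }

  public static void main(String[] args) {
    int packed = pack(42, true);
    System.out.println(left(packed));    // 42
    System.out.println(isRed(packed));   // true
    packed &= ~1;                        // repaint the node black, keep the pointer
    System.out.println(left(packed));    // still 42
    System.out.println(isRed(packed));   // false
  }
}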

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/impl/RunLengthByteReader.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/RunLengthByteReader.java b/orc/src/java/org/apache/orc/impl/RunLengthByteReader.java
deleted file mode 100644
index 24bd051..0000000
--- a/orc/src/java/org/apache/orc/impl/RunLengthByteReader.java
+++ /dev/null
@@ -1,174 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc.impl;
-
-import java.io.EOFException;
-import java.io.IOException;
-
-import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
-
-/**
- * A reader that reads a sequence of bytes. A control byte is read before
- * each run: values 0 to 127 mean 3 to 130 repetitions of the next byte. If the
- * control byte is -1 to -128, 1 to 128 literal byte values follow.
- */
-public class RunLengthByteReader {
-  private InStream input;
-  private final byte[] literals =
-    new byte[RunLengthByteWriter.MAX_LITERAL_SIZE];
-  private int numLiterals = 0;
-  private int used = 0;
-  private boolean repeat = false;
-
-  public RunLengthByteReader(InStream input) throws IOException {
-    this.input = input;
-  }
-
-  public void setInStream(InStream input) {
-    this.input = input;
-  }
-
-  private void readValues(boolean ignoreEof) throws IOException {
-    int control = input.read();
-    used = 0;
-    if (control == -1) {
-      if (!ignoreEof) {
-        throw new EOFException("Read past end of buffer RLE byte from " + input);
-      }
-      used = numLiterals = 0;
-      return;
-    } else if (control < 0x80) {
-      repeat = true;
-      numLiterals = control + RunLengthByteWriter.MIN_REPEAT_SIZE;
-      int val = input.read();
-      if (val == -1) {
-        throw new EOFException("Reading RLE byte got EOF");
-      }
-      literals[0] = (byte) val;
-    } else {
-      repeat = false;
-      numLiterals = 0x100 - control;
-      int bytes = 0;
-      while (bytes < numLiterals) {
-        int result = input.read(literals, bytes, numLiterals - bytes);
-        if (result == -1) {
-          throw new EOFException("Reading RLE byte literal got EOF in " + this);
-        }
-        bytes += result;
-      }
-    }
-  }
-
-  public boolean hasNext() throws IOException {
-    return used != numLiterals || input.available() > 0;
-  }
-
-  public byte next() throws IOException {
-    byte result;
-    if (used == numLiterals) {
-      readValues(false);
-    }
-    if (repeat) {
-      result = literals[0];
-    } else {
-      result = literals[used];
-    }
-    ++used;
-    return result;
-  }
-
-  public void nextVector(ColumnVector previous, long[] data, long size)
-      throws IOException {
-    previous.isRepeating = true;
-    for (int i = 0; i < size; i++) {
-      if (!previous.isNull[i]) {
-        data[i] = next();
-      } else {
-        // The default value of null for int types in vectorized
-        // processing is 1, so set that if the value is null
-        data[i] = 1;
-      }
-
-      // The default value for nulls in vectorization for int types is 1,
-      // and given that a non-null value can also be 1, we need to check isNull
-      // as well when determining the isRepeating flag.
-      if (previous.isRepeating
-          && i > 0
-          && ((data[0] != data[i]) ||
-              (previous.isNull[0] != previous.isNull[i]))) {
-        previous.isRepeating = false;
-      }
-    }
-  }
-
-  /**
-   * Read the next size bytes into the data array, skipping over any slots
-   * where isNull is true.
-   * @param isNull if non-null, skip any rows where isNull[r] is true
-   * @param data the array to read into
-   * @param size the number of elements to read
-   * @throws IOException
-   */
-  public void nextVector(boolean[] isNull, int[] data,
-                         long size) throws IOException {
-    if (isNull == null) {
-      for(int i=0; i < size; ++i) {
-        data[i] = next();
-      }
-    } else {
-      for(int i=0; i < size; ++i) {
-        if (!isNull[i]) {
-          data[i] = next();
-        }
-      }
-    }
-  }
-
-  public void seek(PositionProvider index) throws IOException {
-    input.seek(index);
-    int consumed = (int) index.getNext();
-    if (consumed != 0) {
-      // a loop is required for cases where we break the run into two parts
-      while (consumed > 0) {
-        readValues(false);
-        used = consumed;
-        consumed -= numLiterals;
-      }
-    } else {
-      used = 0;
-      numLiterals = 0;
-    }
-  }
-
-  public void skip(long items) throws IOException {
-    while (items > 0) {
-      if (used == numLiterals) {
-        readValues(false);
-      }
-      long consume = Math.min(items, numLiterals - used);
-      used += consume;
-      items -= consume;
-    }
-  }
-
-  @Override
-  public String toString() {
-    return "byte rle " + (repeat ? "repeat" : "literal") + " used: " +
-        used + "/" + numLiterals + " from " + input;
-  }
-}
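The class deleted above implements the byte-oriented run-length scheme its javadoc describes: a control byte from 0 to 127 means the single byte that follows repeats control + 3 times (3 to 130 repetitions), while a control byte from -1 to -128 (0x80 to 0xFF unsigned) is followed by 256 - control literal bytes. A self-contained decoder sketch of that control-byte format (a sketch only; it is not the ORC reader and the names are illustrative):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;

// Illustrative decoder for the byte RLE control scheme described above.
// This is a sketch, not the ORC RunLengthByteReader; names are hypothetical.
public class ByteRleSketch {

  static byte[] decode(InputStream in) throws IOException {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    int control;
    while ((control = in.read()) != -1) {
      if (control < 0x80) {
        // Run: the next byte repeats control + 3 times (3 to 130 repetitions).
        int repetitions = control + 3;
        int value = in.read();
        if (value == -1) {
          throw new EOFException("missing run value");
        }
        for (int i = 0; i < repetitions; ++i) {
          out.write(value);
        }
      } else {
        // Literals: 256 - control bytes follow unchanged (1 to 128 literals).
        int literals = 0x100 - control;
        for (int i = 0; i < literals; ++i) {
          int value = in.read();
          if (value == -1) {
            throw new EOFException("missing literal value");
          }
          out.write(value);
        }
      }
    }
    return out.toByteArray();
  }

  public static void main(String[] args) throws IOException {
    // Control 0x02 => 2 + 3 = 5 repetitions of 0x7f,
    // control 0xfe => 256 - 254 = 2 literal bytes (0x01, 0x02).
    byte[] encoded = {0x02, 0x7f, (byte) 0xfe, 0x01, 0x02};
    byte[] decoded = decode(new ByteArrayInputStream(encoded));
    System.out.println(decoded.length);  // 7
  }
}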


[11/37] hive git commit: HIVE-17118. Move the hive-orc source files to make the package names unique.

Posted by om...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/TestStringDictionary.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/TestStringDictionary.java b/orc/src/test/org/apache/hive/orc/TestStringDictionary.java
new file mode 100644
index 0000000..6e24819
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/TestStringDictionary.java
@@ -0,0 +1,291 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.File;
+import java.util.Random;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+import org.apache.hive.orc.impl.RecordReaderImpl;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestName;
+
+public class TestStringDictionary {
+
+  Path workDir = new Path(System.getProperty("test.tmp.dir", "target" + File.separator + "test"
+      + File.separator + "tmp"));
+
+  Configuration conf;
+  FileSystem fs;
+  Path testFilePath;
+
+  @Rule
+  public TestName testCaseName = new TestName();
+
+  @Before
+  public void openFileSystem() throws Exception {
+    conf = new Configuration();
+    fs = FileSystem.getLocal(conf);
+    testFilePath = new Path(workDir, "TestOrcFile." + testCaseName.getMethodName() + ".orc");
+    fs.delete(testFilePath, false);
+  }
+
+  @Test
+  public void testTooManyDistinct() throws Exception {
+    TypeDescription schema = TypeDescription.createString();
+
+    Writer writer = OrcFile.createWriter(
+        testFilePath,
+        OrcFile.writerOptions(conf).setSchema(schema)
+                                   .compress(CompressionKind.NONE)
+                                   .bufferSize(10000));
+    VectorizedRowBatch batch = schema.createRowBatch();
+    BytesColumnVector col = (BytesColumnVector) batch.cols[0];
+    for (int i = 0; i < 20000; i++) {
+      if (batch.size == batch.getMaxSize()) {
+        writer.addRowBatch(batch);
+        batch.reset();
+      }
+      col.setVal(batch.size++, String.valueOf(i).getBytes());
+    }
+    writer.addRowBatch(batch);
+    writer.close();
+
+    Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs));
+    RecordReader rows = reader.rows();
+    batch = reader.getSchema().createRowBatch();
+    col = (BytesColumnVector) batch.cols[0];
+    int idx = 0;
+    while (rows.nextBatch(batch)) {
+      for(int r=0; r < batch.size; ++r) {
+        assertEquals(String.valueOf(idx++), col.toString(r));
+      }
+    }
+
+    // make sure the encoding type is correct
+    for (StripeInformation stripe : reader.getStripes()) {
+      // hacky but does the job, this casting will work as long as this test resides
+      // within the same package as the ORC reader
+      OrcProto.StripeFooter footer = ((RecordReaderImpl) rows).readStripeFooter(stripe);
+      for (int i = 0; i < footer.getColumnsCount(); ++i) {
+        OrcProto.ColumnEncoding encoding = footer.getColumns(i);
+        Assert.assertEquals(OrcProto.ColumnEncoding.Kind.DIRECT_V2, encoding.getKind());
+      }
+    }
+  }
+
+  @Test
+  public void testHalfDistinct() throws Exception {
+    TypeDescription schema = TypeDescription.createString();
+
+    Writer writer = OrcFile.createWriter(
+        testFilePath,
+        OrcFile.writerOptions(conf).setSchema(schema).compress(CompressionKind.NONE)
+            .bufferSize(10000));
+    Random rand = new Random(123);
+    int[] input = new int[20000];
+    for (int i = 0; i < 20000; i++) {
+      input[i] = rand.nextInt(10000);
+    }
+
+    VectorizedRowBatch batch = schema.createRowBatch();
+    BytesColumnVector col = (BytesColumnVector) batch.cols[0];
+    for (int i = 0; i < 20000; i++) {
+      if (batch.size == batch.getMaxSize()) {
+        writer.addRowBatch(batch);
+        batch.reset();
+      }
+      col.setVal(batch.size++, String.valueOf(input[i]).getBytes());
+    }
+    writer.addRowBatch(batch);
+    writer.close();
+
+    Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs));
+    RecordReader rows = reader.rows();
+    batch = reader.getSchema().createRowBatch();
+    col = (BytesColumnVector) batch.cols[0];
+    int idx = 0;
+    while (rows.nextBatch(batch)) {
+      for(int r=0; r < batch.size; ++r) {
+        assertEquals(String.valueOf(input[idx++]), col.toString(r));
+      }
+    }
+
+    // make sure the encoding type is correct
+    for (StripeInformation stripe : reader.getStripes()) {
+      // hacky but does the job, this casting will work as long as this test resides
+      // within the same package as the ORC reader
+      OrcProto.StripeFooter footer = ((RecordReaderImpl) rows).readStripeFooter(stripe);
+      for (int i = 0; i < footer.getColumnsCount(); ++i) {
+        OrcProto.ColumnEncoding encoding = footer.getColumns(i);
+        Assert.assertEquals(OrcProto.ColumnEncoding.Kind.DICTIONARY_V2, encoding.getKind());
+      }
+    }
+  }
+
+  @Test
+  public void testTooManyDistinctCheckDisabled() throws Exception {
+    TypeDescription schema = TypeDescription.createString();
+
+    conf.setBoolean(OrcConf.ROW_INDEX_STRIDE_DICTIONARY_CHECK.getAttribute(), false);
+    Writer writer = OrcFile.createWriter(
+        testFilePath,
+        OrcFile.writerOptions(conf).setSchema(schema).compress(CompressionKind.NONE)
+            .bufferSize(10000));
+    VectorizedRowBatch batch = schema.createRowBatch();
+    BytesColumnVector string = (BytesColumnVector) batch.cols[0];
+    for (int i = 0; i < 20000; i++) {
+      if (batch.size == batch.getMaxSize()) {
+        writer.addRowBatch(batch);
+        batch.reset();
+      }
+      string.setVal(batch.size++, String.valueOf(i).getBytes());
+    }
+    writer.addRowBatch(batch);
+    writer.close();
+
+    Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs));
+    RecordReader rows = reader.rows();
+    batch = reader.getSchema().createRowBatch();
+    string = (BytesColumnVector) batch.cols[0];
+    int idx = 0;
+    while (rows.nextBatch(batch)) {
+      for(int r=0; r < batch.size; ++r) {
+        assertEquals(String.valueOf(idx++), string.toString(r));
+      }
+    }
+
+    // make sure the encoding type is correct
+    for (StripeInformation stripe : reader.getStripes()) {
+      // hacky but does the job, this casting will work as long as this test resides
+      // within the same package as the ORC reader
+      OrcProto.StripeFooter footer = ((RecordReaderImpl) rows).readStripeFooter(stripe);
+      for (int i = 0; i < footer.getColumnsCount(); ++i) {
+        OrcProto.ColumnEncoding encoding = footer.getColumns(i);
+        Assert.assertEquals(OrcProto.ColumnEncoding.Kind.DIRECT_V2, encoding.getKind());
+      }
+    }
+  }
+
+  @Test
+  public void testHalfDistinctCheckDisabled() throws Exception {
+    TypeDescription schema = TypeDescription.createString();
+
+    conf.setBoolean(OrcConf.ROW_INDEX_STRIDE_DICTIONARY_CHECK.getAttribute(),
+        false);
+    Writer writer = OrcFile.createWriter(
+        testFilePath,
+        OrcFile.writerOptions(conf).setSchema(schema)
+            .compress(CompressionKind.NONE)
+            .bufferSize(10000));
+    Random rand = new Random(123);
+    int[] input = new int[20000];
+    for (int i = 0; i < 20000; i++) {
+      input[i] = rand.nextInt(10000);
+    }
+    VectorizedRowBatch batch = schema.createRowBatch();
+    BytesColumnVector string = (BytesColumnVector) batch.cols[0];
+    for (int i = 0; i < 20000; i++) {
+      if (batch.size == batch.getMaxSize()) {
+        writer.addRowBatch(batch);
+        batch.reset();
+      }
+      string.setVal(batch.size++, String.valueOf(input[i]).getBytes());
+    }
+    writer.addRowBatch(batch);
+    writer.close();
+
+    Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs));
+    RecordReader rows = reader.rows();
+    batch = reader.getSchema().createRowBatch();
+    string = (BytesColumnVector) batch.cols[0];
+    int idx = 0;
+    while (rows.nextBatch(batch)) {
+      for(int r=0; r < batch.size; ++r) {
+        assertEquals(String.valueOf(input[idx++]), string.toString(r));
+      }
+    }
+
+    // make sure the encoding type is correct
+    for (StripeInformation stripe : reader.getStripes()) {
+      // hacky but does the job, this casting will work as long as this test resides
+      // within the same package as the ORC reader
+      OrcProto.StripeFooter footer = ((RecordReaderImpl) rows).readStripeFooter(stripe);
+      for (int i = 0; i < footer.getColumnsCount(); ++i) {
+        OrcProto.ColumnEncoding encoding = footer.getColumns(i);
+        Assert.assertEquals(OrcProto.ColumnEncoding.Kind.DICTIONARY_V2, encoding.getKind());
+      }
+    }
+  }
+
+  @Test
+  public void testTooManyDistinctV11AlwaysDictionary() throws Exception {
+    TypeDescription schema = TypeDescription.createString();
+
+    Writer writer = OrcFile.createWriter(
+        testFilePath,
+        OrcFile.writerOptions(conf).setSchema(schema)
+            .compress(CompressionKind.NONE)
+            .version(OrcFile.Version.V_0_11).bufferSize(10000));
+    VectorizedRowBatch batch = schema.createRowBatch();
+    BytesColumnVector string = (BytesColumnVector) batch.cols[0];
+    for (int i = 0; i < 20000; i++) {
+      if (batch.size == batch.getMaxSize()) {
+        writer.addRowBatch(batch);
+        batch.reset();
+      }
+      string.setVal(batch.size++, String.valueOf(i).getBytes());
+    }
+    writer.addRowBatch(batch);
+    writer.close();
+
+    Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs));
+    batch = reader.getSchema().createRowBatch();
+    string = (BytesColumnVector) batch.cols[0];
+    RecordReader rows = reader.rows();
+    int idx = 0;
+    while (rows.nextBatch(batch)) {
+      for(int r=0; r < batch.size; ++r) {
+        assertEquals(String.valueOf(idx++), string.toString(r));
+      }
+    }
+
+    // make sure the encoding type is correct
+    for (StripeInformation stripe : reader.getStripes()) {
+      // hacky but does the job, this casting will work as long as this test resides
+      // within the same package as the ORC reader
+      OrcProto.StripeFooter footer = ((RecordReaderImpl) rows).readStripeFooter(stripe);
+      for (int i = 0; i < footer.getColumnsCount(); ++i) {
+        OrcProto.ColumnEncoding encoding = footer.getColumns(i);
+        Assert.assertEquals(OrcProto.ColumnEncoding.Kind.DICTIONARY, encoding.getKind());
+      }
+    }
+
+  }
+
+}
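Taken together, these tests pin down the writer's encoding choice: with 20,000 distinct strings the column is written as DIRECT_V2, with roughly 10,000 distinct values out of 20,000 it is written as DICTIONARY_V2, and disabling the row-index-stride check via OrcConf.ROW_INDEX_STRIDE_DICTIONARY_CHECK leaves those outcomes unchanged in the cases above. A rough standalone sketch of a distinct-ratio heuristic of this kind (the 0.8 threshold is an assumed value for illustration, not the writer's configured threshold):

import java.util.HashSet;
import java.util.Set;

// Illustrative distinct-ratio heuristic for choosing between dictionary and
// direct string encoding. The threshold is an assumption, not an ORC constant.
public class EncodingChoiceSketch {

  enum Encoding { DICTIONARY, DIRECT }

  static Encoding choose(String[] values, double maxDistinctRatio) {
    Set<String> distinct = new HashSet<>();
    for (String value : values) {
      distinct.add(value);
    }
    double ratio = (double) distinct.size() / values.length;
    // A dictionary only pays off when enough values repeat.
    return ratio <= maxDistinctRatio ? Encoding.DICTIONARY : Encoding.DIRECT;
  }

  public static void main(String[] args) {
    String[] allDistinct = {"0", "1", "2", "3"};
    String[] halfDistinct = {"0", "0", "1", "1"};
    System.out.println(choose(allDistinct, 0.8));   // DIRECT
    System.out.println(choose(halfDistinct, 0.8));  // DICTIONARY
  }
}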

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/TestTypeDescription.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/TestTypeDescription.java b/orc/src/test/org/apache/hive/orc/TestTypeDescription.java
new file mode 100644
index 0000000..1887209
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/TestTypeDescription.java
@@ -0,0 +1,90 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc;
+
+import static org.junit.Assert.assertEquals;
+
+import org.junit.Test;
+
+public class TestTypeDescription {
+
+  @Test
+  public void testJson() {
+    TypeDescription bin = TypeDescription.createBinary();
+    assertEquals("{\"category\": \"binary\", \"id\": 0, \"max\": 0}",
+        bin.toJson());
+    assertEquals("binary", bin.toString());
+    TypeDescription struct = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createInt())
+        .addField("f2", TypeDescription.createString())
+        .addField("f3", TypeDescription.createDecimal());
+    assertEquals("struct<f1:int,f2:string,f3:decimal(38,10)>",
+        struct.toString());
+    assertEquals("{\"category\": \"struct\", \"id\": 0, \"max\": 3, \"fields\": [\n"
+            + "  \"f1\": {\"category\": \"int\", \"id\": 1, \"max\": 1},\n"
+            + "  \"f2\": {\"category\": \"string\", \"id\": 2, \"max\": 2},\n"
+            + "  \"f3\": {\"category\": \"decimal\", \"id\": 3, \"max\": 3, \"precision\": 38, \"scale\": 10}]}",
+        struct.toJson());
+    struct = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createUnion()
+            .addUnionChild(TypeDescription.createByte())
+            .addUnionChild(TypeDescription.createDecimal()
+                .withPrecision(20).withScale(10)))
+        .addField("f2", TypeDescription.createStruct()
+            .addField("f3", TypeDescription.createDate())
+            .addField("f4", TypeDescription.createDouble())
+            .addField("f5", TypeDescription.createBoolean()))
+        .addField("f6", TypeDescription.createChar().withMaxLength(100));
+    assertEquals("struct<f1:uniontype<tinyint,decimal(20,10)>,f2:struct<f3:date,f4:double,f5:boolean>,f6:char(100)>",
+        struct.toString());
+    assertEquals(
+        "{\"category\": \"struct\", \"id\": 0, \"max\": 8, \"fields\": [\n" +
+            "  \"f1\": {\"category\": \"uniontype\", \"id\": 1, \"max\": 3, \"children\": [\n" +
+            "    {\"category\": \"tinyint\", \"id\": 2, \"max\": 2},\n" +
+            "    {\"category\": \"decimal\", \"id\": 3, \"max\": 3, \"precision\": 20, \"scale\": 10}]},\n" +
+            "  \"f2\": {\"category\": \"struct\", \"id\": 4, \"max\": 7, \"fields\": [\n" +
+            "    \"f3\": {\"category\": \"date\", \"id\": 5, \"max\": 5},\n" +
+            "    \"f4\": {\"category\": \"double\", \"id\": 6, \"max\": 6},\n" +
+            "    \"f5\": {\"category\": \"boolean\", \"id\": 7, \"max\": 7}]},\n" +
+            "  \"f6\": {\"category\": \"char\", \"id\": 8, \"max\": 8, \"length\": 100}]}",
+        struct.toJson());
+  }
+
+  @Test
+  public void testEquals() {
+    TypeDescription type1 =
+        TypeDescription.createStruct()
+        .addField("a", TypeDescription.createInt())
+        .addField("b", TypeDescription.createStruct()
+                         .addField("x", TypeDescription.createString())
+                         .addField("y", TypeDescription.createBinary())
+                         .addField("z", TypeDescription.createDouble()))
+        .addField("c", TypeDescription.createString());
+    assertEquals(0, type1.getId());
+    assertEquals(6, type1.getMaximumId());
+    TypeDescription type2 =
+        TypeDescription.createStruct()
+        .addField("x", TypeDescription.createString())
+        .addField("y", TypeDescription.createBinary())
+        .addField("z", TypeDescription.createDouble());
+    assertEquals(0, type2.getId());
+    assertEquals(3, type2.getMaximumId());
+    assertEquals(type2, type1.getChildren().get(1));
+    assertEquals(type2.hashCode(), type1.getChildren().get(1).hashCode());
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/TestUnrolledBitPack.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/TestUnrolledBitPack.java b/orc/src/test/org/apache/hive/orc/TestUnrolledBitPack.java
new file mode 100644
index 0000000..a7a9de4
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/TestUnrolledBitPack.java
@@ -0,0 +1,114 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.File;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestName;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+
+import com.google.common.collect.Lists;
+import com.google.common.primitives.Longs;
+
+@RunWith(value = Parameterized.class)
+public class TestUnrolledBitPack {
+
+  private long val;
+
+  public TestUnrolledBitPack(long val) {
+    this.val = val;
+  }
+
+  @Parameters
+  public static Collection<Object[]> data() {
+    Object[][] data = new Object[][] { { -1 }, { 1 }, { 7 }, { -128 }, { 32000 }, { 8300000 },
+        { Integer.MAX_VALUE }, { 540000000000L }, { 140000000000000L }, { 36000000000000000L },
+        { Long.MAX_VALUE } };
+    return Arrays.asList(data);
+  }
+
+  Path workDir = new Path(System.getProperty("test.tmp.dir", "target" + File.separator + "test"
+      + File.separator + "tmp"));
+
+  Configuration conf;
+  FileSystem fs;
+  Path testFilePath;
+
+  @Rule
+  public TestName testCaseName = new TestName();
+
+  @Before
+  public void openFileSystem() throws Exception {
+    conf = new Configuration();
+    fs = FileSystem.getLocal(conf);
+    testFilePath = new Path(workDir, "TestOrcFile." + testCaseName.getMethodName() + ".orc");
+    fs.delete(testFilePath, false);
+  }
+
+  @Test
+  public void testBitPacking() throws Exception {
+    TypeDescription schema = TypeDescription.createLong();
+
+    long[] inp = new long[] { val, 0, val, val, 0, val, 0, val, val, 0, val, 0, val, val, 0, 0,
+        val, val, 0, val, 0, 0, val, 0, val, 0, val, 0, 0, val, 0, val, 0, val, 0, 0, val, 0, val,
+        0, val, 0, 0, val, 0, val, 0, val, 0, 0, val, 0, val, 0, val, 0, 0, val, 0, val, 0, val, 0,
+        0, val, 0, val, 0, val, 0, 0, val, 0, val, 0, val, 0, 0, val, 0, val, 0, val, 0, 0, val, 0,
+        val, 0, val, 0, 0, val, 0, val, 0, 0, val, val };
+    List<Long> input = Lists.newArrayList(Longs.asList(inp));
+
+    Writer writer = OrcFile.createWriter(
+        testFilePath,
+        OrcFile.writerOptions(conf).setSchema(schema).stripeSize(100000)
+            .compress(CompressionKind.NONE).bufferSize(10000));
+    VectorizedRowBatch batch = schema.createRowBatch();
+    for (Long l : input) {
+      int row = batch.size++;
+      ((LongColumnVector) batch.cols[0]).vector[row] = l;
+    }
+    writer.addRowBatch(batch);
+    writer.close();
+
+    Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs));
+    RecordReader rows = reader.rows();
+    batch = reader.getSchema().createRowBatch();
+    int idx = 0;
+    while (rows.nextBatch(batch)) {
+      for(int r=0; r < batch.size; ++r) {
+        assertEquals(input.get(idx++).longValue(),
+            ((LongColumnVector) batch.cols[0]).vector[r]);
+      }
+    }
+  }
+
+}


[07/37] hive git commit: HIVE-17118. Move the hive-orc source files to make the package names unique.

Posted by om...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/impl/TestRunLengthIntegerReader.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/impl/TestRunLengthIntegerReader.java b/orc/src/test/org/apache/hive/orc/impl/TestRunLengthIntegerReader.java
new file mode 100644
index 0000000..8a6337d
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/impl/TestRunLengthIntegerReader.java
@@ -0,0 +1,125 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc.impl;
+
+import static junit.framework.Assert.assertEquals;
+
+import java.nio.ByteBuffer;
+import java.util.Random;
+
+import org.apache.hive.orc.CompressionCodec;
+import org.junit.Test;
+
+public class TestRunLengthIntegerReader {
+
+  public void runSeekTest(CompressionCodec codec) throws Exception {
+    TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
+    RunLengthIntegerWriter out = new RunLengthIntegerWriter(
+        new OutStream("test", 1000, codec, collect), true);
+    TestInStream.PositionCollector[] positions =
+        new TestInStream.PositionCollector[4096];
+    Random random = new Random(99);
+    int[] junk = new int[2048];
+    for(int i=0; i < junk.length; ++i) {
+      junk[i] = random.nextInt();
+    }
+    for(int i=0; i < 4096; ++i) {
+      positions[i] = new TestInStream.PositionCollector();
+      out.getPosition(positions[i]);
+      // test runs, incrementing runs, non-runs
+      if (i < 1024) {
+        out.write(i/4);
+      } else if (i < 2048) {
+        out.write(2*i);
+      } else {
+        out.write(junk[i-2048]);
+      }
+    }
+    out.flush();
+    ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
+    collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
+    inBuf.flip();
+    RunLengthIntegerReader in = new RunLengthIntegerReader(InStream.create
+        ("test", new ByteBuffer[]{inBuf}, new long[]{0}, inBuf.remaining(),
+            codec, 1000), true);
+    for(int i=0; i < 2048; ++i) {
+      int x = (int) in.next();
+      if (i < 1024) {
+        assertEquals(i/4, x);
+      } else if (i < 2048) {
+        assertEquals(2*i, x);
+      } else {
+        assertEquals(junk[i-2048], x);
+      }
+    }
+    for(int i=2047; i >= 0; --i) {
+      in.seek(positions[i]);
+      int x = (int) in.next();
+      if (i < 1024) {
+        assertEquals(i/4, x);
+      } else if (i < 2048) {
+        assertEquals(2*i, x);
+      } else {
+        assertEquals(junk[i-2048], x);
+      }
+    }
+  }
+
+  @Test
+  public void testUncompressedSeek() throws Exception {
+    runSeekTest(null);
+  }
+
+  @Test
+  public void testCompressedSeek() throws Exception {
+    runSeekTest(new ZlibCodec());
+  }
+
+  @Test
+  public void testSkips() throws Exception {
+    TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
+    RunLengthIntegerWriter out = new RunLengthIntegerWriter(
+        new OutStream("test", 100, null, collect), true);
+    for(int i=0; i < 2048; ++i) {
+      if (i < 1024) {
+        out.write(i);
+      } else {
+        out.write(256 * i);
+      }
+    }
+    out.flush();
+    ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
+    collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
+    inBuf.flip();
+    RunLengthIntegerReader in = new RunLengthIntegerReader(InStream.create
+        ("test", new ByteBuffer[]{inBuf}, new long[]{0}, inBuf.remaining(),
+            null, 100), true);
+    for(int i=0; i < 2048; i += 10) {
+      int x = (int) in.next();
+      if (i < 1024) {
+        assertEquals(i, x);
+      } else {
+        assertEquals(256 * i, x);
+      }
+      if (i < 2038) {
+        in.skip(9);
+      }
+      in.skip(0);
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/impl/TestSchemaEvolution.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/impl/TestSchemaEvolution.java b/orc/src/test/org/apache/hive/orc/impl/TestSchemaEvolution.java
new file mode 100644
index 0000000..cc963c8
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/impl/TestSchemaEvolution.java
@@ -0,0 +1,480 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc.impl;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hive.orc.OrcFile;
+import org.apache.hive.orc.RecordReader;
+import org.apache.hive.orc.TypeDescription;
+import org.apache.hive.orc.Writer;
+import org.apache.hive.orc.Reader;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestName;
+
+public class TestSchemaEvolution {
+
+  @Rule
+  public TestName testCaseName = new TestName();
+
+  Configuration conf;
+  Path testFilePath;
+  FileSystem fs;
+  Path workDir = new Path(System.getProperty("test.tmp.dir",
+      "target" + File.separator + "test" + File.separator + "tmp"));
+
+  @Before
+  public void setup() throws Exception {
+    conf = new Configuration();
+    fs = FileSystem.getLocal(conf);
+    testFilePath = new Path(workDir, "TestOrcFile." +
+        testCaseName.getMethodName() + ".orc");
+    fs.delete(testFilePath, false);
+  }
+
+  @Test
+  public void testDataTypeConversion1() throws IOException {
+    TypeDescription fileStruct1 = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createInt())
+        .addField("f2", TypeDescription.createString())
+        .addField("f3", TypeDescription.createDecimal().withPrecision(38).withScale(10));
+    SchemaEvolution same1 = new SchemaEvolution(fileStruct1, null);
+    assertFalse(same1.hasConversion());
+    TypeDescription readerStruct1 = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createInt())
+        .addField("f2", TypeDescription.createString())
+        .addField("f3", TypeDescription.createDecimal().withPrecision(38).withScale(10));
+    SchemaEvolution both1 = new SchemaEvolution(fileStruct1, readerStruct1, null);
+    assertFalse(both1.hasConversion());
+    TypeDescription readerStruct1diff = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createLong())
+        .addField("f2", TypeDescription.createString())
+        .addField("f3", TypeDescription.createDecimal().withPrecision(38).withScale(10));
+    SchemaEvolution both1diff = new SchemaEvolution(fileStruct1, readerStruct1diff, null);
+    assertTrue(both1diff.hasConversion());
+    TypeDescription readerStruct1diffPrecision = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createInt())
+        .addField("f2", TypeDescription.createString())
+        .addField("f3", TypeDescription.createDecimal().withPrecision(12).withScale(10));
+    SchemaEvolution both1diffPrecision = new SchemaEvolution(fileStruct1, readerStruct1diffPrecision, null);
+    assertTrue(both1diffPrecision.hasConversion());
+  }
+
+  @Test
+  public void testDataTypeConversion2() throws IOException {
+    TypeDescription fileStruct2 = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createUnion()
+            .addUnionChild(TypeDescription.createByte())
+            .addUnionChild(TypeDescription.createDecimal()
+                .withPrecision(20).withScale(10)))
+        .addField("f2", TypeDescription.createStruct()
+            .addField("f3", TypeDescription.createDate())
+            .addField("f4", TypeDescription.createDouble())
+            .addField("f5", TypeDescription.createBoolean()))
+        .addField("f6", TypeDescription.createChar().withMaxLength(100));
+    SchemaEvolution same2 = new SchemaEvolution(fileStruct2, null);
+    assertFalse(same2.hasConversion());
+    TypeDescription readerStruct2 = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createUnion()
+            .addUnionChild(TypeDescription.createByte())
+            .addUnionChild(TypeDescription.createDecimal()
+                .withPrecision(20).withScale(10)))
+        .addField("f2", TypeDescription.createStruct()
+            .addField("f3", TypeDescription.createDate())
+            .addField("f4", TypeDescription.createDouble())
+            .addField("f5", TypeDescription.createBoolean()))
+        .addField("f6", TypeDescription.createChar().withMaxLength(100));
+    SchemaEvolution both2 = new SchemaEvolution(fileStruct2, readerStruct2, null);
+    assertFalse(both2.hasConversion());
+    TypeDescription readerStruct2diff = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createUnion()
+            .addUnionChild(TypeDescription.createByte())
+            .addUnionChild(TypeDescription.createDecimal()
+                .withPrecision(20).withScale(10)))
+        .addField("f2", TypeDescription.createStruct()
+            .addField("f3", TypeDescription.createDate())
+            .addField("f4", TypeDescription.createDouble())
+            .addField("f5", TypeDescription.createByte()))
+        .addField("f6", TypeDescription.createChar().withMaxLength(100));
+    SchemaEvolution both2diff = new SchemaEvolution(fileStruct2, readerStruct2diff, null);
+    assertTrue(both2diff.hasConversion());
+    TypeDescription readerStruct2diffChar = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createUnion()
+            .addUnionChild(TypeDescription.createByte())
+            .addUnionChild(TypeDescription.createDecimal()
+                .withPrecision(20).withScale(10)))
+        .addField("f2", TypeDescription.createStruct()
+            .addField("f3", TypeDescription.createDate())
+            .addField("f4", TypeDescription.createDouble())
+            .addField("f5", TypeDescription.createBoolean()))
+        .addField("f6", TypeDescription.createChar().withMaxLength(80));
+    SchemaEvolution both2diffChar = new SchemaEvolution(fileStruct2, readerStruct2diffChar, null);
+    assertTrue(both2diffChar.hasConversion());
+  }
+
+  @Test
+  public void testFloatToDoubleEvolution() throws Exception {
+    testFilePath = new Path(workDir, "TestOrcFile." +
+        testCaseName.getMethodName() + ".orc");
+    TypeDescription schema = TypeDescription.createFloat();
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf).setSchema(schema).stripeSize(100000)
+            .bufferSize(10000));
+    VectorizedRowBatch batch = new VectorizedRowBatch(1, 1024);
+    DoubleColumnVector dcv = new DoubleColumnVector(1024);
+    batch.cols[0] = dcv;
+    batch.reset();
+    batch.size = 1;
+    dcv.vector[0] = 74.72f;
+    writer.addRowBatch(batch);
+    writer.close();
+
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+    TypeDescription schemaOnRead = TypeDescription.createDouble();
+    RecordReader rows = reader.rows(new Reader.Options().schema(schemaOnRead));
+    batch = schemaOnRead.createRowBatch();
+    rows.nextBatch(batch);
+    assertEquals(74.72, ((DoubleColumnVector) batch.cols[0]).vector[0], 0.00000000001);
+    rows.close();
+  }
+
+  @Test
+  public void testSafePpdEvaluation() throws IOException {
+    TypeDescription fileStruct1 = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createInt())
+        .addField("f2", TypeDescription.createString())
+        .addField("f3", TypeDescription.createDecimal().withPrecision(38).withScale(10));
+    SchemaEvolution same1 = new SchemaEvolution(fileStruct1, null);
+    assertTrue(same1.isPPDSafeConversion(0));
+    assertFalse(same1.hasConversion());
+    TypeDescription readerStruct1 = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createInt())
+        .addField("f2", TypeDescription.createString())
+        .addField("f3", TypeDescription.createDecimal().withPrecision(38).withScale(10));
+    SchemaEvolution both1 = new SchemaEvolution(fileStruct1, readerStruct1, null);
+    assertFalse(both1.hasConversion());
+    assertTrue(both1.isPPDSafeConversion(0));
+    assertTrue(both1.isPPDSafeConversion(1));
+    assertTrue(both1.isPPDSafeConversion(2));
+    assertTrue(both1.isPPDSafeConversion(3));
+
+    // int -> long
+    TypeDescription readerStruct1diff = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createLong())
+        .addField("f2", TypeDescription.createString())
+        .addField("f3", TypeDescription.createDecimal().withPrecision(38).withScale(10));
+    SchemaEvolution both1diff = new SchemaEvolution(fileStruct1, readerStruct1diff, null);
+    assertTrue(both1diff.hasConversion());
+    assertFalse(both1diff.isPPDSafeConversion(0));
+    assertTrue(both1diff.isPPDSafeConversion(1));
+    assertTrue(both1diff.isPPDSafeConversion(2));
+    assertTrue(both1diff.isPPDSafeConversion(3));
+
+    // decimal(38,10) -> decimal(12, 10)
+    TypeDescription readerStruct1diffPrecision = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createInt())
+        .addField("f2", TypeDescription.createString())
+        .addField("f3", TypeDescription.createDecimal().withPrecision(12).withScale(10));
+    SchemaEvolution both1diffPrecision = new SchemaEvolution(fileStruct1, readerStruct1diffPrecision,
+        new boolean[] {true, false, false, true});
+    assertTrue(both1diffPrecision.hasConversion());
+    assertFalse(both1diffPrecision.isPPDSafeConversion(0));
+    assertFalse(both1diffPrecision.isPPDSafeConversion(1)); // column not included
+    assertFalse(both1diffPrecision.isPPDSafeConversion(2)); // column not included
+    assertFalse(both1diffPrecision.isPPDSafeConversion(3));
+
+    // add columns
+    readerStruct1 = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createInt())
+        .addField("f2", TypeDescription.createString())
+        .addField("f3", TypeDescription.createDecimal().withPrecision(38).withScale(10))
+        .addField("f4", TypeDescription.createBoolean());
+    both1 = new SchemaEvolution(fileStruct1, readerStruct1, null);
+    assertTrue(both1.hasConversion());
+    assertFalse(both1.isPPDSafeConversion(0));
+    assertTrue(both1.isPPDSafeConversion(1));
+    assertTrue(both1.isPPDSafeConversion(2));
+    assertTrue(both1.isPPDSafeConversion(3));
+    assertFalse(both1.isPPDSafeConversion(4));
+  }
+
+  @Test
+  public void testSafePpdEvaluationForInts() throws IOException {
+    // byte -> short -> int -> long
+    TypeDescription fileSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createByte());
+    SchemaEvolution schemaEvolution = new SchemaEvolution(fileSchema, null);
+    assertFalse(schemaEvolution.hasConversion());
+
+    // byte -> short
+    TypeDescription readerSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createShort());
+    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+    assertTrue(schemaEvolution.hasConversion());
+    assertFalse(schemaEvolution.isPPDSafeConversion(0));
+    assertTrue(schemaEvolution.isPPDSafeConversion(1));
+
+    // byte -> int
+    readerSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createInt());
+    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+    assertTrue(schemaEvolution.hasConversion());
+    assertFalse(schemaEvolution.isPPDSafeConversion(0));
+    assertTrue(schemaEvolution.isPPDSafeConversion(1));
+
+    // byte -> long
+    readerSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createLong());
+    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+    assertTrue(schemaEvolution.hasConversion());
+    assertFalse(schemaEvolution.isPPDSafeConversion(0));
+    assertTrue(schemaEvolution.isPPDSafeConversion(1));
+
+    // short -> int -> long
+    fileSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createShort());
+    schemaEvolution = new SchemaEvolution(fileSchema, null);
+    assertFalse(schemaEvolution.hasConversion());
+
+    // unsafe conversion short -> byte
+    readerSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createByte());
+    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+    assertTrue(schemaEvolution.hasConversion());
+    assertFalse(schemaEvolution.isPPDSafeConversion(0));
+    assertFalse(schemaEvolution.isPPDSafeConversion(1));
+
+    // short -> int
+    readerSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createInt());
+    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+    assertTrue(schemaEvolution.hasConversion());
+    assertFalse(schemaEvolution.isPPDSafeConversion(0));
+    assertTrue(schemaEvolution.isPPDSafeConversion(1));
+
+    // short -> long
+    readerSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createLong());
+    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+    assertTrue(schemaEvolution.hasConversion());
+    assertFalse(schemaEvolution.isPPDSafeConversion(0));
+    assertTrue(schemaEvolution.isPPDSafeConversion(1));
+
+    // int -> long
+    fileSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createInt());
+    schemaEvolution = new SchemaEvolution(fileSchema, null);
+    assertFalse(schemaEvolution.hasConversion());
+
+    // unsafe conversion int -> byte
+    readerSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createByte());
+    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+    assertTrue(schemaEvolution.hasConversion());
+    assertFalse(schemaEvolution.isPPDSafeConversion(0));
+    assertFalse(schemaEvolution.isPPDSafeConversion(1));
+
+    // unsafe conversion int -> short
+    readerSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createShort());
+    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+    assertTrue(schemaEvolution.hasConversion());
+    assertFalse(schemaEvolution.isPPDSafeConversion(0));
+    assertFalse(schemaEvolution.isPPDSafeConversion(1));
+
+    // int -> long
+    readerSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createLong());
+    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+    assertTrue(schemaEvolution.hasConversion());
+    assertFalse(schemaEvolution.isPPDSafeConversion(0));
+    assertTrue(schemaEvolution.isPPDSafeConversion(1));
+
+    // long
+    fileSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createLong());
+    schemaEvolution = new SchemaEvolution(fileSchema, null);
+    assertTrue(schemaEvolution.isPPDSafeConversion(0));
+    assertFalse(schemaEvolution.hasConversion());
+
+    // unsafe conversion long -> byte
+    readerSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createByte());
+    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+    assertTrue(schemaEvolution.hasConversion());
+    assertFalse(schemaEvolution.isPPDSafeConversion(0));
+    assertFalse(schemaEvolution.isPPDSafeConversion(1));
+
+    // unsafe conversion long -> short
+    readerSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createShort());
+    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+    assertTrue(schemaEvolution.hasConversion());
+    assertFalse(schemaEvolution.isPPDSafeConversion(0));
+    assertFalse(schemaEvolution.isPPDSafeConversion(1));
+
+    // unsafe conversion long -> int
+    readerSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createInt());
+    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+    assertTrue(schemaEvolution.hasConversion());
+    assertFalse(schemaEvolution.isPPDSafeConversion(0));
+    assertFalse(schemaEvolution.isPPDSafeConversion(1));
+
+    // invalid
+    readerSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createString());
+    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+    assertTrue(schemaEvolution.hasConversion());
+    assertFalse(schemaEvolution.isPPDSafeConversion(0));
+    assertFalse(schemaEvolution.isPPDSafeConversion(1));
+
+    // invalid
+    readerSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createFloat());
+    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+    assertTrue(schemaEvolution.hasConversion());
+    assertFalse(schemaEvolution.isPPDSafeConversion(0));
+    assertFalse(schemaEvolution.isPPDSafeConversion(1));
+
+    // invalid
+    readerSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createTimestamp());
+    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+    assertTrue(schemaEvolution.hasConversion());
+    assertFalse(schemaEvolution.isPPDSafeConversion(0));
+    assertFalse(schemaEvolution.isPPDSafeConversion(1));
+  }
+
+  @Test
+  public void testSafePpdEvaluationForStrings() throws IOException {
+    TypeDescription fileSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createString());
+    SchemaEvolution schemaEvolution = new SchemaEvolution(fileSchema, null);
+    assertTrue(schemaEvolution.isPPDSafeConversion(0));
+    assertFalse(schemaEvolution.hasConversion());
+
+    // string -> char
+    TypeDescription readerSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createChar());
+    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+    assertTrue(schemaEvolution.hasConversion());
+    assertFalse(schemaEvolution.isPPDSafeConversion(0));
+    assertFalse(schemaEvolution.isPPDSafeConversion(1));
+
+    // string -> varchar
+    readerSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createVarchar());
+    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+    assertTrue(schemaEvolution.hasConversion());
+    assertFalse(schemaEvolution.isPPDSafeConversion(0));
+    assertTrue(schemaEvolution.isPPDSafeConversion(1));
+
+    fileSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createChar());
+    schemaEvolution = new SchemaEvolution(fileSchema, null);
+    assertTrue(schemaEvolution.isPPDSafeConversion(0));
+    assertFalse(schemaEvolution.hasConversion());
+
+    // char -> string
+    readerSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createString());
+    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+    assertTrue(schemaEvolution.hasConversion());
+    assertFalse(schemaEvolution.isPPDSafeConversion(0));
+    assertFalse(schemaEvolution.isPPDSafeConversion(1));
+
+    // char -> varchar
+    readerSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createVarchar());
+    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+    assertTrue(schemaEvolution.hasConversion());
+    assertFalse(schemaEvolution.isPPDSafeConversion(0));
+    assertFalse(schemaEvolution.isPPDSafeConversion(1));
+
+    fileSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createVarchar());
+    schemaEvolution = new SchemaEvolution(fileSchema, null);
+    assertTrue(schemaEvolution.isPPDSafeConversion(0));
+    assertFalse(schemaEvolution.hasConversion());
+
+    // varchar -> string
+    readerSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createString());
+    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+    assertTrue(schemaEvolution.hasConversion());
+    assertFalse(schemaEvolution.isPPDSafeConversion(0));
+    assertTrue(schemaEvolution.isPPDSafeConversion(1));
+
+    // varchar -> char
+    readerSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createChar());
+    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+    assertTrue(schemaEvolution.hasConversion());
+    assertFalse(schemaEvolution.isPPDSafeConversion(0));
+    assertFalse(schemaEvolution.isPPDSafeConversion(1));
+
+    // invalid
+    readerSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createDecimal());
+    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+    assertTrue(schemaEvolution.hasConversion());
+    assertFalse(schemaEvolution.isPPDSafeConversion(0));
+    assertFalse(schemaEvolution.isPPDSafeConversion(1));
+
+    // invalid
+    readerSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createDate());
+    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+    assertTrue(schemaEvolution.hasConversion());
+    assertFalse(schemaEvolution.isPPDSafeConversion(0));
+    assertFalse(schemaEvolution.isPPDSafeConversion(1));
+
+    // invalid
+    readerSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createInt());
+    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+    assertTrue(schemaEvolution.hasConversion());
+    assertFalse(schemaEvolution.isPPDSafeConversion(0));
+    assertFalse(schemaEvolution.isPPDSafeConversion(1));
+  }
+
+  @Test
+  public void ensureFileIncluded() throws IOException {
+    TypeDescription file = TypeDescription.fromString("struct<x:int,y:int>");
+    SchemaEvolution evolution = new SchemaEvolution(file, null);
+    boolean[] include = evolution.getFileIncluded();
+    assertEquals(3, include.length);
+    for(int i=0; i < include.length; ++i) {
+      assertTrue("element " + i, include[i]);
+    }
+  }
+}

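Read together, the string-family assertions above reduce to a single rule: only string <-> varchar evolution is treated as PPD-safe, while any conversion to or from char, and any conversion out of the string family, is not. Below is a minimal standalone sketch of that rule, not part of the patch, reusing only the constructor and methods exercised by the test and assuming the org.apache.hive.orc package layout used elsewhere in this change (the class name PpdSafetySketch is illustrative):

import org.apache.hive.orc.TypeDescription;
import org.apache.hive.orc.impl.SchemaEvolution;

public class PpdSafetySketch {
  public static void main(String[] args) throws Exception {
    // File written as varchar, read back as string: a widening conversion inside
    // the string family, so predicate push-down on column 1 remains safe.
    TypeDescription file = TypeDescription.createStruct()
        .addField("f1", TypeDescription.createVarchar());
    TypeDescription reader = TypeDescription.createStruct()
        .addField("f1", TypeDescription.createString());
    SchemaEvolution evolution = new SchemaEvolution(file, reader, null);
    System.out.println(evolution.hasConversion());        // true
    System.out.println(evolution.isPPDSafeConversion(1)); // true, per the assertions above
  }
}
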
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/impl/TestSerializationUtils.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/impl/TestSerializationUtils.java b/orc/src/test/org/apache/hive/orc/impl/TestSerializationUtils.java
new file mode 100644
index 0000000..5bcee60
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/impl/TestSerializationUtils.java
@@ -0,0 +1,199 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc.impl;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.fail;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.InputStream;
+import java.math.BigInteger;
+import java.util.Random;
+
+import org.junit.Test;
+
+import com.google.common.math.LongMath;
+
+public class TestSerializationUtils {
+
+  private InputStream fromBuffer(ByteArrayOutputStream buffer) {
+    return new ByteArrayInputStream(buffer.toByteArray());
+  }
+
+  @Test
+  public void testDoubles() throws Exception {
+    double tolerance = 0.0000000000000001;
+    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
+    SerializationUtils utils = new SerializationUtils();
+    utils.writeDouble(buffer, 1343822337.759);
+    assertEquals(1343822337.759, utils.readDouble(fromBuffer(buffer)), tolerance);
+    buffer = new ByteArrayOutputStream();
+    utils.writeDouble(buffer, 0.8);
+    double got = utils.readDouble(fromBuffer(buffer));
+    assertEquals(0.8, got, tolerance);
+  }
+
+  @Test
+  public void testBigIntegers() throws Exception {
+    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
+    SerializationUtils.writeBigInteger(buffer, BigInteger.valueOf(0));
+    assertArrayEquals(new byte[]{0}, buffer.toByteArray());
+    assertEquals(0L,
+        SerializationUtils.readBigInteger(fromBuffer(buffer)).longValue());
+    buffer.reset();
+    SerializationUtils.writeBigInteger(buffer, BigInteger.valueOf(1));
+    assertArrayEquals(new byte[]{2}, buffer.toByteArray());
+    assertEquals(1L,
+        SerializationUtils.readBigInteger(fromBuffer(buffer)).longValue());
+    buffer.reset();
+    SerializationUtils.writeBigInteger(buffer, BigInteger.valueOf(-1));
+    assertArrayEquals(new byte[]{1}, buffer.toByteArray());
+    assertEquals(-1L,
+        SerializationUtils.readBigInteger(fromBuffer(buffer)).longValue());
+    buffer.reset();
+    SerializationUtils.writeBigInteger(buffer, BigInteger.valueOf(50));
+    assertArrayEquals(new byte[]{100}, buffer.toByteArray());
+    assertEquals(50L,
+        SerializationUtils.readBigInteger(fromBuffer(buffer)).longValue());
+    buffer.reset();
+    SerializationUtils.writeBigInteger(buffer, BigInteger.valueOf(-50));
+    assertArrayEquals(new byte[]{99}, buffer.toByteArray());
+    assertEquals(-50L,
+        SerializationUtils.readBigInteger(fromBuffer(buffer)).longValue());
+    for(int i=-8192; i < 8192; ++i) {
+      buffer.reset();
+      SerializationUtils.writeBigInteger(buffer, BigInteger.valueOf(i));
+      assertEquals("compare length for " + i,
+          i >= -64 && i < 64 ? 1 : 2, buffer.size());
+      assertEquals("compare result for " + i,
+          i, SerializationUtils.readBigInteger(fromBuffer(buffer)).intValue());
+    }
+    buffer.reset();
+    SerializationUtils.writeBigInteger(buffer,
+        new BigInteger("123456789abcdef0",16));
+    assertEquals(new BigInteger("123456789abcdef0",16),
+        SerializationUtils.readBigInteger(fromBuffer(buffer)));
+    buffer.reset();
+    SerializationUtils.writeBigInteger(buffer,
+        new BigInteger("-123456789abcdef0",16));
+    assertEquals(new BigInteger("-123456789abcdef0",16),
+        SerializationUtils.readBigInteger(fromBuffer(buffer)));
+    StringBuilder buf = new StringBuilder();
+    for(int i=0; i < 256; ++i) {
+      String num = Integer.toHexString(i);
+      if (num.length() == 1) {
+        buf.append('0');
+      }
+      buf.append(num);
+    }
+    buffer.reset();
+    SerializationUtils.writeBigInteger(buffer,
+        new BigInteger(buf.toString(),16));
+    assertEquals(new BigInteger(buf.toString(),16),
+        SerializationUtils.readBigInteger(fromBuffer(buffer)));
+    buffer.reset();
+    SerializationUtils.writeBigInteger(buffer,
+        new BigInteger("ff000000000000000000000000000000000000000000ff",16));
+    assertEquals(
+        new BigInteger("ff000000000000000000000000000000000000000000ff",16),
+        SerializationUtils.readBigInteger(fromBuffer(buffer)));
+  }
+
+  @Test
+  public void testSubtractionOverflow() {
+    // cross check results with Guava results below
+    SerializationUtils utils = new SerializationUtils();
+    assertEquals(false, utils.isSafeSubtract(22222222222L, Long.MIN_VALUE));
+    assertEquals(false, utils.isSafeSubtract(-22222222222L, Long.MAX_VALUE));
+    assertEquals(false, utils.isSafeSubtract(Long.MIN_VALUE, Long.MAX_VALUE));
+    assertEquals(true, utils.isSafeSubtract(-1553103058346370095L, 6553103058346370095L));
+    assertEquals(true, utils.isSafeSubtract(0, Long.MAX_VALUE));
+    assertEquals(true, utils.isSafeSubtract(Long.MIN_VALUE, 0));
+  }
+
+  @Test
+  public void testSubtractionOverflowGuava() {
+    try {
+      LongMath.checkedSubtract(22222222222L, Long.MIN_VALUE);
+      fail("expected ArithmeticException for overflow");
+    } catch (ArithmeticException ex) {
+      assertEquals(ex.getMessage(), "overflow");
+    }
+
+    try {
+      LongMath.checkedSubtract(-22222222222L, Long.MAX_VALUE);
+      fail("expected ArithmeticException for overflow");
+    } catch (ArithmeticException ex) {
+      assertEquals(ex.getMessage(), "overflow");
+    }
+
+    try {
+      LongMath.checkedSubtract(Long.MIN_VALUE, Long.MAX_VALUE);
+      fail("expected ArithmeticException for overflow");
+    } catch (ArithmeticException ex) {
+      assertEquals(ex.getMessage(), "overflow");
+    }
+
+    assertEquals(-8106206116692740190L,
+        LongMath.checkedSubtract(-1553103058346370095L, 6553103058346370095L));
+    assertEquals(-Long.MAX_VALUE, LongMath.checkedSubtract(0, Long.MAX_VALUE));
+    assertEquals(Long.MIN_VALUE, LongMath.checkedSubtract(Long.MIN_VALUE, 0));
+  }
+
+  @Test
+  public void testRandomFloats() throws Exception {
+    float tolerance = 0.0000000000000001f;
+    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
+    SerializationUtils utils = new SerializationUtils();
+    Random rand = new Random();
+    int n = 100_000;
+    float[] expected = new float[n];
+    for (int i = 0; i < n; i++) {
+      float f = rand.nextFloat();
+      expected[i] = f;
+      utils.writeFloat(buffer, f);
+    }
+    InputStream newBuffer = fromBuffer(buffer);
+    for (int i = 0; i < n; i++) {
+      float got = utils.readFloat(newBuffer);
+      assertEquals(expected[i], got, tolerance);
+    }
+  }
+
+  @Test
+  public void testRandomDoubles() throws Exception {
+    double tolerance = 0.0000000000000001;
+    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
+    SerializationUtils utils = new SerializationUtils();
+    Random rand = new Random();
+    int n = 100_000;
+    double[] expected = new double[n];
+    for (int i = 0; i < n; i++) {
+      double d = rand.nextDouble();
+      expected[i] = d;
+      utils.writeDouble(buffer, d);
+    }
+    InputStream newBuffer = fromBuffer(buffer);
+    for (int i = 0; i < n; i++) {
+      double got = utils.readDouble(newBuffer);
+      assertEquals(expected[i], got, tolerance);
+    }
+  }
+}

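The byte-level assertions in testBigIntegers (0 -> {0}, 1 -> {2}, -1 -> {1}, 50 -> {100}, -50 -> {99}, and a single byte exactly when -64 <= i < 64) are consistent with a zigzag mapping applied before a 7-bit varint encoding. The sketch below, not part of the patch, illustrates that mapping against the asserted values; it is offered as a reading aid, not as the writeBigInteger implementation itself:

public class ZigZagSketch {
  // Zigzag: non-negative n maps to 2n, negative n maps to -2n - 1,
  // so small magnitudes of either sign stay small.
  static long zigzag(long n) {
    return (n << 1) ^ (n >> 63);
  }

  public static void main(String[] args) {
    long[] samples = {0, 1, -1, 50, -50};
    for (long n : samples) {
      System.out.println(n + " -> " + zigzag(n));  // 0, 2, 1, 100, 99
    }
    // Values in [-64, 64) zigzag into [0, 128) and therefore fit one 7-bit
    // varint group, matching the "compare length" loop above.
  }
}
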
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/impl/TestStreamName.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/impl/TestStreamName.java b/orc/src/test/org/apache/hive/orc/impl/TestStreamName.java
new file mode 100644
index 0000000..4aed06c
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/impl/TestStreamName.java
@@ -0,0 +1,49 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc.impl;
+
+import org.apache.hive.orc.OrcProto;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+public class TestStreamName {
+
+  @Test
+  public void test1() throws Exception {
+    StreamName s1 = new StreamName(3, OrcProto.Stream.Kind.DATA);
+    StreamName s2 = new StreamName(3,
+        OrcProto.Stream.Kind.DICTIONARY_DATA);
+    StreamName s3 = new StreamName(5, OrcProto.Stream.Kind.DATA);
+    StreamName s4 = new StreamName(5,
+        OrcProto.Stream.Kind.DICTIONARY_DATA);
+    StreamName s1p = new StreamName(3, OrcProto.Stream.Kind.DATA);
+    assertEquals(true, s1.equals(s1));
+    assertEquals(false, s1.equals(s2));
+    assertEquals(false, s1.equals(s3));
+    assertEquals(true, s1.equals(s1p));
+    assertEquals(true, s1.compareTo(null) < 0);
+    assertEquals(false, s1.equals(null));
+    assertEquals(true, s1.compareTo(s2) < 0);
+    assertEquals(true, s2.compareTo(s3) < 0);
+    assertEquals(true, s3.compareTo(s4) < 0);
+    assertEquals(true, s4.compareTo(s1p) > 0);
+    assertEquals(0, s1p.compareTo(s1));
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/impl/TestStringRedBlackTree.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/impl/TestStringRedBlackTree.java b/orc/src/test/org/apache/hive/orc/impl/TestStringRedBlackTree.java
new file mode 100644
index 0000000..c3e51b8
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/impl/TestStringRedBlackTree.java
@@ -0,0 +1,232 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc.impl;
+
+import org.apache.hadoop.io.DataOutputBuffer;
+import org.apache.hadoop.io.IntWritable;
+import org.junit.Test;
+
+import java.io.IOException;
+
+import static junit.framework.Assert.assertEquals;
+
+/**
+ * Test the red-black tree with string keys.
+ */
+public class TestStringRedBlackTree {
+
+  /**
+   * Checks the red-black tree rules to make sure that we have correctly built
+   * a valid tree.
+   *
+   * Properties:
+   *   1. Red nodes must have black children
+   *   2. Each node must have the same black height on both sides.
+   *
+   * @param node The id of the root of the subtree to check for the red-black
+   *        tree properties.
+   * @return The black-height of the subtree.
+   */
+  private int checkSubtree(RedBlackTree tree, int node, IntWritable count
+                          ) throws IOException {
+    if (node == RedBlackTree.NULL) {
+      return 1;
+    }
+    count.set(count.get() + 1);
+    boolean is_red = tree.isRed(node);
+    int left = tree.getLeft(node);
+    int right = tree.getRight(node);
+    if (is_red) {
+      if (tree.isRed(left)) {
+        printTree(tree, "", tree.root);
+        throw new IllegalStateException("Left node of " + node + " is " + left +
+          " and both are red.");
+      }
+      if (tree.isRed(right)) {
+        printTree(tree, "", tree.root);
+        throw new IllegalStateException("Right node of " + node + " is " +
+          right + " and both are red.");
+      }
+    }
+    int left_depth = checkSubtree(tree, left, count);
+    int right_depth = checkSubtree(tree, right, count);
+    if (left_depth != right_depth) {
+      printTree(tree, "", tree.root);
+      throw new IllegalStateException("Lopsided tree at node " + node +
+        " with depths " + left_depth + " and " + right_depth);
+    }
+    if (is_red) {
+      return left_depth;
+    } else {
+      return left_depth + 1;
+    }
+  }
+
+  /**
+   * Checks the validity of the entire tree. Also ensures that the number of
+   * nodes visited is the same as the size of the set.
+   */
+  void checkTree(RedBlackTree tree) throws IOException {
+    IntWritable count = new IntWritable(0);
+    if (tree.isRed(tree.root)) {
+      printTree(tree, "", tree.root);
+      throw new IllegalStateException("root is red");
+    }
+    checkSubtree(tree, tree.root, count);
+    if (count.get() != tree.size) {
+      printTree(tree, "", tree.root);
+      throw new IllegalStateException("Broken tree! visited= " + count.get() +
+        " size=" + tree.size);
+    }
+  }
+
+  void printTree(RedBlackTree tree, String indent, int node
+                ) throws IOException {
+    if (node == RedBlackTree.NULL) {
+      System.err.println(indent + "NULL");
+    } else {
+      System.err.println(indent + "Node " + node + " color " +
+        (tree.isRed(node) ? "red" : "black"));
+      printTree(tree, indent + "  ", tree.getLeft(node));
+      printTree(tree, indent + "  ", tree.getRight(node));
+    }
+  }
+
+  private static class MyVisitor implements StringRedBlackTree.Visitor {
+    private final String[] words;
+    private final int[] order;
+    private final DataOutputBuffer buffer = new DataOutputBuffer();
+    int current = 0;
+
+    MyVisitor(String[] args, int[] order) {
+      words = args;
+      this.order = order;
+    }
+
+    @Override
+    public void visit(StringRedBlackTree.VisitorContext context
+                     ) throws IOException {
+      String word = context.getText().toString();
+      assertEquals("in word " + current, words[current], word);
+      assertEquals("in word " + current, order[current],
+        context.getOriginalPosition());
+      buffer.reset();
+      context.writeBytes(buffer);
+      assertEquals(word, new String(buffer.getData(),0,buffer.getLength()));
+      current += 1;
+    }
+  }
+
+  void checkContents(StringRedBlackTree tree, int[] order,
+                     String... params
+                    ) throws IOException {
+    tree.visit(new MyVisitor(params, order));
+  }
+
+  StringRedBlackTree buildTree(String... params) throws IOException {
+    StringRedBlackTree result = new StringRedBlackTree(1000);
+    for(String word: params) {
+      result.add(word);
+      checkTree(result);
+    }
+    return result;
+  }
+
+  @Test
+  public void test1() throws Exception {
+    StringRedBlackTree tree = new StringRedBlackTree(5);
+    assertEquals(0, tree.getSizeInBytes());
+    checkTree(tree);
+    assertEquals(0, tree.add("owen"));
+    checkTree(tree);
+    assertEquals(1, tree.add("ashutosh"));
+    checkTree(tree);
+    assertEquals(0, tree.add("owen"));
+    checkTree(tree);
+    assertEquals(2, tree.add("alan"));
+    checkTree(tree);
+    assertEquals(2, tree.add("alan"));
+    checkTree(tree);
+    assertEquals(1, tree.add("ashutosh"));
+    checkTree(tree);
+    assertEquals(3, tree.add("greg"));
+    checkTree(tree);
+    assertEquals(4, tree.add("eric"));
+    checkTree(tree);
+    assertEquals(5, tree.add("arun"));
+    checkTree(tree);
+    assertEquals(6, tree.size());
+    checkTree(tree);
+    assertEquals(6, tree.add("eric14"));
+    checkTree(tree);
+    assertEquals(7, tree.add("o"));
+    checkTree(tree);
+    assertEquals(8, tree.add("ziggy"));
+    checkTree(tree);
+    assertEquals(9, tree.add("z"));
+    checkTree(tree);
+    checkContents(tree, new int[]{2,5,1,4,6,3,7,0,9,8},
+      "alan", "arun", "ashutosh", "eric", "eric14", "greg",
+      "o", "owen", "z", "ziggy");
+    assertEquals(32888, tree.getSizeInBytes());
+    // check that adding greg again bumps the count
+    assertEquals(3, tree.add("greg"));
+    assertEquals(41, tree.getCharacterSize());
+    // add some more strings to test the different branches of the
+    // rebalancing
+    assertEquals(10, tree.add("zak"));
+    checkTree(tree);
+    assertEquals(11, tree.add("eric1"));
+    checkTree(tree);
+    assertEquals(12, tree.add("ash"));
+    checkTree(tree);
+    assertEquals(13, tree.add("harry"));
+    checkTree(tree);
+    assertEquals(14, tree.add("john"));
+    checkTree(tree);
+    tree.clear();
+    checkTree(tree);
+    assertEquals(0, tree.getSizeInBytes());
+    assertEquals(0, tree.getCharacterSize());
+  }
+
+  @Test
+  public void test2() throws Exception {
+    StringRedBlackTree tree =
+      buildTree("a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l",
+        "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z");
+    assertEquals(26, tree.size());
+    checkContents(tree, new int[]{0,1,2, 3,4,5, 6,7,8, 9,10,11, 12,13,14,
+      15,16,17, 18,19,20, 21,22,23, 24,25},
+      "a", "b", "c", "d", "e", "f", "g", "h", "i", "j","k", "l", "m", "n", "o",
+      "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z");
+  }
+
+  @Test
+  public void test3() throws Exception {
+    StringRedBlackTree tree =
+      buildTree("z", "y", "x", "w", "v", "u", "t", "s", "r", "q", "p", "o", "n",
+        "m", "l", "k", "j", "i", "h", "g", "f", "e", "d", "c", "b", "a");
+    assertEquals(26, tree.size());
+    checkContents(tree, new int[]{25,24,23, 22,21,20, 19,18,17, 16,15,14,
+      13,12,11, 10,9,8, 7,6,5, 4,3,2, 1,0},
+      "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o",
+      "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z");
+  }
+}

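The order arrays passed to checkContents encode, for each word visited in sorted order, the position at which that word was first added to the dictionary. The standalone sketch below, not part of the patch, reproduces the expected array from test1 with plain JDK collections standing in for the red-black tree (it mimics only the position bookkeeping, not the balancing):

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

public class DictionaryOrderSketch {
  public static void main(String[] args) {
    // Distinct words from test1 in first-insertion order; duplicates keep their first id.
    String[] added = {"owen", "ashutosh", "alan", "greg", "eric",
                      "arun", "eric14", "o", "ziggy", "z"};
    Map<String, Integer> firstSeen = new LinkedHashMap<>();
    for (String w : added) {
      firstSeen.putIfAbsent(w, firstSeen.size());
    }
    // Visit in sorted order and report each word's original position.
    List<Integer> order = new ArrayList<>(new TreeMap<>(firstSeen).values());
    System.out.println(order);  // [2, 5, 1, 4, 6, 3, 7, 0, 9, 8]
  }
}
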
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/impl/TestZlib.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/impl/TestZlib.java b/orc/src/test/org/apache/hive/orc/impl/TestZlib.java
new file mode 100644
index 0000000..c87f4a8
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/impl/TestZlib.java
@@ -0,0 +1,56 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc.impl;
+
+import org.apache.hive.orc.CompressionCodec;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+import static junit.framework.Assert.assertEquals;
+import static junit.framework.Assert.fail;
+
+public class TestZlib {
+
+  @Test
+  public void testNoOverflow() throws Exception {
+    ByteBuffer in = ByteBuffer.allocate(10);
+    ByteBuffer out = ByteBuffer.allocate(10);
+    in.put(new byte[]{1,2,3,4,5,6,7,10});
+    in.flip();
+    CompressionCodec codec = new ZlibCodec();
+    assertEquals(false, codec.compress(in, out, null));
+  }
+
+  @Test
+  public void testCorrupt() throws Exception {
+    ByteBuffer buf = ByteBuffer.allocate(1000);
+    buf.put(new byte[]{127,-128,0,99,98,-1});
+    buf.flip();
+    CompressionCodec codec = new ZlibCodec();
+    ByteBuffer out = ByteBuffer.allocate(1000);
+    try {
+      codec.decompress(buf, out);
+      fail();
+    } catch (IOException ioe) {
+      // EXPECTED
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/tools/TestFileDump.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/tools/TestFileDump.java b/orc/src/test/org/apache/hive/orc/tools/TestFileDump.java
new file mode 100644
index 0000000..50e6208
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/tools/TestFileDump.java
@@ -0,0 +1,485 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc.tools;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+
+import java.io.BufferedReader;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.FileReader;
+import java.io.PrintStream;
+import java.sql.Date;
+import java.sql.Timestamp;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hive.orc.CompressionKind;
+import org.apache.hive.orc.OrcConf;
+import org.apache.hive.orc.OrcFile;
+import org.apache.hive.orc.TypeDescription;
+import org.apache.hive.orc.Writer;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestFileDump {
+
+  Path workDir = new Path(System.getProperty("test.tmp.dir"));
+  Configuration conf;
+  FileSystem fs;
+  Path testFilePath;
+
+  @Before
+  public void openFileSystem () throws Exception {
+    conf = new Configuration();
+    fs = FileSystem.getLocal(conf);
+    fs.setWorkingDirectory(workDir);
+    testFilePath = new Path("TestFileDump.testDump.orc");
+    fs.delete(testFilePath, false);
+  }
+
+  static TypeDescription getMyRecordType() {
+    return TypeDescription.createStruct()
+        .addField("i", TypeDescription.createInt())
+        .addField("l", TypeDescription.createLong())
+        .addField("s", TypeDescription.createString());
+  }
+
+  static void appendMyRecord(VectorizedRowBatch batch,
+                             int i,
+                             long l,
+                             String str) {
+    ((LongColumnVector) batch.cols[0]).vector[batch.size] = i;
+    ((LongColumnVector) batch.cols[1]).vector[batch.size] = l;
+    if (str == null) {
+      batch.cols[2].noNulls = false;
+      batch.cols[2].isNull[batch.size] = true;
+    } else {
+      ((BytesColumnVector) batch.cols[2]).setVal(batch.size,
+          str.getBytes());
+    }
+    batch.size += 1;
+  }
+
+  static TypeDescription getAllTypesType() {
+    return TypeDescription.createStruct()
+        .addField("b", TypeDescription.createBoolean())
+        .addField("bt", TypeDescription.createByte())
+        .addField("s", TypeDescription.createShort())
+        .addField("i", TypeDescription.createInt())
+        .addField("l", TypeDescription.createLong())
+        .addField("f", TypeDescription.createFloat())
+        .addField("d", TypeDescription.createDouble())
+        .addField("de", TypeDescription.createDecimal())
+        .addField("t", TypeDescription.createTimestamp())
+        .addField("dt", TypeDescription.createDate())
+        .addField("str", TypeDescription.createString())
+        .addField("c", TypeDescription.createChar().withMaxLength(5))
+        .addField("vc", TypeDescription.createVarchar().withMaxLength(10))
+        .addField("m", TypeDescription.createMap(
+            TypeDescription.createString(),
+            TypeDescription.createString()))
+        .addField("a", TypeDescription.createList(TypeDescription.createInt()))
+        .addField("st", TypeDescription.createStruct()
+                .addField("i", TypeDescription.createInt())
+                .addField("s", TypeDescription.createString()));
+  }
+
+  static void appendAllTypes(VectorizedRowBatch batch,
+                             boolean b,
+                             byte bt,
+                             short s,
+                             int i,
+                             long l,
+                             float f,
+                             double d,
+                             HiveDecimalWritable de,
+                             Timestamp t,
+                             DateWritable dt,
+                             String str,
+                             String c,
+                             String vc,
+                             Map<String, String> m,
+                             List<Integer> a,
+                             int sti,
+                             String sts) {
+    int row = batch.size++;
+    ((LongColumnVector) batch.cols[0]).vector[row] = b ? 1 : 0;
+    ((LongColumnVector) batch.cols[1]).vector[row] = bt;
+    ((LongColumnVector) batch.cols[2]).vector[row] = s;
+    ((LongColumnVector) batch.cols[3]).vector[row] = i;
+    ((LongColumnVector) batch.cols[4]).vector[row] = l;
+    ((DoubleColumnVector) batch.cols[5]).vector[row] = f;
+    ((DoubleColumnVector) batch.cols[6]).vector[row] = d;
+    ((DecimalColumnVector) batch.cols[7]).vector[row].set(de);
+    ((TimestampColumnVector) batch.cols[8]).set(row, t);
+    ((LongColumnVector) batch.cols[9]).vector[row] = dt.getDays();
+    ((BytesColumnVector) batch.cols[10]).setVal(row, str.getBytes());
+    ((BytesColumnVector) batch.cols[11]).setVal(row, c.getBytes());
+    ((BytesColumnVector) batch.cols[12]).setVal(row, vc.getBytes());
+    MapColumnVector map = (MapColumnVector) batch.cols[13];
+    int offset = map.childCount;
+    map.offsets[row] = offset;
+    map.lengths[row] = m.size();
+    map.childCount += map.lengths[row];
+    for(Map.Entry<String, String> entry: m.entrySet()) {
+      ((BytesColumnVector) map.keys).setVal(offset, entry.getKey().getBytes());
+      ((BytesColumnVector) map.values).setVal(offset++,
+          entry.getValue().getBytes());
+    }
+    ListColumnVector list = (ListColumnVector) batch.cols[14];
+    offset = list.childCount;
+    list.offsets[row] = offset;
+    list.lengths[row] = a.size();
+    list.childCount += list.lengths[row];
+    for(int e=0; e < a.size(); ++e) {
+      ((LongColumnVector) list.child).vector[offset + e] = a.get(e);
+    }
+    StructColumnVector struct = (StructColumnVector) batch.cols[15];
+    ((LongColumnVector) struct.fields[0]).vector[row] = sti;
+    ((BytesColumnVector) struct.fields[1]).setVal(row, sts.getBytes());
+  }
+
+  public static void checkOutput(String expected,
+                                 String actual) throws Exception {
+    BufferedReader eStream =
+        new BufferedReader(new FileReader
+            (TestJsonFileDump.getFileFromClasspath(expected)));
+    BufferedReader aStream =
+        new BufferedReader(new FileReader(actual));
+    String expectedLine = eStream.readLine().trim();
+    while (expectedLine != null) {
+      String actualLine = aStream.readLine().trim();
+      System.out.println("actual:   " + actualLine);
+      System.out.println("expected: " + expectedLine);
+      Assert.assertEquals(expectedLine, actualLine);
+      expectedLine = eStream.readLine();
+      expectedLine = expectedLine == null ? null : expectedLine.trim();
+    }
+    Assert.assertNull(eStream.readLine());
+    Assert.assertNull(aStream.readLine());
+    eStream.close();
+    aStream.close();
+  }
+
+  @Test
+  public void testDump() throws Exception {
+    TypeDescription schema = getMyRecordType();
+    conf.set(OrcConf.ENCODING_STRATEGY.getAttribute(), "COMPRESSION");
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .fileSystem(fs)
+            .setSchema(schema)
+            .compress(CompressionKind.ZLIB)
+            .stripeSize(100000)
+            .rowIndexStride(1000));
+    Random r1 = new Random(1);
+    String[] words = new String[]{"It", "was", "the", "best", "of", "times,",
+        "it", "was", "the", "worst", "of", "times,", "it", "was", "the", "age",
+        "of", "wisdom,", "it", "was", "the", "age", "of", "foolishness,", "it",
+        "was", "the", "epoch", "of", "belief,", "it", "was", "the", "epoch",
+        "of", "incredulity,", "it", "was", "the", "season", "of", "Light,",
+        "it", "was", "the", "season", "of", "Darkness,", "it", "was", "the",
+        "spring", "of", "hope,", "it", "was", "the", "winter", "of", "despair,",
+        "we", "had", "everything", "before", "us,", "we", "had", "nothing",
+        "before", "us,", "we", "were", "all", "going", "direct", "to",
+        "Heaven,", "we", "were", "all", "going", "direct", "the", "other",
+        "way"};
+    VectorizedRowBatch batch = schema.createRowBatch(1000);
+    for(int i=0; i < 21000; ++i) {
+      appendMyRecord(batch, r1.nextInt(), r1.nextLong(),
+          words[r1.nextInt(words.length)]);
+      if (batch.size == batch.getMaxSize()) {
+        writer.addRowBatch(batch);
+        batch.reset();
+      }
+    }
+    if (batch.size > 0) {
+      writer.addRowBatch(batch);
+    }
+    writer.close();
+    PrintStream origOut = System.out;
+    String outputFilename = "orc-file-dump.out";
+    FileOutputStream myOut = new FileOutputStream(workDir + File.separator + outputFilename);
+
+    // replace stdout and run command
+    System.setOut(new PrintStream(myOut));
+    FileDump.main(new String[]{testFilePath.toString(), "--rowindex=1,2,3"});
+    System.out.flush();
+    System.setOut(origOut);
+
+
+    checkOutput(outputFilename, workDir + File.separator + outputFilename);
+  }
+
+  @Test
+  public void testDataDump() throws Exception {
+    TypeDescription schema = getAllTypesType();
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .fileSystem(fs)
+            .setSchema(schema)
+            .stripeSize(100000)
+            .compress(CompressionKind.NONE)
+            .bufferSize(10000)
+            .rowIndexStride(1000));
+    VectorizedRowBatch batch = schema.createRowBatch(1000);
+    Map<String, String> m = new HashMap<String, String>(2);
+    m.put("k1", "v1");
+    appendAllTypes(batch,
+        true,
+        (byte) 10,
+        (short) 100,
+        1000,
+        10000L,
+        4.0f,
+        20.0,
+        new HiveDecimalWritable("4.2222"),
+        new Timestamp(1416967764000L),
+        new DateWritable(new Date(1416967764000L)),
+        "string",
+        "hello",
+       "hello",
+        m,
+        Arrays.asList(100, 200),
+        10, "foo");
+    m.clear();
+    m.put("k3", "v3");
+    appendAllTypes(
+        batch,
+        false,
+        (byte)20,
+        (short)200,
+        2000,
+        20000L,
+        8.0f,
+        40.0,
+        new HiveDecimalWritable("2.2222"),
+        new Timestamp(1416967364000L),
+        new DateWritable(new Date(1411967764000L)),
+        "abcd",
+        "world",
+        "world",
+        m,
+        Arrays.asList(200, 300),
+        20, "bar");
+    writer.addRowBatch(batch);
+
+    writer.close();
+    PrintStream origOut = System.out;
+    ByteArrayOutputStream myOut = new ByteArrayOutputStream();
+
+    // replace stdout and run command
+    System.setOut(new PrintStream(myOut));
+    FileDump.main(new String[]{testFilePath.toString(), "-d"});
+    System.out.flush();
+    System.setOut(origOut);
+    String[] lines = myOut.toString().split("\n");
+    Assert.assertEquals("{\"b\":true,\"bt\":10,\"s\":100,\"i\":1000,\"l\":10000,\"f\":4,\"d\":20,\"de\":\"4.2222\",\"t\":\"2014-11-25 18:09:24.0\",\"dt\":\"2014-11-25\",\"str\":\"string\",\"c\":\"hello\",\"vc\":\"hello\",\"m\":[{\"_key\":\"k1\",\"_value\":\"v1\"}],\"a\":[100,200],\"st\":{\"i\":10,\"s\":\"foo\"}}", lines[0]);
+    Assert.assertEquals("{\"b\":false,\"bt\":20,\"s\":200,\"i\":2000,\"l\":20000,\"f\":8,\"d\":40,\"de\":\"2.2222\",\"t\":\"2014-11-25 18:02:44.0\",\"dt\":\"2014-09-28\",\"str\":\"abcd\",\"c\":\"world\",\"vc\":\"world\",\"m\":[{\"_key\":\"k3\",\"_value\":\"v3\"}],\"a\":[200,300],\"st\":{\"i\":20,\"s\":\"bar\"}}", lines[1]);
+  }
+
+  // Test that if the fraction of rows with distinct strings is greater than the configured
+  // threshold, dictionary encoding is turned off. When dictionary encoding is turned off, the
+  // length of the dictionary stream for the column will be 0 in the ORC file dump.
+  @Test
+  public void testDictionaryThreshold() throws Exception {
+    TypeDescription schema = getMyRecordType();
+    Configuration conf = new Configuration();
+    conf.set(OrcConf.ENCODING_STRATEGY.getAttribute(), "COMPRESSION");
+    conf.setFloat(OrcConf.DICTIONARY_KEY_SIZE_THRESHOLD.getAttribute(), 0.49f);
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .fileSystem(fs)
+            .setSchema(schema)
+            .stripeSize(100000)
+            .compress(CompressionKind.ZLIB)
+            .rowIndexStride(1000)
+            .bufferSize(10000));
+    VectorizedRowBatch batch = schema.createRowBatch(1000);
+    Random r1 = new Random(1);
+    String[] words = new String[]{"It", "was", "the", "best", "of", "times,",
+        "it", "was", "the", "worst", "of", "times,", "it", "was", "the", "age",
+        "of", "wisdom,", "it", "was", "the", "age", "of", "foolishness,", "it",
+        "was", "the", "epoch", "of", "belief,", "it", "was", "the", "epoch",
+        "of", "incredulity,", "it", "was", "the", "season", "of", "Light,",
+        "it", "was", "the", "season", "of", "Darkness,", "it", "was", "the",
+        "spring", "of", "hope,", "it", "was", "the", "winter", "of", "despair,",
+        "we", "had", "everything", "before", "us,", "we", "had", "nothing",
+        "before", "us,", "we", "were", "all", "going", "direct", "to",
+        "Heaven,", "we", "were", "all", "going", "direct", "the", "other",
+        "way"};
+    int nextInt = 0;
+    for(int i=0; i < 21000; ++i) {
+      // Write out the same string twice; this guarantees that the fraction of rows with
+      // distinct strings is 0.5.
+      if (i % 2 == 0) {
+        nextInt = r1.nextInt(words.length);
+        // Append the value of i to the word; this guarantees that when an index or word is
+        // repeated, the actual string is unique.
+        words[nextInt] += "-" + i;
+      }
+      appendMyRecord(batch, r1.nextInt(), r1.nextLong(), words[nextInt]);
+      if (batch.size == batch.getMaxSize()) {
+        writer.addRowBatch(batch);
+        batch.reset();
+      }
+    }
+    if (batch.size != 0) {
+      writer.addRowBatch(batch);
+    }
+    writer.close();
+    PrintStream origOut = System.out;
+    String outputFilename = "orc-file-dump-dictionary-threshold.out";
+    FileOutputStream myOut = new FileOutputStream(workDir + File.separator + outputFilename);
+
+    // replace stdout and run command
+    System.setOut(new PrintStream(myOut));
+    FileDump.main(new String[]{testFilePath.toString(), "--rowindex=1,2,3"});
+    System.out.flush();
+    System.setOut(origOut);
+
+    checkOutput(outputFilename, workDir + File.separator + outputFilename);
+  }
+
+  @Test
+  public void testBloomFilter() throws Exception {
+    TypeDescription schema = getMyRecordType();
+    conf.set(OrcConf.ENCODING_STRATEGY.getAttribute(), "COMPRESSION");
+    OrcFile.WriterOptions options = OrcFile.writerOptions(conf)
+        .fileSystem(fs)
+        .setSchema(schema)
+        .stripeSize(100000)
+        .compress(CompressionKind.ZLIB)
+        .bufferSize(10000)
+        .rowIndexStride(1000)
+        .bloomFilterColumns("S");
+    Writer writer = OrcFile.createWriter(testFilePath, options);
+    Random r1 = new Random(1);
+    String[] words = new String[]{"It", "was", "the", "best", "of", "times,",
+        "it", "was", "the", "worst", "of", "times,", "it", "was", "the", "age",
+        "of", "wisdom,", "it", "was", "the", "age", "of", "foolishness,", "it",
+        "was", "the", "epoch", "of", "belief,", "it", "was", "the", "epoch",
+        "of", "incredulity,", "it", "was", "the", "season", "of", "Light,",
+        "it", "was", "the", "season", "of", "Darkness,", "it", "was", "the",
+        "spring", "of", "hope,", "it", "was", "the", "winter", "of", "despair,",
+        "we", "had", "everything", "before", "us,", "we", "had", "nothing",
+        "before", "us,", "we", "were", "all", "going", "direct", "to",
+        "Heaven,", "we", "were", "all", "going", "direct", "the", "other",
+        "way"};
+    VectorizedRowBatch batch = schema.createRowBatch(1000);
+    for(int i=0; i < 21000; ++i) {
+      appendMyRecord(batch, r1.nextInt(), r1.nextLong(),
+          words[r1.nextInt(words.length)]);
+      if (batch.size == batch.getMaxSize()) {
+        writer.addRowBatch(batch);
+        batch.reset();
+      }
+    }
+    if (batch.size > 0) {
+      writer.addRowBatch(batch);
+    }
+    writer.close();
+    PrintStream origOut = System.out;
+    String outputFilename = "orc-file-dump-bloomfilter.out";
+    FileOutputStream myOut = new FileOutputStream(workDir + File.separator + outputFilename);
+
+    // replace stdout and run command
+    System.setOut(new PrintStream(myOut));
+    FileDump.main(new String[]{testFilePath.toString(), "--rowindex=3"});
+    System.out.flush();
+    System.setOut(origOut);
+
+
+    checkOutput(outputFilename, workDir + File.separator + outputFilename);
+  }
+
+  @Test
+  public void testBloomFilter2() throws Exception {
+    TypeDescription schema = getMyRecordType();
+    conf.set(OrcConf.ENCODING_STRATEGY.getAttribute(), "COMPRESSION");
+    OrcFile.WriterOptions options = OrcFile.writerOptions(conf)
+        .fileSystem(fs)
+        .setSchema(schema)
+        .stripeSize(100000)
+        .compress(CompressionKind.ZLIB)
+        .bufferSize(10000)
+        .rowIndexStride(1000)
+        .bloomFilterColumns("l")
+        .bloomFilterFpp(0.01);
+    VectorizedRowBatch batch = schema.createRowBatch(1000);
+    Writer writer = OrcFile.createWriter(testFilePath, options);
+    Random r1 = new Random(1);
+    String[] words = new String[]{"It", "was", "the", "best", "of", "times,",
+        "it", "was", "the", "worst", "of", "times,", "it", "was", "the", "age",
+        "of", "wisdom,", "it", "was", "the", "age", "of", "foolishness,", "it",
+        "was", "the", "epoch", "of", "belief,", "it", "was", "the", "epoch",
+        "of", "incredulity,", "it", "was", "the", "season", "of", "Light,",
+        "it", "was", "the", "season", "of", "Darkness,", "it", "was", "the",
+        "spring", "of", "hope,", "it", "was", "the", "winter", "of", "despair,",
+        "we", "had", "everything", "before", "us,", "we", "had", "nothing",
+        "before", "us,", "we", "were", "all", "going", "direct", "to",
+        "Heaven,", "we", "were", "all", "going", "direct", "the", "other",
+        "way"};
+    for(int i=0; i < 21000; ++i) {
+      appendMyRecord(batch, r1.nextInt(), r1.nextLong(),
+          words[r1.nextInt(words.length)]);
+      if (batch.size == batch.getMaxSize()) {
+        writer.addRowBatch(batch);
+        batch.reset();
+      }
+    }
+    if (batch.size > 0) {
+      writer.addRowBatch(batch);
+    }
+    writer.close();
+    PrintStream origOut = System.out;
+    String outputFilename = "orc-file-dump-bloomfilter2.out";
+    FileOutputStream myOut = new FileOutputStream(workDir + File.separator + outputFilename);
+
+    // replace stdout and run command
+    System.setOut(new PrintStream(myOut));
+    FileDump.main(new String[]{testFilePath.toString(), "--rowindex=2"});
+    System.out.flush();
+    System.setOut(origOut);
+
+
+    checkOutput(outputFilename, workDir + File.separator + outputFilename);
+  }
+}

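These tests drive the dump tool through FileDump.main rather than a shell, so the argument arrays above double as usage documentation for the flags exercised here (--rowindex=... for row-group index dumps and -d for row data). A minimal sketch of the same invocations from plain Java, not part of the patch and using a hypothetical file path:

import org.apache.hive.orc.tools.FileDump;

public class FileDumpUsageSketch {
  public static void main(String[] args) throws Exception {
    String orcFile = "/tmp/TestFileDump.testDump.orc";  // hypothetical path
    // File metadata plus row-group indexes for columns 1, 2 and 3 (as in testDump).
    FileDump.main(new String[]{orcFile, "--rowindex=1,2,3"});
    // Dump the rows themselves, one JSON record per line (as in testDataDump).
    FileDump.main(new String[]{orcFile, "-d"});
  }
}
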
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/tools/TestJsonFileDump.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/tools/TestJsonFileDump.java b/orc/src/test/org/apache/hive/orc/tools/TestJsonFileDump.java
new file mode 100644
index 0000000..efded7a
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/tools/TestJsonFileDump.java
@@ -0,0 +1,150 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc.tools;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.FileReader;
+import java.io.PrintStream;
+import java.net.URL;
+import java.util.Random;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hive.orc.OrcFile;
+import org.apache.hive.orc.CompressionKind;
+import org.apache.hive.orc.OrcConf;
+import org.apache.hive.orc.TypeDescription;
+import org.apache.hive.orc.Writer;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestJsonFileDump {
+  public static String getFileFromClasspath(String name) {
+    URL url = ClassLoader.getSystemResource(name);
+    if (url == null) {
+      throw new IllegalArgumentException("Could not find " + name);
+    }
+    return url.getPath();
+  }
+
+  Path workDir = new Path(System.getProperty("test.tmp.dir"));
+  Configuration conf;
+  FileSystem fs;
+  Path testFilePath;
+
+  @Before
+  public void openFileSystem () throws Exception {
+    conf = new Configuration();
+    fs = FileSystem.getLocal(conf);
+    fs.setWorkingDirectory(workDir);
+    testFilePath = new Path("TestFileDump.testDump.orc");
+    fs.delete(testFilePath, false);
+  }
+
+  static void checkOutput(String expected,
+                          String actual) throws Exception {
+    BufferedReader eStream =
+        new BufferedReader(new FileReader(getFileFromClasspath(expected)));
+    BufferedReader aStream =
+        new BufferedReader(new FileReader(actual));
+    String expectedLine = eStream.readLine();
+    while (expectedLine != null) {
+      String actualLine = aStream.readLine();
+      System.out.println("actual:   " + actualLine);
+      System.out.println("expected: " + expectedLine);
+      assertEquals(expectedLine, actualLine);
+      expectedLine = eStream.readLine();
+    }
+    assertNull(eStream.readLine());
+    assertNull(aStream.readLine());
+  }
+
+  @Test
+  public void testJsonDump() throws Exception {
+    TypeDescription schema = TypeDescription.createStruct()
+        .addField("i", TypeDescription.createInt())
+        .addField("l", TypeDescription.createLong())
+        .addField("s", TypeDescription.createString());
+    conf.set(OrcConf.ENCODING_STRATEGY.getAttribute(), "COMPRESSION");
+    OrcFile.WriterOptions options = OrcFile.writerOptions(conf)
+        .fileSystem(fs)
+        .setSchema(schema)
+        .stripeSize(100000)
+        .compress(CompressionKind.ZLIB)
+        .bufferSize(10000)
+        .rowIndexStride(1000)
+        .bloomFilterColumns("s");
+    Writer writer = OrcFile.createWriter(testFilePath, options);
+    Random r1 = new Random(1);
+    String[] words = new String[]{"It", "was", "the", "best", "of", "times,",
+        "it", "was", "the", "worst", "of", "times,", "it", "was", "the", "age",
+        "of", "wisdom,", "it", "was", "the", "age", "of", "foolishness,", "it",
+        "was", "the", "epoch", "of", "belief,", "it", "was", "the", "epoch",
+        "of", "incredulity,", "it", "was", "the", "season", "of", "Light,",
+        "it", "was", "the", "season", "of", "Darkness,", "it", "was", "the",
+        "spring", "of", "hope,", "it", "was", "the", "winter", "of", "despair,",
+        "we", "had", "everything", "before", "us,", "we", "had", "nothing",
+        "before", "us,", "we", "were", "all", "going", "direct", "to",
+        "Heaven,", "we", "were", "all", "going", "direct", "the", "other",
+        "way"};
+    VectorizedRowBatch batch = schema.createRowBatch(1000);
+    for(int i=0; i < 21000; ++i) {
+      ((LongColumnVector) batch.cols[0]).vector[batch.size] = r1.nextInt();
+      ((LongColumnVector) batch.cols[1]).vector[batch.size] = r1.nextLong();
+      if (i % 100 == 0) {
+        batch.cols[2].noNulls = false;
+        batch.cols[2].isNull[batch.size] = true;
+      } else {
+        ((BytesColumnVector) batch.cols[2]).setVal(batch.size,
+            words[r1.nextInt(words.length)].getBytes());
+      }
+      batch.size += 1;
+      if (batch.size == batch.getMaxSize()) {
+        writer.addRowBatch(batch);
+        batch.reset();
+      }
+    }
+    if (batch.size > 0) {
+      writer.addRowBatch(batch);
+    }
+
+    writer.close();
+    PrintStream origOut = System.out;
+    String outputFilename = "orc-file-dump.json";
+    FileOutputStream myOut = new FileOutputStream(workDir + File.separator + outputFilename);
+
+    // replace stdout and run command
+    System.setOut(new PrintStream(myOut));
+    FileDump.main(new String[]{testFilePath.toString(), "-j", "-p", "--rowindex=3"});
+    System.out.flush();
+    System.setOut(origOut);
+
+
+    checkOutput(outputFilename, workDir + File.separator + outputFilename);
+  }
+}


[02/37] hive git commit: HIVE-17118. Move the hive-orc source files to make the package names unique.

Posted by om...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/orc/impl/TestRecordReaderImpl.java b/orc/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
deleted file mode 100644
index 79f67e3..0000000
--- a/orc/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
+++ /dev/null
@@ -1,1709 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc.impl;
-
-import static junit.framework.Assert.assertEquals;
-import static org.hamcrest.core.Is.is;
-import static org.junit.Assert.*;
-import static org.mockito.Mockito.any;
-import static org.mockito.Mockito.atLeastOnce;
-import static org.mockito.Mockito.doThrow;
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.verify;
-import static org.mockito.Mockito.when;
-
-import java.io.File;
-import java.io.IOException;
-import java.io.InputStream;
-import java.sql.Timestamp;
-import java.util.ArrayList;
-import java.util.List;
-
-import junit.framework.Assert;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.PositionedReadable;
-import org.apache.hadoop.fs.Seekable;
-import org.apache.hadoop.hive.common.io.DiskRangeList;
-import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentImpl;
-import org.apache.orc.BloomFilterIO;
-import org.apache.orc.DataReader;
-import org.apache.orc.RecordReader;
-import org.apache.orc.TypeDescription;
-import org.apache.orc.Writer;
-import org.apache.orc.impl.RecordReaderImpl.Location;
-import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
-import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
-import org.apache.hadoop.hive.serde2.io.DateWritable;
-import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
-import org.apache.hadoop.io.DataOutputBuffer;
-import org.apache.orc.ColumnStatistics;
-import org.apache.orc.OrcFile;
-import org.apache.orc.Reader;
-import org.apache.orc.OrcProto;
-
-import org.junit.Test;
-import org.mockito.MockSettings;
-import org.mockito.Mockito;
-
-public class TestRecordReaderImpl {
-  /**
-   * Create a predicate leaf. This is used by another test.
-   */
-  public static PredicateLeaf createPredicateLeaf(PredicateLeaf.Operator operator,
-                                                  PredicateLeaf.Type type,
-                                                  String columnName,
-                                                  Object literal,
-                                                  List<Object> literalList) {
-    return new SearchArgumentImpl.PredicateLeafImpl(operator, type, columnName,
-        literal, literalList, null);
-  }
-
-  // can add .verboseLogging() to cause Mockito to log invocations
-  private final MockSettings settings = Mockito.withSettings().verboseLogging();
-
-  static class BufferInStream
-      extends InputStream implements PositionedReadable, Seekable {
-    private final byte[] buffer;
-    private final int length;
-    private int position = 0;
-
-    BufferInStream(byte[] bytes, int length) {
-      this.buffer = bytes;
-      this.length = length;
-    }
-
-    @Override
-    public int read() {
-      if (position < length) {
-        return buffer[position++];
-      }
-      return -1;
-    }
-
-    @Override
-    public int read(byte[] bytes, int offset, int length) {
-      int lengthToRead = Math.min(length, this.length - this.position);
-      if (lengthToRead >= 0) {
-        for(int i=0; i < lengthToRead; ++i) {
-          bytes[offset + i] = buffer[position++];
-        }
-        return lengthToRead;
-      } else {
-        return -1;
-      }
-    }
-
-    @Override
-    public int read(long position, byte[] bytes, int offset, int length) {
-      this.position = (int) position;
-      return read(bytes, offset, length);
-    }
-
-    @Override
-    public void readFully(long position, byte[] bytes, int offset,
-                          int length) throws IOException {
-      this.position = (int) position;
-      while (length > 0) {
-        int result = read(bytes, offset, length);
-        offset += result;
-        length -= result;
-        if (result < 0) {
-          throw new IOException("Read past end of buffer at " + offset);
-        }
-      }
-    }
-
-    @Override
-    public void readFully(long position, byte[] bytes) throws IOException {
-      readFully(position, bytes, 0, bytes.length);
-    }
-
-    @Override
-    public void seek(long position) {
-      this.position = (int) position;
-    }
-
-    @Override
-    public long getPos() {
-      return position;
-    }
-
-    @Override
-    public boolean seekToNewSource(long position) throws IOException {
-      this.position = (int) position;
-      return false;
-    }
-  }
-
-  @Test
-  public void testMaxLengthToReader() throws Exception {
-    Configuration conf = new Configuration();
-    OrcProto.Type rowType = OrcProto.Type.newBuilder()
-        .setKind(OrcProto.Type.Kind.STRUCT).build();
-    OrcProto.Footer footer = OrcProto.Footer.newBuilder()
-        .setHeaderLength(0).setContentLength(0).setNumberOfRows(0)
-        .setRowIndexStride(0).addTypes(rowType).build();
-    OrcProto.PostScript ps = OrcProto.PostScript.newBuilder()
-        .setCompression(OrcProto.CompressionKind.NONE)
-        .setFooterLength(footer.getSerializedSize())
-        .setMagic("ORC").addVersion(0).addVersion(11).build();
-    DataOutputBuffer buffer = new DataOutputBuffer();
-    footer.writeTo(buffer);
-    ps.writeTo(buffer);
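-    // An ORC file ends with a single byte giving the PostScript length, so
-    // write(int) appending the low byte of the serialized size is intentional.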
-    buffer.write(ps.getSerializedSize());
-    FileSystem fs = mock(FileSystem.class, settings);
-    FSDataInputStream file =
-        new FSDataInputStream(new BufferInStream(buffer.getData(),
-            buffer.getLength()));
-    Path p = new Path("/dir/file.orc");
-    when(fs.open(p)).thenReturn(file);
-    OrcFile.ReaderOptions options = OrcFile.readerOptions(conf);
-    options.filesystem(fs);
-    options.maxLength(buffer.getLength());
-    when(fs.getFileStatus(p))
-        .thenReturn(new FileStatus(10, false, 3, 3000, 0, p));
-    Reader reader = OrcFile.createReader(p, options);
-  }
-
-  @Test
-  public void testCompareToRangeInt() throws Exception {
-    assertEquals(Location.BEFORE,
-      RecordReaderImpl.compareToRange(19L, 20L, 40L));
-    assertEquals(Location.AFTER,
-      RecordReaderImpl.compareToRange(41L, 20L, 40L));
-    assertEquals(Location.MIN,
-        RecordReaderImpl.compareToRange(20L, 20L, 40L));
-    assertEquals(Location.MIDDLE,
-        RecordReaderImpl.compareToRange(21L, 20L, 40L));
-    assertEquals(Location.MAX,
-      RecordReaderImpl.compareToRange(40L, 20L, 40L));
-    assertEquals(Location.BEFORE,
-      RecordReaderImpl.compareToRange(0L, 1L, 1L));
-    assertEquals(Location.MIN,
-      RecordReaderImpl.compareToRange(1L, 1L, 1L));
-    assertEquals(Location.AFTER,
-      RecordReaderImpl.compareToRange(2L, 1L, 1L));
-  }
-
-  @Test
-  public void testCompareToRangeString() throws Exception {
-    assertEquals(Location.BEFORE,
-        RecordReaderImpl.compareToRange("a", "b", "c"));
-    assertEquals(Location.AFTER,
-        RecordReaderImpl.compareToRange("d", "b", "c"));
-    assertEquals(Location.MIN,
-        RecordReaderImpl.compareToRange("b", "b", "c"));
-    assertEquals(Location.MIDDLE,
-        RecordReaderImpl.compareToRange("bb", "b", "c"));
-    assertEquals(Location.MAX,
-        RecordReaderImpl.compareToRange("c", "b", "c"));
-    assertEquals(Location.BEFORE,
-        RecordReaderImpl.compareToRange("a", "b", "b"));
-    assertEquals(Location.MIN,
-        RecordReaderImpl.compareToRange("b", "b", "b"));
-    assertEquals(Location.AFTER,
-        RecordReaderImpl.compareToRange("c", "b", "b"));
-  }
-
-  @Test
-  public void testCompareToCharNeedConvert() throws Exception {
-    assertEquals(Location.BEFORE,
-      RecordReaderImpl.compareToRange("apple", "hello", "world"));
-    assertEquals(Location.AFTER,
-      RecordReaderImpl.compareToRange("zombie", "hello", "world"));
-    assertEquals(Location.MIN,
-        RecordReaderImpl.compareToRange("hello", "hello", "world"));
-    assertEquals(Location.MIDDLE,
-        RecordReaderImpl.compareToRange("pilot", "hello", "world"));
-    assertEquals(Location.MAX,
-      RecordReaderImpl.compareToRange("world", "hello", "world"));
-    assertEquals(Location.BEFORE,
-      RecordReaderImpl.compareToRange("apple", "hello", "hello"));
-    assertEquals(Location.MIN,
-      RecordReaderImpl.compareToRange("hello", "hello", "hello"));
-    assertEquals(Location.AFTER,
-      RecordReaderImpl.compareToRange("zombie", "hello", "hello"));
-  }
-
-  @Test
-  public void testGetMin() throws Exception {
-    assertEquals(10L, RecordReaderImpl.getMin(
-      ColumnStatisticsImpl.deserialize(createIntStats(10L, 100L))));
-    assertEquals(10.0d, RecordReaderImpl.getMin(ColumnStatisticsImpl.deserialize(
-      OrcProto.ColumnStatistics.newBuilder()
-        .setDoubleStatistics(OrcProto.DoubleStatistics.newBuilder()
-          .setMinimum(10.0d).setMaximum(100.0d).build()).build())));
-    assertEquals(null, RecordReaderImpl.getMin(ColumnStatisticsImpl.deserialize(
-      OrcProto.ColumnStatistics.newBuilder()
-        .setStringStatistics(OrcProto.StringStatistics.newBuilder().build())
-        .build())));
-    assertEquals("a", RecordReaderImpl.getMin(ColumnStatisticsImpl.deserialize(
-      OrcProto.ColumnStatistics.newBuilder()
-        .setStringStatistics(OrcProto.StringStatistics.newBuilder()
-          .setMinimum("a").setMaximum("b").build()).build())));
-    assertEquals("hello", RecordReaderImpl.getMin(ColumnStatisticsImpl
-      .deserialize(createStringStats("hello", "world"))));
-    assertEquals(HiveDecimal.create("111.1"), RecordReaderImpl.getMin(ColumnStatisticsImpl
-      .deserialize(createDecimalStats("111.1", "112.1"))));
-  }
-
-  private static OrcProto.ColumnStatistics createIntStats(Long min,
-                                                          Long max) {
-    OrcProto.IntegerStatistics.Builder intStats =
-        OrcProto.IntegerStatistics.newBuilder();
-    if (min != null) {
-      intStats.setMinimum(min);
-    }
-    if (max != null) {
-      intStats.setMaximum(max);
-    }
-    return OrcProto.ColumnStatistics.newBuilder()
-        .setIntStatistics(intStats.build()).build();
-  }
-
-  private static OrcProto.ColumnStatistics createBooleanStats(int n, int trueCount) {
-    OrcProto.BucketStatistics.Builder boolStats = OrcProto.BucketStatistics.newBuilder();
-    boolStats.addCount(trueCount);
-    return OrcProto.ColumnStatistics.newBuilder().setNumberOfValues(n).setBucketStatistics(
-      boolStats.build()).build();
-  }
-
-  private static OrcProto.ColumnStatistics createIntStats(int min, int max) {
-    OrcProto.IntegerStatistics.Builder intStats = OrcProto.IntegerStatistics.newBuilder();
-    intStats.setMinimum(min);
-    intStats.setMaximum(max);
-    return OrcProto.ColumnStatistics.newBuilder().setIntStatistics(intStats.build()).build();
-  }
-
-  private static OrcProto.ColumnStatistics createDoubleStats(double min, double max) {
-    OrcProto.DoubleStatistics.Builder dblStats = OrcProto.DoubleStatistics.newBuilder();
-    dblStats.setMinimum(min);
-    dblStats.setMaximum(max);
-    return OrcProto.ColumnStatistics.newBuilder().setDoubleStatistics(dblStats.build()).build();
-  }
-
-  private static OrcProto.ColumnStatistics createStringStats(String min, String max,
-      boolean hasNull) {
-    OrcProto.StringStatistics.Builder strStats = OrcProto.StringStatistics.newBuilder();
-    strStats.setMinimum(min);
-    strStats.setMaximum(max);
-    return OrcProto.ColumnStatistics.newBuilder().setStringStatistics(strStats.build())
-        .setHasNull(hasNull).build();
-  }
-
-  private static OrcProto.ColumnStatistics createStringStats(String min, String max) {
-    OrcProto.StringStatistics.Builder strStats = OrcProto.StringStatistics.newBuilder();
-    strStats.setMinimum(min);
-    strStats.setMaximum(max);
-    return OrcProto.ColumnStatistics.newBuilder().setStringStatistics(strStats.build()).build();
-  }
-
-  private static OrcProto.ColumnStatistics createDateStats(int min, int max) {
-    OrcProto.DateStatistics.Builder dateStats = OrcProto.DateStatistics.newBuilder();
-    dateStats.setMinimum(min);
-    dateStats.setMaximum(max);
-    return OrcProto.ColumnStatistics.newBuilder().setDateStatistics(dateStats.build()).build();
-  }
-
-  private static OrcProto.ColumnStatistics createTimestampStats(long min, long max) {
-    OrcProto.TimestampStatistics.Builder tsStats = OrcProto.TimestampStatistics.newBuilder();
-    tsStats.setMinimum(min);
-    tsStats.setMaximum(max);
-    return OrcProto.ColumnStatistics.newBuilder().setTimestampStatistics(tsStats.build()).build();
-  }
-
-  private static OrcProto.ColumnStatistics createDecimalStats(String min, String max) {
-    OrcProto.DecimalStatistics.Builder decStats = OrcProto.DecimalStatistics.newBuilder();
-    decStats.setMinimum(min);
-    decStats.setMaximum(max);
-    return OrcProto.ColumnStatistics.newBuilder().setDecimalStatistics(decStats.build()).build();
-  }
-
-  private static OrcProto.ColumnStatistics createDecimalStats(String min, String max,
-      boolean hasNull) {
-    OrcProto.DecimalStatistics.Builder decStats = OrcProto.DecimalStatistics.newBuilder();
-    decStats.setMinimum(min);
-    decStats.setMaximum(max);
-    return OrcProto.ColumnStatistics.newBuilder().setDecimalStatistics(decStats.build())
-        .setHasNull(hasNull).build();
-  }
-
-  @Test
-  public void testGetMax() throws Exception {
-    assertEquals(100L, RecordReaderImpl.getMax(ColumnStatisticsImpl.deserialize(createIntStats(10L, 100L))));
-    assertEquals(100.0d, RecordReaderImpl.getMax(ColumnStatisticsImpl.deserialize(
-        OrcProto.ColumnStatistics.newBuilder()
-            .setDoubleStatistics(OrcProto.DoubleStatistics.newBuilder()
-                .setMinimum(10.0d).setMaximum(100.0d).build()).build())));
-    assertEquals(null, RecordReaderImpl.getMax(ColumnStatisticsImpl.deserialize(
-        OrcProto.ColumnStatistics.newBuilder()
-            .setStringStatistics(OrcProto.StringStatistics.newBuilder().build())
-            .build())));
-    assertEquals("b", RecordReaderImpl.getMax(ColumnStatisticsImpl.deserialize(
-        OrcProto.ColumnStatistics.newBuilder()
-            .setStringStatistics(OrcProto.StringStatistics.newBuilder()
-                .setMinimum("a").setMaximum("b").build()).build())));
-    assertEquals("world", RecordReaderImpl.getMax(ColumnStatisticsImpl
-      .deserialize(createStringStats("hello", "world"))));
-    assertEquals(HiveDecimal.create("112.1"), RecordReaderImpl.getMax(ColumnStatisticsImpl
-      .deserialize(createDecimalStats("111.1", "112.1"))));
-  }
-
-  @Test
-  public void testPredEvalWithBooleanStats() throws Exception {
-    PredicateLeaf pred = createPredicateLeaf(
-        PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.BOOLEAN, "x", true, null);
-    assertEquals(TruthValue.YES_NO,
-        RecordReaderImpl.evaluatePredicateProto(createBooleanStats(10, 10), pred, null));
-    assertEquals(TruthValue.NO,
-        RecordReaderImpl.evaluatePredicateProto(createBooleanStats(10, 0), pred, null));
-
-    pred = createPredicateLeaf(
-        PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.BOOLEAN, "x", true, null);
-    assertEquals(TruthValue.YES_NO,
-        RecordReaderImpl.evaluatePredicateProto(createBooleanStats(10, 10), pred, null));
-    assertEquals(TruthValue.NO,
-        RecordReaderImpl.evaluatePredicateProto(createBooleanStats(10, 0), pred, null));
-
-    pred = createPredicateLeaf(
-        PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.BOOLEAN, "x", false, null);
-    assertEquals(TruthValue.NO,
-      RecordReaderImpl.evaluatePredicateProto(createBooleanStats(10, 10), pred, null));
-    assertEquals(TruthValue.YES_NO,
-      RecordReaderImpl.evaluatePredicateProto(createBooleanStats(10, 0), pred, null));
-  }
-
-  @Test
-  public void testPredEvalWithIntStats() throws Exception {
-    PredicateLeaf pred = createPredicateLeaf(
-        PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.LONG, "x", 15L, null);
-    assertEquals(TruthValue.YES_NO,
-        RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null));
-
-    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
-        PredicateLeaf.Type.FLOAT, "x", 15.0, null);
-    assertEquals(TruthValue.YES_NO,
-        RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null));
-
-    // Stats get converted to the column type. "15" is outside of "10" and "100"
-    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
-        PredicateLeaf.Type.STRING, "x", "15", null);
-    assertEquals(TruthValue.NO,
-        RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null));
-
-    // Integer stats will not be converted to date because of days/seconds/millis ambiguity
-    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
-        PredicateLeaf.Type.DATE, "x", new DateWritable(15).get(), null);
-    assertEquals(TruthValue.YES_NO,
-        RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null));
-
-    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
-        PredicateLeaf.Type.DECIMAL, "x", new HiveDecimalWritable("15"), null);
-    assertEquals(TruthValue.YES_NO,
-        RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null));
-
-    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
-        PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(15), null);
-    assertEquals(TruthValue.YES_NO,
-      RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null));
-  }
-
-  @Test
-  public void testPredEvalWithDoubleStats() throws Exception {
-    PredicateLeaf pred = createPredicateLeaf(
-        PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.LONG, "x", 15L, null);
-    assertEquals(TruthValue.YES_NO,
-        RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null));
-
-    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
-        PredicateLeaf.Type.FLOAT, "x", 15.0, null);
-    assertEquals(TruthValue.YES_NO,
-        RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null));
-
-    // Stats get converted to the column type. "15.0" is outside of "10.0" and "100.0"
-    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
-        PredicateLeaf.Type.STRING, "x", "15", null);
-    assertEquals(TruthValue.NO,
-        RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null));
-
-    // Double stats are not converted to date because of days/seconds/millis ambiguity
-    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
-        PredicateLeaf.Type.DATE, "x", new DateWritable(15).get(), null);
-    assertEquals(TruthValue.YES_NO,
-        RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null));
-
-    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
-        PredicateLeaf.Type.DECIMAL, "x", new HiveDecimalWritable("15"), null);
-    assertEquals(TruthValue.YES_NO,
-        RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null));
-
-    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
-        PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(15*1000L), null);
-    assertEquals(TruthValue.YES_NO,
-        RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null));
-
-    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
-        PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(150*1000L), null);
-    assertEquals(TruthValue.NO,
-        RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null));
-  }
-
-  @Test
-  public void testPredEvalWithStringStats() throws Exception {
-    PredicateLeaf pred = createPredicateLeaf(
-        PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.LONG, "x", 100L, null);
-    assertEquals(TruthValue.YES_NO,
-        RecordReaderImpl.evaluatePredicateProto(createStringStats("10", "1000"), pred, null));
-
-    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
-        PredicateLeaf.Type.FLOAT, "x", 100.0, null);
-    assertEquals(TruthValue.YES_NO,
-        RecordReaderImpl.evaluatePredicateProto(createStringStats("10", "1000"), pred, null));
-
-    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
-        PredicateLeaf.Type.STRING, "x", "100", null);
-    assertEquals(TruthValue.YES_NO,
-        RecordReaderImpl.evaluatePredicateProto(createStringStats("10", "1000"), pred, null));
-
-    // IllegalArgumentException is thrown when converting String to Date, hence YES_NO
-    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
-        PredicateLeaf.Type.DATE, "x", new DateWritable(100).get(), null);
-    assertEquals(TruthValue.YES_NO,
-        RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 1000), pred, null));
-
-    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
-        PredicateLeaf.Type.DECIMAL, "x", new HiveDecimalWritable("100"), null);
-    assertEquals(TruthValue.YES_NO,
-        RecordReaderImpl.evaluatePredicateProto(createStringStats("10", "1000"), pred, null));
-
-    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
-        PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(100), null);
-    assertEquals(TruthValue.YES_NO,
-        RecordReaderImpl.evaluatePredicateProto(createStringStats("10", "1000"), pred, null));
-  }
-
-  @Test
-  public void testPredEvalWithDateStats() throws Exception {
-    PredicateLeaf pred = createPredicateLeaf(
-        PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.LONG, "x", 15L, null);
-    // Date to Integer conversion is not possible.
-    assertEquals(TruthValue.YES_NO,
-        RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null));
-
-    // Date to Float conversion is also not possible.
-    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
-        PredicateLeaf.Type.FLOAT, "x", 15.0, null);
-    assertEquals(TruthValue.YES_NO,
-        RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null));
-
-    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
-        PredicateLeaf.Type.STRING, "x", "15", null);
-    assertEquals(TruthValue.NO,
-        RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null));
-
-    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
-        PredicateLeaf.Type.STRING, "x", "1970-01-11", null);
-    assertEquals(TruthValue.YES_NO,
-        RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null));
-
-    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
-        PredicateLeaf.Type.STRING, "x", "15.1", null);
-    assertEquals(TruthValue.NO,
-        RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null));
-
-    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
-        PredicateLeaf.Type.STRING, "x", "__a15__1", null);
-    assertEquals(TruthValue.NO,
-        RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null));
-
-    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
-        PredicateLeaf.Type.STRING, "x", "2000-01-16", null);
-    assertEquals(TruthValue.NO,
-        RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null));
-
-    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
-        PredicateLeaf.Type.STRING, "x", "1970-01-16", null);
-    assertEquals(TruthValue.YES_NO,
-        RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null));
-
-    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
-        PredicateLeaf.Type.DATE, "x", new DateWritable(15).get(), null);
-    assertEquals(TruthValue.YES_NO,
-        RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null));
-
-    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
-        PredicateLeaf.Type.DATE, "x", new DateWritable(150).get(), null);
-    assertEquals(TruthValue.NO,
-        RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null));
-
-    // Date to Decimal conversion is also not possible.
-    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
-        PredicateLeaf.Type.DECIMAL, "x", new HiveDecimalWritable("15"), null);
-    assertEquals(TruthValue.YES_NO,
-        RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null));
-
-    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
-        PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(15), null);
-    assertEquals(TruthValue.NO,
-        RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null));
-
-    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
-        PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(15L * 24L * 60L * 60L * 1000L), null);
-    assertEquals(TruthValue.YES_NO,
-        RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null));
-  }
-
-  @Test
-  public void testPredEvalWithDecimalStats() throws Exception {
-    PredicateLeaf pred = createPredicateLeaf(
-        PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.LONG, "x", 15L, null);
-    assertEquals(TruthValue.YES_NO,
-        RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null));
-
-    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
-        PredicateLeaf.Type.FLOAT, "x", 15.0, null);
-    assertEquals(TruthValue.YES_NO,
-        RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null));
-
-    // "15" out of range of "10.0" and "100.0"
-    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
-        PredicateLeaf.Type.STRING, "x", "15", null);
-    assertEquals(TruthValue.NO,
-        RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null));
-
-    // Decimal to Date conversion is not possible.
-    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
-        PredicateLeaf.Type.DATE, "x", new DateWritable(15).get(), null);
-    assertEquals(TruthValue.YES_NO,
-        RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null));
-
-    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
-        PredicateLeaf.Type.DECIMAL, "x", new HiveDecimalWritable("15"), null);
-    assertEquals(TruthValue.YES_NO,
-        RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null));
-
-    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
-        PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(15 * 1000L), null);
-    assertEquals(TruthValue.YES_NO,
-        RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null));
-
-    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
-        PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(150 * 1000L), null);
-    assertEquals(TruthValue.NO,
-        RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null));
-  }
-
-  @Test
-  public void testPredEvalWithTimestampStats() throws Exception {
-    PredicateLeaf pred = createPredicateLeaf(
-        PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.LONG, "x", 15L, null);
-    // timestamp PPD is disabled until ORC-135
-    assertEquals(TruthValue.YES_NO_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null));
-
-    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
-        PredicateLeaf.Type.FLOAT, "x", 15.0, null);
-    // timestamp PPD is disabled until ORC-135
-    assertEquals(TruthValue.YES_NO_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null));
-    // timestamp PPD is disabled until ORC-135
-    assertEquals(TruthValue.YES_NO_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10000, 100000), pred, null));
-
-    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
-        PredicateLeaf.Type.STRING, "x", "15", null);
-    // timestamp PPD is disabled until ORC-135
-    assertEquals(TruthValue.YES_NO_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null));
-
-    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
-        PredicateLeaf.Type.STRING, "x", new Timestamp(15).toString(), null);
-    // timestamp PPD is disabled until ORC-135
-    assertEquals(TruthValue.YES_NO_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null));
-
-    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
-        PredicateLeaf.Type.DATE, "x", new DateWritable(15).get(), null);
-    // timestamp PPD is disabled until ORC-135
-    assertEquals(TruthValue.YES_NO_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null));
-    // timestamp PPD is disabled until ORC-135
-    assertEquals(TruthValue.YES_NO_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10 * 24L * 60L * 60L * 1000L,
-          100 * 24L * 60L * 60L * 1000L), pred, null));
-
-    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
-        PredicateLeaf.Type.DECIMAL, "x", new HiveDecimalWritable("15"), null);
-    // timestamp PPD is disabled until ORC-135
-    assertEquals(TruthValue.YES_NO_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null));
-    // timestamp PPD is disabled until ORC-135
-    assertEquals(TruthValue.YES_NO_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10000, 100000), pred, null));
-
-    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
-        PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(15), null);
-    // timestamp PPD is disabled until ORC-135
-    assertEquals(TruthValue.YES_NO_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null));
-    // timestamp PPD is disabled until ORC-135
-    assertEquals(TruthValue.YES_NO_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10000, 100000), pred, null));
-  }
-
-  @Test
-  public void testEquals() throws Exception {
-    PredicateLeaf pred = createPredicateLeaf
-        (PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.LONG,
-            "x", 15L, null);
-    assertEquals(TruthValue.NO_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 30L), pred, null));
-    assertEquals(TruthValue.YES_NO_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createIntStats(15L, 30L), pred, null));
-    assertEquals(TruthValue.YES_NO_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 30L), pred, null));
-    assertEquals(TruthValue.YES_NO_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 15L), pred, null));
-    assertEquals(TruthValue.NO_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createIntStats(0L, 10L), pred, null));
-    assertEquals(TruthValue.YES_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createIntStats(15L, 15L), pred, null));
-  }
-
-  @Test
-  public void testNullSafeEquals() throws Exception {
-    PredicateLeaf pred = createPredicateLeaf
-        (PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.LONG,
-            "x", 15L, null);
-    assertEquals(TruthValue.NO,
-        RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 30L), pred, null));
-    assertEquals(TruthValue.YES_NO,
-        RecordReaderImpl.evaluatePredicateProto(createIntStats(15L, 30L), pred, null));
-    assertEquals(TruthValue.YES_NO,
-        RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 30L), pred, null));
-    assertEquals(TruthValue.YES_NO,
-        RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 15L), pred, null));
-    assertEquals(TruthValue.NO,
-        RecordReaderImpl.evaluatePredicateProto(createIntStats(0L, 10L), pred, null));
-    assertEquals(TruthValue.YES_NO,
-        RecordReaderImpl.evaluatePredicateProto(createIntStats(15L, 15L), pred, null));
-  }
-
-  @Test
-  public void testLessThan() throws Exception {
-    PredicateLeaf lessThan = createPredicateLeaf
-        (PredicateLeaf.Operator.LESS_THAN, PredicateLeaf.Type.LONG,
-            "x", 15L, null);
-    assertEquals(TruthValue.NO_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 30L), lessThan, null));
-    assertEquals(TruthValue.NO_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createIntStats(15L, 30L), lessThan, null));
-    assertEquals(TruthValue.YES_NO_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 30L), lessThan, null));
-    assertEquals(TruthValue.YES_NO_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 15L), lessThan, null));
-    assertEquals(TruthValue.YES_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createIntStats(0L, 10L), lessThan, null));
-  }
-
-  @Test
-  public void testLessThanEquals() throws Exception {
-    PredicateLeaf pred = createPredicateLeaf
-        (PredicateLeaf.Operator.LESS_THAN_EQUALS, PredicateLeaf.Type.LONG,
-            "x", 15L, null);
-    assertEquals(TruthValue.NO_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 30L), pred, null));
-    assertEquals(TruthValue.YES_NO_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createIntStats(15L, 30L), pred, null));
-    assertEquals(TruthValue.YES_NO_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 30L), pred, null));
-    assertEquals(TruthValue.YES_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 15L), pred, null));
-    assertEquals(TruthValue.YES_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createIntStats(0L, 10L), pred, null));
-  }
-
-  @Test
-  public void testIn() throws Exception {
-    List<Object> args = new ArrayList<Object>();
-    args.add(10L);
-    args.add(20L);
-    PredicateLeaf pred = createPredicateLeaf
-        (PredicateLeaf.Operator.IN, PredicateLeaf.Type.LONG,
-            "x", null, args);
-    assertEquals(TruthValue.YES_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 20L), pred, null));
-    assertEquals(TruthValue.NO_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createIntStats(30L, 30L), pred, null));
-    assertEquals(TruthValue.YES_NO_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 30L), pred, null));
-    assertEquals(TruthValue.NO_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createIntStats(12L, 18L), pred, null));
-  }
-
-  @Test
-  public void testBetween() throws Exception {
-    List<Object> args = new ArrayList<Object>();
-    args.add(10L);
-    args.add(20L);
-    PredicateLeaf pred = createPredicateLeaf
-        (PredicateLeaf.Operator.BETWEEN, PredicateLeaf.Type.LONG,
-            "x", null, args);
-    assertEquals(TruthValue.NO_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createIntStats(0L, 5L), pred, null));
-    assertEquals(TruthValue.NO_NULL,
-      RecordReaderImpl.evaluatePredicateProto(createIntStats(30L, 40L), pred, null));
-    assertEquals(TruthValue.YES_NO_NULL,
-      RecordReaderImpl.evaluatePredicateProto(createIntStats(5L, 15L), pred, null));
-    assertEquals(TruthValue.YES_NO_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createIntStats(15L, 25L), pred, null));
-    assertEquals(TruthValue.YES_NO_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createIntStats(5L, 25L), pred, null));
-    assertEquals(TruthValue.YES_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 20L), pred, null));
-    assertEquals(TruthValue.YES_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createIntStats(12L, 18L), pred, null));
-  }
-
-  @Test
-  public void testIsNull() throws Exception {
-    PredicateLeaf pred = createPredicateLeaf
-        (PredicateLeaf.Operator.IS_NULL, PredicateLeaf.Type.LONG,
-            "x", null, null);
-    assertEquals(TruthValue.YES_NO,
-        RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 30L), pred, null));
-  }
-
-
-  @Test
-  public void testEqualsWithNullInStats() throws Exception {
-    PredicateLeaf pred = createPredicateLeaf
-        (PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.STRING,
-            "x", "c", null);
-    assertEquals(TruthValue.NO_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createStringStats("d", "e", true), pred, null)); // before
-    assertEquals(TruthValue.NO_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createStringStats("a", "b", true), pred, null)); // after
-    assertEquals(TruthValue.YES_NO_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "c", true), pred, null)); // max
-    assertEquals(TruthValue.YES_NO_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "d", true), pred, null)); // min
-    assertEquals(TruthValue.YES_NO_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "d", true), pred, null)); // middle
-    assertEquals(TruthValue.YES_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "c", true), pred, null)); // same
-  }
-
-  @Test
-  public void testNullSafeEqualsWithNullInStats() throws Exception {
-    PredicateLeaf pred = createPredicateLeaf
-        (PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.STRING,
-            "x", "c", null);
-    assertEquals(TruthValue.NO,
-        RecordReaderImpl.evaluatePredicateProto(createStringStats("d", "e", true), pred, null)); // before
-    assertEquals(TruthValue.NO,
-        RecordReaderImpl.evaluatePredicateProto(createStringStats("a", "b", true), pred, null)); // after
-    assertEquals(TruthValue.YES_NO,
-        RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "c", true), pred, null)); // max
-    assertEquals(TruthValue.YES_NO,
-        RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "d", true), pred, null)); // min
-    assertEquals(TruthValue.YES_NO,
-        RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "d", true), pred, null)); // middle
-    assertEquals(TruthValue.YES_NO,
-        RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "c", true), pred, null)); // same
-  }
-
-  @Test
-  public void testLessThanWithNullInStats() throws Exception {
-    PredicateLeaf pred = createPredicateLeaf
-        (PredicateLeaf.Operator.LESS_THAN, PredicateLeaf.Type.STRING,
-            "x", "c", null);
-    assertEquals(TruthValue.NO_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createStringStats("d", "e", true), pred, null)); // before
-    assertEquals(TruthValue.YES_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createStringStats("a", "b", true), pred, null)); // after
-    assertEquals(TruthValue.YES_NO_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "c", true), pred, null)); // max
-    assertEquals(TruthValue.NO_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "d", true), pred, null)); // min
-    assertEquals(TruthValue.YES_NO_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "d", true), pred, null)); // middle
-    assertEquals(TruthValue.NO_NULL, // min, same stats
-        RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "c", true), pred, null));
-  }
-
-  @Test
-  public void testLessThanEqualsWithNullInStats() throws Exception {
-    PredicateLeaf pred = createPredicateLeaf
-        (PredicateLeaf.Operator.LESS_THAN_EQUALS, PredicateLeaf.Type.STRING,
-            "x", "c", null);
-    assertEquals(TruthValue.NO_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createStringStats("d", "e", true), pred, null)); // before
-    assertEquals(TruthValue.YES_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createStringStats("a", "b", true), pred, null)); // after
-    assertEquals(TruthValue.YES_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "c", true), pred, null)); // max
-    assertEquals(TruthValue.YES_NO_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "d", true), pred, null)); // min
-    assertEquals(TruthValue.YES_NO_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "d", true), pred, null)); // middle
-    assertEquals(TruthValue.YES_NO_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "c", true), pred, null)); // same
-  }
-
-  @Test
-  public void testInWithNullInStats() throws Exception {
-    List<Object> args = new ArrayList<Object>();
-    args.add("c");
-    args.add("f");
-    PredicateLeaf pred = createPredicateLeaf
-        (PredicateLeaf.Operator.IN, PredicateLeaf.Type.STRING,
-            "x", null, args);
-    assertEquals(TruthValue.NO_NULL, // before & after
-        RecordReaderImpl.evaluatePredicateProto(createStringStats("d", "e", true), pred, null));
-    assertEquals(TruthValue.NO_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createStringStats("a", "b", true), pred, null)); // after
-    assertEquals(TruthValue.YES_NO_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createStringStats("e", "f", true), pred, null)); // max
-    assertEquals(TruthValue.YES_NO_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "d", true), pred, null)); // min
-    assertEquals(TruthValue.YES_NO_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "d", true), pred, null)); // middle
-    assertEquals(TruthValue.YES_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "c", true), pred, null)); // same
-  }
-
-  @Test
-  public void testBetweenWithNullInStats() throws Exception {
-    List<Object> args = new ArrayList<Object>();
-    args.add("c");
-    args.add("f");
-    PredicateLeaf pred = createPredicateLeaf
-        (PredicateLeaf.Operator.BETWEEN, PredicateLeaf.Type.STRING,
-            "x", null, args);
-    assertEquals(TruthValue.YES_NULL, // before & after
-        RecordReaderImpl.evaluatePredicateProto(createStringStats("d", "e", true), pred, null));
-    assertEquals(TruthValue.YES_NULL, // before & max
-        RecordReaderImpl.evaluatePredicateProto(createStringStats("e", "f", true), pred, null));
-    assertEquals(TruthValue.NO_NULL, // before & before
-        RecordReaderImpl.evaluatePredicateProto(createStringStats("h", "g", true), pred, null));
-    assertEquals(TruthValue.YES_NO_NULL, // before & min
-        RecordReaderImpl.evaluatePredicateProto(createStringStats("f", "g", true), pred, null));
-    assertEquals(TruthValue.YES_NO_NULL, // before & middle
-      RecordReaderImpl.evaluatePredicateProto(createStringStats("e", "g", true), pred, null));
-
-    assertEquals(TruthValue.YES_NULL, // min & after
-      RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "e", true), pred, null));
-    assertEquals(TruthValue.YES_NULL, // min & max
-        RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "f", true), pred, null));
-    assertEquals(TruthValue.YES_NO_NULL, // min & middle
-        RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "g", true), pred, null));
-
-    assertEquals(TruthValue.NO_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createStringStats("a", "b", true), pred, null)); // after
-    assertEquals(TruthValue.YES_NO_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createStringStats("a", "c", true), pred, null)); // max
-    assertEquals(TruthValue.YES_NO_NULL,
-        RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "d", true), pred, null)); // middle
-    assertEquals(TruthValue.YES_NULL, // min & after, same stats
-        RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "c", true), pred, null));
-  }
-
-  @Test
-  public void testIsNullWithNullInStats() throws Exception {
-    PredicateLeaf pred = createPredicateLeaf
-        (PredicateLeaf.Operator.IS_NULL, PredicateLeaf.Type.STRING,
-            "x", null, null);
-    assertEquals(TruthValue.YES_NO,
-        RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "d", true), pred, null));
-    assertEquals(TruthValue.NO,
-        RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "d", false), pred, null));
-  }
-
-  @Test
-  public void testOverlap() throws Exception {
-    assertTrue(!RecordReaderUtils.overlap(0, 10, -10, -1));
-    assertTrue(RecordReaderUtils.overlap(0, 10, -1, 0));
-    assertTrue(RecordReaderUtils.overlap(0, 10, -1, 1));
-    assertTrue(RecordReaderUtils.overlap(0, 10, 2, 8));
-    assertTrue(RecordReaderUtils.overlap(0, 10, 5, 10));
-    assertTrue(RecordReaderUtils.overlap(0, 10, 10, 11));
-    assertTrue(RecordReaderUtils.overlap(0, 10, 0, 10));
-    assertTrue(RecordReaderUtils.overlap(0, 10, -1, 11));
-    assertTrue(!RecordReaderUtils.overlap(0, 10, 11, 12));
-  }
-
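-  // Builds a linked DiskRangeList from (offset, end) pairs, used to describe
-  // the expected read plan in the assertions below.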
-  private static DiskRangeList diskRanges(Integer... points) {
-    DiskRangeList head = null, tail = null;
-    for(int i = 0; i < points.length; i += 2) {
-      DiskRangeList range = new DiskRangeList(points[i], points[i+1]);
-      if (tail == null) {
-        head = tail = range;
-      } else {
-        tail = tail.insertAfter(range);
-      }
-    }
-    return head;
-  }
-
-  @Test
-  public void testGetIndexPosition() throws Exception {
-    assertEquals(0, RecordReaderUtils.getIndexPosition
-        (OrcProto.ColumnEncoding.Kind.DIRECT, OrcProto.Type.Kind.INT,
-            OrcProto.Stream.Kind.PRESENT, true, true));
-    assertEquals(4, RecordReaderUtils.getIndexPosition
-        (OrcProto.ColumnEncoding.Kind.DIRECT, OrcProto.Type.Kind.INT,
-            OrcProto.Stream.Kind.DATA, true, true));
-    assertEquals(3, RecordReaderUtils.getIndexPosition
-        (OrcProto.ColumnEncoding.Kind.DIRECT, OrcProto.Type.Kind.INT,
-            OrcProto.Stream.Kind.DATA, false, true));
-    assertEquals(0, RecordReaderUtils.getIndexPosition
-        (OrcProto.ColumnEncoding.Kind.DIRECT, OrcProto.Type.Kind.INT,
-            OrcProto.Stream.Kind.DATA, true, false));
-    assertEquals(4, RecordReaderUtils.getIndexPosition
-        (OrcProto.ColumnEncoding.Kind.DICTIONARY, OrcProto.Type.Kind.STRING,
-            OrcProto.Stream.Kind.DATA, true, true));
-    assertEquals(4, RecordReaderUtils.getIndexPosition
-        (OrcProto.ColumnEncoding.Kind.DIRECT, OrcProto.Type.Kind.BINARY,
-            OrcProto.Stream.Kind.DATA, true, true));
-    assertEquals(3, RecordReaderUtils.getIndexPosition
-        (OrcProto.ColumnEncoding.Kind.DIRECT, OrcProto.Type.Kind.BINARY,
-            OrcProto.Stream.Kind.DATA, false, true));
-    assertEquals(6, RecordReaderUtils.getIndexPosition
-        (OrcProto.ColumnEncoding.Kind.DIRECT, OrcProto.Type.Kind.BINARY,
-            OrcProto.Stream.Kind.LENGTH, true, true));
-    assertEquals(4, RecordReaderUtils.getIndexPosition
-        (OrcProto.ColumnEncoding.Kind.DIRECT, OrcProto.Type.Kind.BINARY,
-            OrcProto.Stream.Kind.LENGTH, false, true));
-    assertEquals(4, RecordReaderUtils.getIndexPosition
-        (OrcProto.ColumnEncoding.Kind.DIRECT, OrcProto.Type.Kind.DECIMAL,
-            OrcProto.Stream.Kind.DATA, true, true));
-    assertEquals(3, RecordReaderUtils.getIndexPosition
-        (OrcProto.ColumnEncoding.Kind.DIRECT, OrcProto.Type.Kind.DECIMAL,
-            OrcProto.Stream.Kind.DATA, false, true));
-    assertEquals(6, RecordReaderUtils.getIndexPosition
-        (OrcProto.ColumnEncoding.Kind.DIRECT, OrcProto.Type.Kind.DECIMAL,
-            OrcProto.Stream.Kind.SECONDARY, true, true));
-    assertEquals(4, RecordReaderUtils.getIndexPosition
-        (OrcProto.ColumnEncoding.Kind.DIRECT, OrcProto.Type.Kind.DECIMAL,
-            OrcProto.Stream.Kind.SECONDARY, false, true));
-    assertEquals(4, RecordReaderUtils.getIndexPosition
-        (OrcProto.ColumnEncoding.Kind.DIRECT, OrcProto.Type.Kind.TIMESTAMP,
-            OrcProto.Stream.Kind.DATA, true, true));
-    assertEquals(3, RecordReaderUtils.getIndexPosition
-        (OrcProto.ColumnEncoding.Kind.DIRECT, OrcProto.Type.Kind.TIMESTAMP,
-            OrcProto.Stream.Kind.DATA, false, true));
-    assertEquals(7, RecordReaderUtils.getIndexPosition
-        (OrcProto.ColumnEncoding.Kind.DIRECT, OrcProto.Type.Kind.TIMESTAMP,
-            OrcProto.Stream.Kind.SECONDARY, true, true));
-    assertEquals(5, RecordReaderUtils.getIndexPosition
-        (OrcProto.ColumnEncoding.Kind.DIRECT, OrcProto.Type.Kind.TIMESTAMP,
-            OrcProto.Stream.Kind.SECONDARY, false, true));
-  }
-
-  @Test
-  public void testPartialPlan() throws Exception {
-    DiskRangeList result;
-
-    // set the streams
-    List<OrcProto.Stream> streams = new ArrayList<OrcProto.Stream>();
-    streams.add(OrcProto.Stream.newBuilder()
-        .setKind(OrcProto.Stream.Kind.PRESENT)
-        .setColumn(1).setLength(1000).build());
-    streams.add(OrcProto.Stream.newBuilder()
-        .setKind(OrcProto.Stream.Kind.DATA)
-        .setColumn(1).setLength(99000).build());
-    streams.add(OrcProto.Stream.newBuilder()
-        .setKind(OrcProto.Stream.Kind.PRESENT)
-        .setColumn(2).setLength(2000).build());
-    streams.add(OrcProto.Stream.newBuilder()
-        .setKind(OrcProto.Stream.Kind.DATA)
-        .setColumn(2).setLength(98000).build());
-
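-    // read the root struct and column 1 (x); include row groups 0, 1, and 4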
-    boolean[] columns = new boolean[]{true, true, false};
-    boolean[] rowGroups = new boolean[]{true, true, false, false, true, false};
-
-    // set the index
-    OrcProto.RowIndex[] indexes = new OrcProto.RowIndex[columns.length];
-    indexes[1] = OrcProto.RowIndex.newBuilder()
-        .addEntry(OrcProto.RowIndexEntry.newBuilder()
-            .addPositions(0).addPositions(-1).addPositions(-1)
-            .addPositions(0)
-            .build())
-        .addEntry(OrcProto.RowIndexEntry.newBuilder()
-            .addPositions(100).addPositions(-1).addPositions(-1)
-            .addPositions(10000)
-            .build())
-        .addEntry(OrcProto.RowIndexEntry.newBuilder()
-            .addPositions(200).addPositions(-1).addPositions(-1)
-            .addPositions(20000)
-            .build())
-        .addEntry(OrcProto.RowIndexEntry.newBuilder()
-            .addPositions(300).addPositions(-1).addPositions(-1)
-            .addPositions(30000)
-            .build())
-        .addEntry(OrcProto.RowIndexEntry.newBuilder()
-            .addPositions(400).addPositions(-1).addPositions(-1)
-            .addPositions(40000)
-            .build())
-        .addEntry(OrcProto.RowIndexEntry.newBuilder()
-            .addPositions(500).addPositions(-1).addPositions(-1)
-            .addPositions(50000)
-            .build())
-        .build();
-
-    // set encodings
-    List<OrcProto.ColumnEncoding> encodings =
-        new ArrayList<OrcProto.ColumnEncoding>();
-    encodings.add(OrcProto.ColumnEncoding.newBuilder()
-                    .setKind(OrcProto.ColumnEncoding.Kind.DIRECT).build());
-    encodings.add(OrcProto.ColumnEncoding.newBuilder()
-        .setKind(OrcProto.ColumnEncoding.Kind.DIRECT).build());
-    encodings.add(OrcProto.ColumnEncoding.newBuilder()
-        .setKind(OrcProto.ColumnEncoding.Kind.DIRECT).build());
-
-    // set types struct{x: int, y: int}
-    List<OrcProto.Type> types = new ArrayList<OrcProto.Type>();
-    types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.STRUCT)
-                .addSubtypes(1).addSubtypes(2).addFieldNames("x")
-                .addFieldNames("y").build());
-    types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.INT).build());
-    types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.INT).build());
-
-    // filter by rows and groups
-    result = RecordReaderImpl.planReadPartialDataStreams(streams, indexes,
-        columns, rowGroups, false, encodings, types, 32768, false);
-    assertThat(result, is(diskRanges(0, 1000, 100, 1000, 400, 1000,
-        1000, 11000 + RecordReaderUtils.WORST_UNCOMPRESSED_SLOP,
-        11000, 21000 + RecordReaderUtils.WORST_UNCOMPRESSED_SLOP,
-        41000, 51000 + RecordReaderUtils.WORST_UNCOMPRESSED_SLOP)));
-    result = RecordReaderImpl.planReadPartialDataStreams(streams, indexes,
-        columns, rowGroups, false, encodings, types, 32768, true);
-    assertThat(result, is(diskRanges(0, 21000 + RecordReaderUtils.WORST_UNCOMPRESSED_SLOP,
-        41000, 51000 + RecordReaderUtils.WORST_UNCOMPRESSED_SLOP)));
-
-    // if we read no rows, don't read any bytes
-    rowGroups = new boolean[]{false, false, false, false, false, false};
-    result = RecordReaderImpl.planReadPartialDataStreams(streams, indexes,
-        columns, rowGroups, false, encodings, types, 32768, false);
-    assertNull(result);
-
-    // all rows, but only columns 0 and 2.
-    rowGroups = null;
-    columns = new boolean[]{true, false, true};
-    result = RecordReaderImpl.planReadPartialDataStreams(streams, indexes,
-        columns, null, false, encodings, types, 32768, false);
-    assertThat(result, is(diskRanges(100000, 102000, 102000, 200000)));
-    result = RecordReaderImpl.planReadPartialDataStreams(streams, indexes,
-        columns, null, false, encodings, types, 32768, true);
-    assertThat(result, is(diskRanges(100000, 200000)));
-
-    rowGroups = new boolean[]{false, true, false, false, false, false};
-    indexes[2] = indexes[1];
-    indexes[1] = null;
-    result = RecordReaderImpl.planReadPartialDataStreams(streams, indexes,
-        columns, rowGroups, false, encodings, types, 32768, false);
-    assertThat(result, is(diskRanges(100100, 102000,
-        112000, 122000 + RecordReaderUtils.WORST_UNCOMPRESSED_SLOP)));
-    result = RecordReaderImpl.planReadPartialDataStreams(streams, indexes,
-        columns, rowGroups, false, encodings, types, 32768, true);
-    assertThat(result, is(diskRanges(100100, 102000,
-        112000, 122000 + RecordReaderUtils.WORST_UNCOMPRESSED_SLOP)));
-
-    rowGroups = new boolean[]{false, false, false, false, false, true};
-    indexes[1] = indexes[2];
-    columns = new boolean[]{true, true, true};
-    result = RecordReaderImpl.planReadPartialDataStreams(streams, indexes,
-        columns, rowGroups, false, encodings, types, 32768, false);
-    assertThat(result, is(diskRanges(500, 1000, 51000, 100000, 100500, 102000,
-        152000, 200000)));
-    result = RecordReaderImpl.planReadPartialDataStreams(streams, indexes,
-        columns, rowGroups, false, encodings, types, 32768, true);
-    assertThat(result, is(diskRanges(500, 1000, 51000, 100000, 100500, 102000,
-        152000, 200000)));
-  }
-
-
-  @Test
-  public void testPartialPlanCompressed() throws Exception {
-    DiskRangeList result;
-
-    // set the streams
-    List<OrcProto.Stream> streams = new ArrayList<OrcProto.Stream>();
-    streams.add(OrcProto.Stream.newBuilder()
-        .setKind(OrcProto.Stream.Kind.PRESENT)
-        .setColumn(1).setLength(1000).build());
-    streams.add(OrcProto.Stream.newBuilder()
-        .setKind(OrcProto.Stream.Kind.DATA)
-        .setColumn(1).setLength(99000).build());
-    streams.add(OrcProto.Stream.newBuilder()
-        .setKind(OrcProto.Stream.Kind.PRESENT)
-        .setColumn(2).setLength(2000).build());
-    streams.add(OrcProto.Stream.newBuilder()
-        .setKind(OrcProto.Stream.Kind.DATA)
-        .setColumn(2).setLength(98000).build());
-
-    boolean[] columns = new boolean[]{true, true, false};
-    boolean[] rowGroups = new boolean[]{true, true, false, false, true, false};
-
-    // set the index
-    OrcProto.RowIndex[] indexes = new OrcProto.RowIndex[columns.length];
-    indexes[1] = OrcProto.RowIndex.newBuilder()
-        .addEntry(OrcProto.RowIndexEntry.newBuilder()
-            .addPositions(0).addPositions(-1).addPositions(-1).addPositions(-1)
-            .addPositions(0)
-            .build())
-        .addEntry(OrcProto.RowIndexEntry.newBuilder()
-            .addPositions(100).addPositions(-1).addPositions(-1).addPositions(-1)
-            .addPositions(10000)
-            .build())
-        .addEntry(OrcProto.RowIndexEntry.newBuilder()
-            .addPositions(200).addPositions(-1).addPositions(-1).addPositions(-1)
-            .addPositions(20000)
-            .build())
-        .addEntry(OrcProto.RowIndexEntry.newBuilder()
-            .addPositions(300).addPositions(-1).addPositions(-1).addPositions(-1)
-            .addPositions(30000)
-            .build())
-        .addEntry(OrcProto.RowIndexEntry.newBuilder()
-            .addPositions(400).addPositions(-1).addPositions(-1).addPositions(-1)
-            .addPositions(40000)
-            .build())
-        .addEntry(OrcProto.RowIndexEntry.newBuilder()
-            .addPositions(500).addPositions(-1).addPositions(-1).addPositions(-1)
-            .addPositions(50000)
-            .build())
-        .build();
-
-    // set encodings
-    List<OrcProto.ColumnEncoding> encodings =
-        new ArrayList<OrcProto.ColumnEncoding>();
-    encodings.add(OrcProto.ColumnEncoding.newBuilder()
-        .setKind(OrcProto.ColumnEncoding.Kind.DIRECT).build());
-    encodings.add(OrcProto.ColumnEncoding.newBuilder()
-        .setKind(OrcProto.ColumnEncoding.Kind.DIRECT).build());
-    encodings.add(OrcProto.ColumnEncoding.newBuilder()
-        .setKind(OrcProto.ColumnEncoding.Kind.DIRECT).build());
-
-    // set types struct{x: int, y: int}
-    List<OrcProto.Type> types = new ArrayList<OrcProto.Type>();
-    types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.STRUCT)
-        .addSubtypes(1).addSubtypes(2).addFieldNames("x")
-        .addFieldNames("y").build());
-    types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.INT).build());
-    types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.INT).build());
-
-    // filter by rows and groups
-    result = RecordReaderImpl.planReadPartialDataStreams(streams, indexes,
-        columns, rowGroups, true, encodings, types, 32768, false);
-    assertThat(result, is(diskRanges(0, 1000, 100, 1000,
-        400, 1000, 1000, 11000+(2*32771),
-        11000, 21000+(2*32771), 41000, 100000)));
-
-    rowGroups = new boolean[]{false, false, false, false, false, true};
-    result = RecordReaderImpl.planReadPartialDataStreams(streams, indexes,
-        columns, rowGroups, true, encodings, types, 32768, false);
-    assertThat(result, is(diskRanges(500, 1000, 51000, 100000)));
-  }
-
-  @Test
-  public void testPartialPlanString() throws Exception {
-    DiskRangeList result;
-
-    // set the streams
-    List<OrcProto.Stream> streams = new ArrayList<OrcProto.Stream>();
-    streams.add(OrcProto.Stream.newBuilder()
-        .setKind(OrcProto.Stream.Kind.PRESENT)
-        .setColumn(1).setLength(1000).build());
-    streams.add(OrcProto.Stream.newBuilder()
-        .setKind(OrcProto.Stream.Kind.DATA)
-        .setColumn(1).setLength(94000).build());
-    streams.add(OrcProto.Stream.newBuilder()
-        .setKind(OrcProto.Stream.Kind.LENGTH)
-        .setColumn(1).setLength(2000).build());
-    streams.add(OrcProto.Stream.newBuilder()
-        .setKind(OrcProto.Stream.Kind.DICTIONARY_DATA)
-        .setColumn(1).setLength(3000).build());
-    streams.add(OrcProto.Stream.newBuilder()
-        .setKind(OrcProto.Stream.Kind.PRESENT)
-        .setColumn(2).setLength(2000).build());
-    streams.add(OrcProto.Stream.newBuilder()
-        .setKind(OrcProto.Stream.Kind.DATA)
-        .setColumn(2).setLength(98000).build());
-
-    boolean[] columns = new boolean[]{true, true, false};
-    boolean[] rowGroups = new boolean[]{false, true, false, false, true, true};
-
-    // set the index
-    OrcProto.RowIndex[] indexes = new OrcProto.RowIndex[columns.length];
-    indexes[1] = OrcProto.RowIndex.newBuilder()
-        .addEntry(OrcProto.RowIndexEntry.newBuilder()
-            .addPositions(0).addPositions(-1).addPositions(-1)
-            .addPositions(0)
-            .build())
-        .addEntry(OrcProto.RowIndexEntry.newBuilder()
-            .addPositions(100).addPositions(-1).addPositions(-1)
-            .addPositions(10000)
-            .build())
-        .addEntry(OrcProto.RowIndexEntry.newBuilder()
-            .addPositions(200).addPositions(-1).addPositions(-1)
-            .addPositions(20000)
-            .build())
-        .addEntry(OrcProto.RowIndexEntry.newBuilder()
-            .addPositions(300).addPositions(-1).addPositions(-1)
-            .addPositions(30000)
-            .build())
-        .addEntry(OrcProto.RowIndexEntry.newBuilder()
-            .addPositions(400).addPositions(-1).addPositions(-1)
-            .addPositions(40000)
-            .build())
-        .addEntry(OrcProto.RowIndexEntry.newBuilder()
-            .addPositions(500).addPositions(-1).addPositions(-1)
-            .addPositions(50000)
-            .build())
-        .build();
-
-    // set encodings
-    List<OrcProto.ColumnEncoding> encodings =
-        new ArrayList<OrcProto.ColumnEncoding>();
-    encodings.add(OrcProto.ColumnEncoding.newBuilder()
-        .setKind(OrcProto.ColumnEncoding.Kind.DIRECT).build());
-    encodings.add(OrcProto.ColumnEncoding.newBuilder()
-        .setKind(OrcProto.ColumnEncoding.Kind.DICTIONARY).build());
-    encodings.add(OrcProto.ColumnEncoding.newBuilder()
-        .setKind(OrcProto.ColumnEncoding.Kind.DIRECT).build());
-
-    // set types struct{x: string, y: int}
-    List<OrcProto.Type> types = new ArrayList<OrcProto.Type>();
-    types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.STRUCT)
-        .addSubtypes(1).addSubtypes(2).addFieldNames("x")
-        .addFieldNames("y").build());
-    types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.STRING).build());
-    types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.INT).build());
-
-    // filter by rows and groups
-    result = RecordReaderImpl.planReadPartialDataStreams(streams, indexes,
-        columns, rowGroups, false, encodings, types, 32768, false);
-    assertThat(result, is(diskRanges(100, 1000, 400, 1000, 500, 1000,
-        11000, 21000 + RecordReaderUtils.WORST_UNCOMPRESSED_SLOP,
-        41000, 51000 + RecordReaderUtils.WORST_UNCOMPRESSED_SLOP,
-        51000, 95000, 95000, 97000, 97000, 100000)));
-  }
-
-  @Test
-  public void testIntNullSafeEqualsBloomFilter() throws Exception {
-    PredicateLeaf pred = createPredicateLeaf(
-        PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.LONG, "x", 15L, null);
-    BloomFilterIO bf = new BloomFilterIO(10000);
-    for (int i = 20; i < 1000; i++) {
-      bf.addLong(i);
-    }
-    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(createIntStats(10, 100));
-    assertEquals(TruthValue.NO, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
-
-    bf.addLong(15);
-    assertEquals(TruthValue.YES_NO, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
-  }
-
-  @Test
-  public void testIntEqualsBloomFilter() throws Exception {
-    PredicateLeaf pred = createPredicateLeaf(
-        PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.LONG, "x", 15L, null);
-    BloomFilterIO bf = new BloomFilterIO(10000);
-    for (int i = 20; i < 1000; i++) {
-      bf.addLong(i);
-    }
-    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(createIntStats(10, 100));
-    assertEquals(TruthValue.NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
-
-    bf.addLong(15);
-    assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
-  }
-
-  @Test
-  public void testIntInBloomFilter() throws Exception {
-    List<Object> args = new ArrayList<Object>();
-    args.add(15L);
-    args.add(19L);
-    PredicateLeaf pred = createPredicateLeaf
-        (PredicateLeaf.Operator.IN, PredicateLeaf.Type.LONG,
-            "x", null, args);
-    BloomFilterIO bf = new BloomFilterIO(10000);
-    for (int i = 20; i < 1000; i++) {
-      bf.addLong(i);
-    }
-    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(createIntStats(10, 100));
-    assertEquals(TruthValue.NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
-
-    bf.addLong(19);
-    assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
-
-    bf.addLong(15);
-    assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
-  }
-
-  @Test
-  public void testDoubleNullSafeEqualsBloomFilter() throws Exception {
-    PredicateLeaf pred = createPredicateLeaf(
-        PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.FLOAT, "x", 15.0, null);
-    BloomFilterIO bf = new BloomFilterIO(10000);
-    for (int i = 20; i < 1000; i++) {
-      bf.addDouble(i);
-    }
-    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(createDoubleStats(10.0, 100.0));
-    assertEquals(TruthValue.NO, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
-
-    bf.addDouble(15.0);
-    assertEquals(TruthValue.YES_NO, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
-  }
-
-  @Test
-  public void testDoubleEqualsBloomFilter() throws Exception {
-    PredicateLeaf pred = createPredicateLeaf(
-        PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.FLOAT, "x", 15.0, null);
-    BloomFilterIO bf = new BloomFilterIO(10000);
-    for (int i = 20; i < 1000; i++) {
-      bf.addDouble(i);
-    }
-    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(createDoubleStats(10.0, 100.0));
-    assertEquals(TruthValue.NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
-
-    bf.addDouble(15.0);
-    assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
-  }
-
-  @Test
-  public void testDoubleInBloomFilter() throws Exception {
-    List<Object> args = new ArrayList<Object>();
-    args.add(15.0);
-    args.add(19.0);
-    PredicateLeaf pred = createPredicateLeaf
-        (PredicateLeaf.Operator.IN, PredicateLeaf.Type.FLOAT,
-            "x", null, args);
-    BloomFilterIO bf = new BloomFilterIO(10000);
-    for (int i = 20; i < 1000; i++) {
-      bf.addDouble(i);
-    }
-    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(createDoubleStats(10.0, 100.0));
-    assertEquals(TruthValue.NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
-
-    bf.addDouble(19.0);
-    assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
-
-    bf.addDouble(15.0);
-    assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
-  }
-
-  @Test
-  public void testStringNullSafeEqualsBloomFilter() throws Exception {
-    PredicateLeaf pred = createPredicateLeaf(
-        PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.STRING, "x", "str_15", null);
-    BloomFilterIO bf = new BloomFilterIO(10000);
-    for (int i = 20; i < 1000; i++) {
-      bf.addString("str_" + i);
-    }
-    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(createStringStats("str_10", "str_200"));
-    assertEquals(TruthValue.NO, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
-
-    bf.addString("str_15");
-    assertEquals(TruthValue.YES_NO, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
-  }
-
-  @Test
-  public void testStringEqualsBloomFilter() throws Exception {
-    PredicateLeaf pred = createPredicateLeaf(
-        PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.STRING, "x", "str_15", null);
-    BloomFilterIO bf = new BloomFilterIO(10000);
-    for (int i = 20; i < 1000; i++) {
-      bf.addString("str_" + i);
-    }
-    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(createStringStats("str_10", "str_200"));
-    assertEquals(TruthValue.NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
-
-    bf.addString("str_15");
-    assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
-  }
-
-  @Test
-  public void testStringInBloomFilter() throws Exception {
-    List<Object> args = new ArrayList<Object>();
-    args.add("str_15");
-    args.add("str_19");
-    PredicateLeaf pred = createPredicateLeaf
-        (PredicateLeaf.Operator.IN, PredicateLeaf.Type.STRING,
-            "x", null, args);
-    BloomFilterIO bf = new BloomFilterIO(10000);
-    for (int i = 20; i < 1000; i++) {
-      bf.addString("str_" + i);
-    }
-    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(createStringStats("str_10", "str_200"));
-    assertEquals(TruthValue.NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
-
-    bf.addString("str_19");
-    assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
-
-    bf.addString("str_15");
-    assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
-  }
-
-  @Test
-  public void testDateWritableNullSafeEqualsBloomFilter() throws Exception {
-    PredicateLeaf pred = createPredicateLeaf(
-        PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.DATE, "x",
-        new DateWritable(15).get(), null);
-    BloomFilterIO bf = new BloomFilterIO(10000);
-    for (int i = 20; i < 1000; i++) {
-      bf.addLong((new DateWritable(i)).getDays());
-    }
-    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(createDateStats(10, 100));
-    assertEquals(TruthValue.NO, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
-
-    bf.addLong((new DateWritable(15)).getDays());
-    assertEquals(TruthValue.YES_NO, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
-  }
-
-  @Test
-  public void testDateWritableEqualsBloomFilter() throws Exception {
-    PredicateLeaf pred = createPredicateLeaf(
-        PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.DATE, "x",
-        new DateWritable(15).get(), null);
-    BloomFilterIO bf = new BloomFilterIO(10000);
-    for (int i = 20; i < 1000; i++) {
-      bf.addLong((new DateWritable(i)).getDays());
-    }
-    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(createDateStats(10, 100));
-    assertEquals(TruthValue.NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
-
-    bf.addLong((new DateWritable(15)).getDays());
-    assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
-  }
-
-  @Test
-  public void testDateWritableInBloomFilter() throws Exception {
-    List<Object> args = new ArrayList<Object>();
-    args.add(new DateWritable(15).get());
-    args.add(new DateWritable(19).get());
-    PredicateLeaf pred = createPredicateLeaf
-        (PredicateLeaf.Operator.IN, PredicateLeaf.Type.DATE,
-            "x", null, args);
-    BloomFilterIO bf = new BloomFilterIO(10000);
-    for (int i = 20; i < 1000; i++) {
-      bf.addLong((new DateWritable(i)).getDays());
-    }
-    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(createDateStats(10, 100));
-    assertEquals(TruthValue.NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
-
-    bf.addLong((new DateWritable(19)).getDays());
-    assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
-
-    bf.addLong((new DateWritable(15)).getDays());
-    assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
-  }
-
-  @Test
-  public void testTimestampNullSafeEqualsBloomFilter() throws Exception {
-    PredicateLeaf pred = createPredicateLeaf(
-        PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.TIMESTAMP, "x",
-        new Timestamp(15),
-        null);
-    BloomFilterIO bf = new BloomFilterIO(10000);
-    for (int i = 20; i < 1000; i++) {
-      bf.addLong((new Timestamp(i)).getTime());
-    }
-    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(createTimestampStats(10, 100));
-    // timestamp PPD is disabled until ORC-135
-    assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
-
-    bf.addLong((new Timestamp(15)).getTime());
-    // timestamp PPD is disabled until ORC-135
-    assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
-  }
-
-  @Test
-  public void testTimestampEqualsBloomFilter() throws Exception {
-    PredicateLeaf pred = createPredicateLeaf(
-        PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(15), null);
-    BloomFilterIO bf = new BloomFilterIO(10000);
-    for (int i = 20; i < 1000; i++) {
-      bf.addLong((new Timestamp(i)).getTime());
-    }
-    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(createTimestampStats(10, 100));
-    // timestamp PPD is disabled until ORC-135
-    assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
-
-    bf.addLong((new Timestamp(15)).getTime());
-    // timestamp PPD is disabled until ORC-135
-    assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
-  }
-
-  @Test
-  public void testTimestampInBloomFilter() throws Exception {
-    List<Object> args = new ArrayList<Object>();
-    args.add(new Timestamp(15));
-    args.add(new Timestamp(19));
-    PredicateLeaf pred = createPredicateLeaf
-        (PredicateLeaf.Operator.IN, PredicateLeaf.Type.TIMESTAMP,
-            "x", null, args);
-    BloomFilterIO bf = new BloomFilterIO(10000);
-    for (int i = 20; i < 1000; i++) {
-      bf.addLong((new Timestamp(i)).getTime());
-    }
-    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(createTimestampStats(10, 100));
-    // timestamp PPD is disabled until ORC-135
-    assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
-
-    bf.addLong((new Timestamp(19)).getTime());
-    // timestamp PPD is disabled until ORC-135
-    assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
-
-    bf.addLong((new Timestamp(15)).getTime());
-    // timestamp PPD is disabled until ORC-135
-    assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
-  }
-
-  @Test
-  public void testDecimalNullSafeEqualsBloomFilter() throws Exception {
-    PredicateLeaf pred = createPredicateLeaf(
-        PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.DECIMAL, "x",
-        new HiveDecimalWritable("15"),
-        null);
-    BloomFilterIO bf = new BloomFilterIO(10000);
-    for (int i = 20; i < 1000; i++) {
-      bf.addString(HiveDecimal.create(i).toString());
-    }
-    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(createDecimalStats("10", "200"));
-    assertEquals(TruthValue.NO, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
-
-    bf.addString(HiveDecimal.create(15).toString());
-    assertEquals(TruthValue.YES_NO, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
-  }
-
-  @Test
-  public void testDecimalEqualsBloomFilter() throws Exception {
-    PredicateLeaf pred = createPredicateLeaf(
-        PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.DECIMAL, "x",
-        new HiveDecimalWritable("15"),
-        null);
-    BloomFilterIO bf = new BloomFilterIO(10000);
-    for (int i = 20; i < 1000; i++) {
-      bf.addString(HiveDecimal.create(i).toString());
-    }
-    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(createDecimalStats("10", "200"));
-    assertEquals(TruthValue.NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
-
-    bf.addString(HiveDecimal.create(15).toString());
-    assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
-  }
-
-  @Test
-  public void testDecimalInBloomFilter() throws Exception {
-    List<Object> args = new ArrayList<Object>();
-    args.add(new HiveDecimalWritable("15"));
-    args.add(new HiveDecimalWritable("19"));
-    PredicateLeaf pred = createPredicateLeaf
-        (PredicateLeaf.Operator.IN, PredicateLeaf.Type.DECIMAL,
-            "x", null, args);
-    BloomFilterIO bf = new BloomFilterIO(10000);
-    for (int i = 20; i < 1000; i++) {
-      bf.addString(HiveDecimal.create(i).toString());
-    }
-    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(createDecimalStats("10", "200"));
-    assertEquals(TruthValue.NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
-
-    bf.addString(HiveDecimal.create(19).toString());
-    assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
-
-    bf.addString(HiveDecimal.create(15).toString());
-    assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
-  }
-
-  @Test
-  public void testNullsInBloomFilter() throws Exception {
-    List<Object> args = new ArrayList<Object>();
-    args.add(new HiveDecimalWritable("15"));
-    args.add(null);
-    args.add(new HiveDecimalWritable("19"));
-    PredicateLeaf pred = createPredicateLeaf
-        (PredicateLeaf.Operator.IN, PredicateLeaf.Type.DECIMAL,
-            "x", null, args);
-    BloomFilterIO bf = new BloomFilterIO(10000);
-    for (int i = 20; i < 1000; i++) {
-      bf.addString(HiveDecimal.create(i).toString());
-    }
-    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(createDecimalStats("10", "200", false));
-    // hasNull is false, so bloom filter should return NO
-    assertEquals(TruthValue.NO, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
-
-    cs = ColumnStatisticsImpl.deserialize(createDecimalStats("10", "200", true));
-    // hasNull is true, so bloom filter should return YES_NO_NULL
-    assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
-
-    bf.addString(HiveDecimal.create(19).toString());
-    assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
-
-    bf.addString(HiveDecimal.create(15).toString());
-    assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
-  }
-
-  @Test
-  public void testClose() throws Exception {
-    DataReader mockedDataReader = mock(DataReader.class);
-    closeMockedRecordReader(mockedDataReader);
-
-    verify(mockedDataReader, atLeastOnce()).close();
-  }
-
-  @Test
-  public void testCloseWithException() throws Exception {
-    DataReader mockedDataReader = mock(DataReader.class);
-    doThrow(IOException.class).when(mockedDataReader).close();
-
-    try {
-      closeMockedRecordReader(mockedDataReader);
-      fail("Exception should have been thrown when Record Reader was closed");
-    } catch (IOException expected) {
-
-    }
-
-    verify(mockedDataReader, atLeastOnce()).close();
-  }
-
-  Path workDir = new Path(System.getProperty("test.tmp.dir",
-      "target" + File.separator + "test" + File.separator + "tmp"));
-
-  private void closeMockedRecordReader(DataReader mockedDataReader) throws IOException {
-    Configuration conf = new Configuration();
-    Path path = new Path(workDir, "empty.orc");
-    FileSystem.get(conf).delete(path, true);
-    Writer writer = OrcFile.createWriter(path, OrcFile.writerOptions(conf)
-        .setSchema(TypeDescription.createLong()));
-    writer.close();
-    Reader reader = OrcFile.createReader(path, OrcFile.readerOptions(conf));
-
-    RecordReader recordReader = reader.rows(new Reader.Options()
-        .dataReader(mockedDataReader));
-
-    recordReader.close();
-  }
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/orc/impl/TestRunLengthByteReader.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/orc/impl/TestRunLengthByteReader.java b/orc/src/test/org/apache/orc/impl/TestRunLengthByteReader.java
deleted file mode 100644
index a14bef1..0000000
--- a/orc/src/test/org/apache/orc/impl/TestRunLengthByteReader.java
+++ /dev/null
@@ -1,143 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc.impl;
-
-import static junit.framework.Assert.assertEquals;
-
-import java.nio.ByteBuffer;
-
-import org.apache.orc.CompressionCodec;
-import org.junit.Test;
-
-public class TestRunLengthByteReader {
-
-  @Test
-  public void testUncompressedSeek() throws Exception {
-    TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
-    RunLengthByteWriter out = new RunLengthByteWriter(new OutStream("test", 100,
-        null, collect));
-    TestInStream.PositionCollector[] positions =
-        new TestInStream.PositionCollector[2048];
-    for(int i=0; i < 2048; ++i) {
-      positions[i] = new TestInStream.PositionCollector();
-      out.getPosition(positions[i]);
-      if (i < 1024) {
-        out.write((byte) (i/4));
-      } else {
-        out.write((byte) i);
-      }
-    }
-    out.flush();
-    ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
-    collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
-    inBuf.flip();
-    RunLengthByteReader in = new RunLengthByteReader(InStream.create("test",
-        new ByteBuffer[]{inBuf}, new long[]{0}, inBuf.remaining(), null, 100));
-    for(int i=0; i < 2048; ++i) {
-      int x = in.next() & 0xff;
-      if (i < 1024) {
-        assertEquals((i/4) & 0xff, x);
-      } else {
-        assertEquals(i & 0xff, x);
-      }
-    }
-    for(int i=2047; i >= 0; --i) {
-      in.seek(positions[i]);
-      int x = in.next() & 0xff;
-      if (i < 1024) {
-        assertEquals((i/4) & 0xff, x);
-      } else {
-        assertEquals(i & 0xff, x);
-      }
-    }
-  }
-
-  @Test
-  public void testCompressedSeek() throws Exception {
-    CompressionCodec codec = new SnappyCodec();
-    TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
-    RunLengthByteWriter out = new RunLengthByteWriter(new OutStream("test", 500,
-        codec, collect));
-    TestInStream.PositionCollector[] positions =
-        new TestInStream.PositionCollector[2048];
-    for(int i=0; i < 2048; ++i) {
-      positions[i] = new TestInStream.PositionCollector();
-      out.getPosition(positions[i]);
-      if (i < 1024) {
-        out.write((byte) (i/4));
-      } else {
-        out.write((byte) i);
-      }
-    }
-    out.flush();
-    ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
-    collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
-    inBuf.flip();
-    RunLengthByteReader in = new RunLengthByteReader(InStream.create("test",
-        new ByteBuffer[]{inBuf}, new long[]{0}, inBuf.remaining(), codec, 500));
-    for(int i=0; i < 2048; ++i) {
-      int x = in.next() & 0xff;
-      if (i < 1024) {
-        assertEquals((i/4) & 0xff, x);
-      } else {
-        assertEquals(i & 0xff, x);
-      }
-    }
-    for(int i=2047; i >= 0; --i) {
-      in.seek(positions[i]);
-      int x = in.next() & 0xff;
-      if (i < 1024) {
-        assertEquals((i/4) & 0xff, x);
-      } else {
-        assertEquals(i & 0xff, x);
-      }
-    }
-  }
-
-  @Test
-  public void testSkips() throws Exception {
-    TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
-    RunLengthByteWriter out = new RunLengthByteWriter(new OutStream("test", 100,
-        null, collect));
-    for(int i=0; i < 2048; ++i) {
-      if (i < 1024) {
-        out.write((byte) (i/16));
-      } else {
-        out.write((byte) i);
-      }
-    }
-    out.flush();
-    ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
-    collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
-    inBuf.flip();
-    RunLengthByteReader in = new RunLengthByteReader(InStream.create("test",
-        new ByteBuffer[]{inBuf}, new long[]{0}, inBuf.remaining(), null, 100));
-    for(int i=0; i < 2048; i += 10) {
-      int x = in.next() & 0xff;
-      if (i < 1024) {
-        assertEquals((i/16) & 0xff, x);
-      } else {
-        assertEquals(i & 0xff, x);
-      }
-      if (i < 2038) {
-        in.skip(9);
-      }
-      in.skip(0);
-    }
-  }
-}


[25/37] hive git commit: HIVE-17118. Move the hive-orc source files to make the package names unique.

Posted by om...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/impl/WriterImpl.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/impl/WriterImpl.java b/orc/src/java/org/apache/hive/orc/impl/WriterImpl.java
new file mode 100644
index 0000000..1e273c0
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/impl/WriterImpl.java
@@ -0,0 +1,2443 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc.impl;
+
+import static com.google.common.base.Preconditions.checkArgument;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.sql.Timestamp;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.TimeZone;
+import java.util.TreeMap;
+
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.hive.ql.util.JavaDataModel;
+import org.apache.hive.orc.BinaryColumnStatistics;
+import org.apache.hive.orc.BloomFilterIO;
+import org.apache.hive.orc.OrcConf;
+import org.apache.hive.orc.OrcFile;
+import org.apache.hive.orc.OrcProto;
+import org.apache.hive.orc.StripeInformation;
+import org.apache.hive.orc.TypeDescription;
+import org.apache.hive.orc.Writer;
+import org.apache.hive.orc.OrcUtils;
+import org.apache.hive.orc.StringColumnStatistics;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.UnionColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.io.Text;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.collect.Lists;
+import com.google.common.primitives.Longs;
+import com.google.protobuf.ByteString;
+
+/**
+ * An ORC file writer. The file is divided into stripes, which are the natural
+ * unit of work when reading. Each stripe is buffered in memory until the
+ * buffered data reaches the stripe size, and then it is written out, broken
+ * down by columns. Each column is written by a TreeWriter that is specific to
+ * that type of column. TreeWriters may have children TreeWriters that handle
+ * the sub-types. Each of the TreeWriters writes the column's data as a set of
+ * streams.
+ *
+ * This class is unsynchronized, like most Stream objects, so from the creation
+ * of an OrcFile onward all access to a single instance has to be from a single
+ * thread.
+ *
+ * There are no known cases today where a single instance is used from
+ * different threads.
+ *
+ * Caveat: the MemoryManager is created when the WriterOptions are built, so it
+ * has to be confined to a single thread as well.
+ *
+ */
+public class WriterImpl implements Writer, MemoryManager.Callback {
+
+  private static final Logger LOG = LoggerFactory.getLogger(WriterImpl.class);
+
+  private static final int MIN_ROW_INDEX_STRIDE = 1000;
+
+  private final Path path;
+  private final int rowIndexStride;
+  private final TypeDescription schema;
+
+  @VisibleForTesting
+  protected final PhysicalWriter physWriter;
+  private int columnCount;
+  private long rowCount = 0;
+  private long rowsInStripe = 0;
+  private long rawDataSize = 0;
+  private int rowsInIndex = 0;
+  private int stripesAtLastFlush = -1;
+  private final List<OrcProto.StripeInformation> stripes =
+    new ArrayList<OrcProto.StripeInformation>();
+  private final Map<String, ByteString> userMetadata =
+    new TreeMap<String, ByteString>();
+  private final StreamFactory streamFactory = new StreamFactory();
+  private final TreeWriter treeWriter;
+  private final boolean buildIndex;
+  private final MemoryManager memoryManager;
+  private final OrcFile.Version version;
+  private final Configuration conf;
+  private final OrcFile.WriterCallback callback;
+  private final OrcFile.WriterContext callbackContext;
+  private final OrcFile.EncodingStrategy encodingStrategy;
+  private final boolean[] bloomFilterColumns;
+  private final double bloomFilterFpp;
+  private boolean writeTimeZone;
+
+  public WriterImpl(FileSystem fs,
+                    Path path,
+                    OrcFile.WriterOptions opts) throws IOException {
+    this(new PhysicalFsWriter(fs, path, opts.getSchema().getMaximumId() + 1, opts), path, opts);
+  }
+
+  public WriterImpl(PhysicalWriter writer,
+                    Path pathForMem,
+                    OrcFile.WriterOptions opts) throws IOException {
+    this.physWriter = writer;
+    this.path = pathForMem;
+    this.conf = opts.getConfiguration();
+    this.schema = opts.getSchema();
+    this.callback = opts.getCallback();
+    if (callback != null) {
+      callbackContext = new OrcFile.WriterContext(){
+
+        @Override
+        public Writer getWriter() {
+          return WriterImpl.this;
+        }
+      };
+    } else {
+      callbackContext = null;
+    }
+    this.version = opts.getVersion();
+    this.encodingStrategy = opts.getEncodingStrategy();
+    this.rowIndexStride = opts.getRowIndexStride();
+    this.memoryManager = opts.getMemoryManager();
+    buildIndex = rowIndexStride > 0;
+    if (version == OrcFile.Version.V_0_11) {
+      /* do not write bloom filters for ORC v11 */
+      this.bloomFilterColumns = new boolean[schema.getMaximumId() + 1];
+    } else {
+      this.bloomFilterColumns =
+          OrcUtils.includeColumns(opts.getBloomFilterColumns(), schema);
+    }
+    this.bloomFilterFpp = opts.getBloomFilterFpp();
+    treeWriter = createTreeWriter(schema, streamFactory, false);
+    if (buildIndex && rowIndexStride < MIN_ROW_INDEX_STRIDE) {
+      throw new IllegalArgumentException("Row stride must be at least " +
+          MIN_ROW_INDEX_STRIDE);
+    }
+
+    // ensure that we are able to handle callbacks before we register ourselves
+    if (path != null) {
+      memoryManager.addWriter(path, opts.getStripeSize(), this);
+    }
+  }
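+
+  /**
+   * A minimal usage sketch, included for illustration only and not used by the
+   * writer itself: the file path and the single bigint column are assumptions,
+   * and this helper is exposition rather than API. It shows the flow the class
+   * comment describes: build a schema, create a writer through OrcFile, feed
+   * it VectorizedRowBatch instances from a single thread, and close it.
+   */
+  private static void exampleUsageSketch(Configuration conf,
+                                         Path file) throws IOException {
+    TypeDescription schema = TypeDescription.createStruct()
+        .addField("x", TypeDescription.createLong());
+    Writer writer = OrcFile.createWriter(file,
+        OrcFile.writerOptions(conf).setSchema(schema));
+    VectorizedRowBatch batch = schema.createRowBatch();
+    LongColumnVector x = (LongColumnVector) batch.cols[0];
+    for (long r = 0; r < 10; ++r) {
+      // fill one long value per row; the batch is flushed as a whole
+      x.vector[batch.size++] = r;
+    }
+    writer.addRowBatch(batch);
+    writer.close();
+  }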
+
+  @Override
+  public boolean checkMemory(double newScale) throws IOException {
+    long limit = (long) Math.round(physWriter.getPhysicalStripeSize() * newScale);
+    long size = estimateStripeSize();
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("ORC writer " + path + " size = " + size + " limit = " +
+                limit);
+    }
+    if (size > limit) {
+      flushStripe();
+      return true;
+    }
+    return false;
+  }
+
+  private static class RowIndexPositionRecorder implements PositionRecorder {
+    private final OrcProto.RowIndexEntry.Builder builder;
+
+    RowIndexPositionRecorder(OrcProto.RowIndexEntry.Builder builder) {
+      this.builder = builder;
+    }
+
+    @Override
+    public void addPosition(long position) {
+      builder.addPositions(position);
+    }
+  }
+
+  /**
+   * Interface from the Writer to the TreeWriters. This limits the visibility
+   * that the TreeWriters have into the Writer.
+   */
+  private class StreamFactory {
+    /**
+     * Create a stream to store part of a column.
+     * @param column the column id for the stream
+     * @param kind the kind of stream
+     * @return the OutStream that the section needs to be written to.
+     * @throws IOException
+     */
+    public OutStream createStream(int column,
+                                  OrcProto.Stream.Kind kind
+                                  ) throws IOException {
+      final StreamName name = new StreamName(column, kind);
+      return physWriter.getOrCreatePhysicalStream(name);
+    }
+
+    public void writeIndex(int column, OrcProto.RowIndex.Builder rowIndex) throws IOException {
+      physWriter.writeIndexStream(new StreamName(column, OrcProto.Stream.Kind.ROW_INDEX), rowIndex);
+    }
+
+    public void writeBloomFilter(
+        int column, OrcProto.BloomFilterIndex.Builder bloomFilterIndex) throws IOException {
+      physWriter.writeBloomFilterStream(
+          new StreamName(column, OrcProto.Stream.Kind.BLOOM_FILTER), bloomFilterIndex);
+    }
+
+    /**
+     * Get the next column id.
+     * @return a number from 0 to the number of columns - 1
+     */
+    public int getNextColumnId() {
+      return columnCount++;
+    }
+
+    /**
+     * Get the stride rate of the row index.
+     */
+    public int getRowIndexStride() {
+      return rowIndexStride;
+    }
+
+    /**
+     * Should the writer be building the row index?
+     * @return true if we are building the index
+     */
+    public boolean buildIndex() {
+      return buildIndex;
+    }
+
+    /**
+     * Is the ORC file compressed?
+     * @return are the streams compressed
+     */
+    public boolean isCompressed() {
+      return physWriter.isCompressed();
+    }
+
+    /**
+     * Get the encoding strategy to use.
+     * @return encoding strategy
+     */
+    public OrcFile.EncodingStrategy getEncodingStrategy() {
+      return encodingStrategy;
+    }
+
+    /**
+     * Get the bloom filter columns
+     * @return bloom filter columns
+     */
+    public boolean[] getBloomFilterColumns() {
+      return bloomFilterColumns;
+    }
+
+    /**
+     * Get bloom filter false positive percentage.
+     * @return fpp
+     */
+    public double getBloomFilterFPP() {
+      return bloomFilterFpp;
+    }
+
+    /**
+     * Get the writer's configuration.
+     * @return configuration
+     */
+    public Configuration getConfiguration() {
+      return conf;
+    }
+
+    /**
+     * Get the version of the file to write.
+     */
+    public OrcFile.Version getVersion() {
+      return version;
+    }
+
+    public void useWriterTimeZone(boolean val) {
+      writeTimeZone = val;
+    }
+
+    public boolean hasWriterTimeZone() {
+      return writeTimeZone;
+    }
+  }
+
+  /**
+   * The parent class of all of the writers for each column. Each column
+   * is written by an instance of this class. The compound types (struct,
+   * list, map, and union) have children tree writers that write the children
+   * types.
+   */
+  private abstract static class TreeWriter {
+    protected final int id;
+    protected final BitFieldWriter isPresent;
+    private final boolean isCompressed;
+    protected final ColumnStatisticsImpl indexStatistics;
+    protected final ColumnStatisticsImpl stripeColStatistics;
+    private final ColumnStatisticsImpl fileStatistics;
+    protected TreeWriter[] childrenWriters;
+    protected final RowIndexPositionRecorder rowIndexPosition;
+    private final OrcProto.RowIndex.Builder rowIndex;
+    private final OrcProto.RowIndexEntry.Builder rowIndexEntry;
+    protected final BloomFilterIO bloomFilter;
+    protected final boolean createBloomFilter;
+    private final OrcProto.BloomFilterIndex.Builder bloomFilterIndex;
+    private final OrcProto.BloomFilter.Builder bloomFilterEntry;
+    private boolean foundNulls;
+    private OutStream isPresentOutStream;
+    private final List<OrcProto.StripeStatistics.Builder> stripeStatsBuilders;
+    private final StreamFactory streamFactory;
+
+    /**
+     * Create a tree writer.
+     * @param columnId the column id of the column to write
+     * @param schema the row schema
+     * @param streamFactory limited access to the Writer's data.
+     * @param nullable can the value be null?
+     * @throws IOException
+     */
+    TreeWriter(int columnId,
+               TypeDescription schema,
+               StreamFactory streamFactory,
+               boolean nullable) throws IOException {
+      this.streamFactory = streamFactory;
+      this.isCompressed = streamFactory.isCompressed();
+      this.id = columnId;
+      if (nullable) {
+        isPresentOutStream = streamFactory.createStream(id,
+            OrcProto.Stream.Kind.PRESENT);
+        isPresent = new BitFieldWriter(isPresentOutStream, 1);
+      } else {
+        isPresent = null;
+      }
+      this.foundNulls = false;
+      createBloomFilter = streamFactory.getBloomFilterColumns()[columnId];
+      indexStatistics = ColumnStatisticsImpl.create(schema);
+      stripeColStatistics = ColumnStatisticsImpl.create(schema);
+      fileStatistics = ColumnStatisticsImpl.create(schema);
+      childrenWriters = new TreeWriter[0];
+      rowIndex = OrcProto.RowIndex.newBuilder();
+      rowIndexEntry = OrcProto.RowIndexEntry.newBuilder();
+      rowIndexPosition = new RowIndexPositionRecorder(rowIndexEntry);
+      stripeStatsBuilders = Lists.newArrayList();
+      if (createBloomFilter) {
+        bloomFilterEntry = OrcProto.BloomFilter.newBuilder();
+        bloomFilterIndex = OrcProto.BloomFilterIndex.newBuilder();
+        bloomFilter = new BloomFilterIO(streamFactory.getRowIndexStride(),
+            streamFactory.getBloomFilterFPP());
+      } else {
+        bloomFilterEntry = null;
+        bloomFilterIndex = null;
+        bloomFilter = null;
+      }
+    }
+
+    protected OrcProto.RowIndex.Builder getRowIndex() {
+      return rowIndex;
+    }
+
+    protected ColumnStatisticsImpl getStripeStatistics() {
+      return stripeColStatistics;
+    }
+
+    protected OrcProto.RowIndexEntry.Builder getRowIndexEntry() {
+      return rowIndexEntry;
+    }
+
+    IntegerWriter createIntegerWriter(PositionedOutputStream output,
+                                      boolean signed, boolean isDirectV2,
+                                      StreamFactory writer) {
+      if (isDirectV2) {
+        boolean alignedBitpacking = false;
+        if (writer.getEncodingStrategy().equals(OrcFile.EncodingStrategy.SPEED)) {
+          alignedBitpacking = true;
+        }
+        return new RunLengthIntegerWriterV2(output, signed, alignedBitpacking);
+      } else {
+        return new RunLengthIntegerWriter(output, signed);
+      }
+    }
+
+    boolean isNewWriteFormat(StreamFactory writer) {
+      return writer.getVersion() != OrcFile.Version.V_0_11;
+    }
+
+    /**
+     * Handle the top level object write.
+     *
+     * This default method is used for all types except structs, which are the
+     * typical case. VectorizedRowBatch assumes the top-level object is a
+     * struct, so for all other types we write from the first column.
+     * @param batch the batch to write from
+     * @param offset the row to start on
+     * @param length the number of rows to write
+     * @throws IOException
+     */
+    void writeRootBatch(VectorizedRowBatch batch, int offset,
+                        int length) throws IOException {
+      writeBatch(batch.cols[0], offset, length);
+    }
+
+    /**
+     * Write the values from the given vector from offset for length elements.
+     * @param vector the vector to write from
+     * @param offset the first value from the vector to write
+     * @param length the number of values from the vector to write
+     * @throws IOException
+     */
+    void writeBatch(ColumnVector vector, int offset,
+                    int length) throws IOException {
+      if (vector.noNulls) {
+        indexStatistics.increment(length);
+        if (isPresent != null) {
+          for (int i = 0; i < length; ++i) {
+            isPresent.write(1);
+          }
+        }
+      } else {
+        if (vector.isRepeating) {
+          boolean isNull = vector.isNull[0];
+          if (isPresent != null) {
+            for (int i = 0; i < length; ++i) {
+              isPresent.write(isNull ? 0 : 1);
+            }
+          }
+          if (isNull) {
+            foundNulls = true;
+            indexStatistics.setNull();
+          } else {
+            indexStatistics.increment(length);
+          }
+        } else {
+          // count the number of non-null values
+          int nonNullCount = 0;
+          for(int i = 0; i < length; ++i) {
+            boolean isNull = vector.isNull[i + offset];
+            if (!isNull) {
+              nonNullCount += 1;
+            }
+            if (isPresent != null) {
+              isPresent.write(isNull ? 0 : 1);
+            }
+          }
+          indexStatistics.increment(nonNullCount);
+          if (nonNullCount != length) {
+            foundNulls = true;
+            indexStatistics.setNull();
+          }
+        }
+      }
+    }
+
+    private void removeIsPresentPositions() {
+      for(int i=0; i < rowIndex.getEntryCount(); ++i) {
+        OrcProto.RowIndexEntry.Builder entry = rowIndex.getEntryBuilder(i);
+        List<Long> positions = entry.getPositionsList();
+        // bit streams use 3 positions if uncompressed, 4 if compressed
+        positions = positions.subList(isCompressed ? 4 : 3, positions.size());
+        entry.clearPositions();
+        entry.addAllPositions(positions);
+      }
+    }
+
+    /**
+     * Write the stripe out to the file.
+     * @param builder the stripe footer that contains the information about the
+     *                layout of the stripe. The TreeWriter is required to update
+     *                the footer with its information.
+     * @param requiredIndexEntries the number of index entries that are
+     *                             required. this is to check to make sure the
+     *                             row index is well formed.
+     * @throws IOException
+     */
+    void writeStripe(OrcProto.StripeFooter.Builder builder,
+                     int requiredIndexEntries) throws IOException {
+      if (isPresent != null) {
+        isPresent.flush();
+
+        // if no nulls are found in a stream, then suppress the stream
+        if (!foundNulls) {
+          isPresentOutStream.suppress();
+          // since isPresent bitstream is suppressed, update the index to
+          // remove the positions of the isPresent stream
+          if (streamFactory.buildIndex()) {
+            removeIsPresentPositions();
+          }
+        }
+      }
+
+      // merge stripe-level column statistics to file statistics and write it to
+      // stripe statistics
+      OrcProto.StripeStatistics.Builder stripeStatsBuilder = OrcProto.StripeStatistics.newBuilder();
+      writeStripeStatistics(stripeStatsBuilder, this);
+      stripeStatsBuilders.add(stripeStatsBuilder);
+
+      // reset the flag for next stripe
+      foundNulls = false;
+
+      builder.addColumns(getEncoding());
+      if (streamFactory.hasWriterTimeZone()) {
+        builder.setWriterTimezone(TimeZone.getDefault().getID());
+      }
+      if (streamFactory.buildIndex()) {
+        if (rowIndex.getEntryCount() != requiredIndexEntries) {
+          throw new IllegalArgumentException("Column has wrong number of " +
+               "index entries found: " + rowIndex.getEntryCount() + " expected: " +
+               requiredIndexEntries);
+        }
+        streamFactory.writeIndex(id, rowIndex);
+      }
+
+      rowIndex.clear();
+      rowIndexEntry.clear();
+
+      // write the bloom filter to out stream
+      if (createBloomFilter) {
+        streamFactory.writeBloomFilter(id, bloomFilterIndex);
+        bloomFilterIndex.clear();
+        bloomFilterEntry.clear();
+      }
+    }
+
+    private void writeStripeStatistics(OrcProto.StripeStatistics.Builder builder,
+        TreeWriter treeWriter) {
+      treeWriter.fileStatistics.merge(treeWriter.stripeColStatistics);
+      builder.addColStats(treeWriter.stripeColStatistics.serialize().build());
+      treeWriter.stripeColStatistics.reset();
+      for (TreeWriter child : treeWriter.getChildrenWriters()) {
+        writeStripeStatistics(builder, child);
+      }
+    }
+
+    TreeWriter[] getChildrenWriters() {
+      return childrenWriters;
+    }
+
+    /**
+     * Get the encoding for this column.
+     * @return the information about the encoding of this column
+     */
+    OrcProto.ColumnEncoding getEncoding() {
+      return OrcProto.ColumnEncoding.newBuilder().setKind(
+          OrcProto.ColumnEncoding.Kind.DIRECT).build();
+    }
+
+    /**
+     * Create a row index entry with the previous location and the current
+     * index statistics. Also merges the index statistics into the file
+     * statistics before they are cleared. Finally, it records the start of the
+     * next index and ensures all of the children columns also create an entry.
+     * @throws IOException
+     */
+    void createRowIndexEntry() throws IOException {
+      stripeColStatistics.merge(indexStatistics);
+      rowIndexEntry.setStatistics(indexStatistics.serialize());
+      indexStatistics.reset();
+      rowIndex.addEntry(rowIndexEntry);
+      rowIndexEntry.clear();
+      addBloomFilterEntry();
+      recordPosition(rowIndexPosition);
+      for(TreeWriter child: childrenWriters) {
+        child.createRowIndexEntry();
+      }
+    }
+
+    void addBloomFilterEntry() {
+      if (createBloomFilter) {
+        bloomFilterEntry.setNumHashFunctions(bloomFilter.getNumHashFunctions());
+        bloomFilterEntry.addAllBitset(Longs.asList(bloomFilter.getBitSet()));
+        bloomFilterIndex.addBloomFilter(bloomFilterEntry.build());
+        bloomFilter.reset();
+        bloomFilterEntry.clear();
+      }
+    }
+
+    /**
+     * Record the current position in each of this column's streams.
+     * @param recorder where should the locations be recorded
+     * @throws IOException
+     */
+    void recordPosition(PositionRecorder recorder) throws IOException {
+      if (isPresent != null) {
+        isPresent.getPosition(recorder);
+      }
+    }
+
+    /**
+     * Estimate how much memory the writer is consuming excluding the streams.
+     * @return the number of bytes.
+     */
+    long estimateMemory() {
+      long result = 0;
+      for (TreeWriter child: childrenWriters) {
+        result += child.estimateMemory();
+      }
+      return result;
+    }
+  }
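+
+  /**
+   * Hedged illustration of the tree structure described above; this helper is
+   * not called by the writer and the field names are examples only. A schema
+   * such as this struct with two fields would produce a root TreeWriter with
+   * one child writer per field, each serializing its own set of streams.
+   */
+  private static TypeDescription exampleNestedSchema() {
+    return TypeDescription.createStruct()
+        .addField("x", TypeDescription.createInt())
+        .addField("y", TypeDescription.createString());
+  }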
+
+  private static class BooleanTreeWriter extends TreeWriter {
+    private final BitFieldWriter writer;
+
+    BooleanTreeWriter(int columnId,
+                      TypeDescription schema,
+                      StreamFactory writer,
+                      boolean nullable) throws IOException {
+      super(columnId, schema, writer, nullable);
+      PositionedOutputStream out = writer.createStream(id,
+          OrcProto.Stream.Kind.DATA);
+      this.writer = new BitFieldWriter(out, 1);
+      recordPosition(rowIndexPosition);
+    }
+
+    @Override
+    void writeBatch(ColumnVector vector, int offset,
+                    int length) throws IOException {
+      super.writeBatch(vector, offset, length);
+      LongColumnVector vec = (LongColumnVector) vector;
+      if (vector.isRepeating) {
+        if (vector.noNulls || !vector.isNull[0]) {
+          int value = vec.vector[0] == 0 ? 0 : 1;
+          indexStatistics.updateBoolean(value != 0, length);
+          for(int i=0; i < length; ++i) {
+            writer.write(value);
+          }
+        }
+      } else {
+        for(int i=0; i < length; ++i) {
+          if (vec.noNulls || !vec.isNull[i + offset]) {
+            int value = vec.vector[i + offset] == 0 ? 0 : 1;
+            writer.write(value);
+            indexStatistics.updateBoolean(value != 0, 1);
+          }
+        }
+      }
+    }
+
+    @Override
+    void writeStripe(OrcProto.StripeFooter.Builder builder,
+                     int requiredIndexEntries) throws IOException {
+      super.writeStripe(builder, requiredIndexEntries);
+      writer.flush();
+      recordPosition(rowIndexPosition);
+    }
+
+    @Override
+    void recordPosition(PositionRecorder recorder) throws IOException {
+      super.recordPosition(recorder);
+      writer.getPosition(recorder);
+    }
+  }
+
+  private static class ByteTreeWriter extends TreeWriter {
+    private final RunLengthByteWriter writer;
+
+    ByteTreeWriter(int columnId,
+                      TypeDescription schema,
+                      StreamFactory writer,
+                      boolean nullable) throws IOException {
+      super(columnId, schema, writer, nullable);
+      this.writer = new RunLengthByteWriter(writer.createStream(id,
+          OrcProto.Stream.Kind.DATA));
+      recordPosition(rowIndexPosition);
+    }
+
+    @Override
+    void writeBatch(ColumnVector vector, int offset,
+                    int length) throws IOException {
+      super.writeBatch(vector, offset, length);
+      LongColumnVector vec = (LongColumnVector) vector;
+      if (vector.isRepeating) {
+        if (vector.noNulls || !vector.isNull[0]) {
+          byte value = (byte) vec.vector[0];
+          indexStatistics.updateInteger(value, length);
+          if (createBloomFilter) {
+            bloomFilter.addLong(value);
+          }
+          for(int i=0; i < length; ++i) {
+            writer.write(value);
+          }
+        }
+      } else {
+        for(int i=0; i < length; ++i) {
+          if (vec.noNulls || !vec.isNull[i + offset]) {
+            byte value = (byte) vec.vector[i + offset];
+            writer.write(value);
+            indexStatistics.updateInteger(value, 1);
+            if (createBloomFilter) {
+              bloomFilter.addLong(value);
+            }
+          }
+        }
+      }
+    }
+
+    @Override
+    void writeStripe(OrcProto.StripeFooter.Builder builder,
+                     int requiredIndexEntries) throws IOException {
+      super.writeStripe(builder, requiredIndexEntries);
+      writer.flush();
+      recordPosition(rowIndexPosition);
+    }
+
+    @Override
+    void recordPosition(PositionRecorder recorder) throws IOException {
+      super.recordPosition(recorder);
+      writer.getPosition(recorder);
+    }
+  }
+
+  private static class IntegerTreeWriter extends TreeWriter {
+    private final IntegerWriter writer;
+    private boolean isDirectV2 = true;
+
+    IntegerTreeWriter(int columnId,
+                      TypeDescription schema,
+                      StreamFactory writer,
+                      boolean nullable) throws IOException {
+      super(columnId, schema, writer, nullable);
+      OutStream out = writer.createStream(id,
+          OrcProto.Stream.Kind.DATA);
+      this.isDirectV2 = isNewWriteFormat(writer);
+      this.writer = createIntegerWriter(out, true, isDirectV2, writer);
+      recordPosition(rowIndexPosition);
+    }
+
+    @Override
+    OrcProto.ColumnEncoding getEncoding() {
+      if (isDirectV2) {
+        return OrcProto.ColumnEncoding.newBuilder()
+            .setKind(OrcProto.ColumnEncoding.Kind.DIRECT_V2).build();
+      }
+      return OrcProto.ColumnEncoding.newBuilder()
+          .setKind(OrcProto.ColumnEncoding.Kind.DIRECT).build();
+    }
+
+    @Override
+    void writeBatch(ColumnVector vector, int offset,
+                    int length) throws IOException {
+      super.writeBatch(vector, offset, length);
+      LongColumnVector vec = (LongColumnVector) vector;
+      if (vector.isRepeating) {
+        if (vector.noNulls || !vector.isNull[0]) {
+          long value = vec.vector[0];
+          indexStatistics.updateInteger(value, length);
+          if (createBloomFilter) {
+            bloomFilter.addLong(value);
+          }
+          for(int i=0; i < length; ++i) {
+            writer.write(value);
+          }
+        }
+      } else {
+        for(int i=0; i < length; ++i) {
+          if (vec.noNulls || !vec.isNull[i + offset]) {
+            long value = vec.vector[i + offset];
+            writer.write(value);
+            indexStatistics.updateInteger(value, 1);
+            if (createBloomFilter) {
+              bloomFilter.addLong(value);
+            }
+          }
+        }
+      }
+    }
+
+    @Override
+    void writeStripe(OrcProto.StripeFooter.Builder builder,
+                     int requiredIndexEntries) throws IOException {
+      super.writeStripe(builder, requiredIndexEntries);
+      writer.flush();
+      recordPosition(rowIndexPosition);
+    }
+
+    @Override
+    void recordPosition(PositionRecorder recorder) throws IOException {
+      super.recordPosition(recorder);
+      writer.getPosition(recorder);
+    }
+  }
+
+  private static class FloatTreeWriter extends TreeWriter {
+    private final PositionedOutputStream stream;
+    private final SerializationUtils utils;
+
+    FloatTreeWriter(int columnId,
+                      TypeDescription schema,
+                      StreamFactory writer,
+                      boolean nullable) throws IOException {
+      super(columnId, schema, writer, nullable);
+      this.stream = writer.createStream(id,
+          OrcProto.Stream.Kind.DATA);
+      this.utils = new SerializationUtils();
+      recordPosition(rowIndexPosition);
+    }
+
+    @Override
+    void writeBatch(ColumnVector vector, int offset,
+                    int length) throws IOException {
+      super.writeBatch(vector, offset, length);
+      DoubleColumnVector vec = (DoubleColumnVector) vector;
+      if (vector.isRepeating) {
+        if (vector.noNulls || !vector.isNull[0]) {
+          float value = (float) vec.vector[0];
+          indexStatistics.updateDouble(value);
+          if (createBloomFilter) {
+            bloomFilter.addDouble(value);
+          }
+          for(int i=0; i < length; ++i) {
+            utils.writeFloat(stream, value);
+          }
+        }
+      } else {
+        for(int i=0; i < length; ++i) {
+          if (vec.noNulls || !vec.isNull[i + offset]) {
+            float value = (float) vec.vector[i + offset];
+            utils.writeFloat(stream, value);
+            indexStatistics.updateDouble(value);
+            if (createBloomFilter) {
+              bloomFilter.addDouble(value);
+            }
+          }
+        }
+      }
+    }
+
+    @Override
+    void writeStripe(OrcProto.StripeFooter.Builder builder,
+                     int requiredIndexEntries) throws IOException {
+      super.writeStripe(builder, requiredIndexEntries);
+      stream.flush();
+      recordPosition(rowIndexPosition);
+    }
+
+    @Override
+    void recordPosition(PositionRecorder recorder) throws IOException {
+      super.recordPosition(recorder);
+      stream.getPosition(recorder);
+    }
+  }
+
+  private static class DoubleTreeWriter extends TreeWriter {
+    private final PositionedOutputStream stream;
+    private final SerializationUtils utils;
+
+    DoubleTreeWriter(int columnId,
+                    TypeDescription schema,
+                    StreamFactory writer,
+                    boolean nullable) throws IOException {
+      super(columnId, schema, writer, nullable);
+      this.stream = writer.createStream(id,
+          OrcProto.Stream.Kind.DATA);
+      this.utils = new SerializationUtils();
+      recordPosition(rowIndexPosition);
+    }
+
+    @Override
+    void writeBatch(ColumnVector vector, int offset,
+                    int length) throws IOException {
+      super.writeBatch(vector, offset, length);
+      DoubleColumnVector vec = (DoubleColumnVector) vector;
+      if (vector.isRepeating) {
+        if (vector.noNulls || !vector.isNull[0]) {
+          double value = vec.vector[0];
+          indexStatistics.updateDouble(value);
+          if (createBloomFilter) {
+            bloomFilter.addDouble(value);
+          }
+          for(int i=0; i < length; ++i) {
+            utils.writeDouble(stream, value);
+          }
+        }
+      } else {
+        for(int i=0; i < length; ++i) {
+          if (vec.noNulls || !vec.isNull[i + offset]) {
+            double value = vec.vector[i + offset];
+            utils.writeDouble(stream, value);
+            indexStatistics.updateDouble(value);
+            if (createBloomFilter) {
+              bloomFilter.addDouble(value);
+            }
+          }
+        }
+      }
+    }
+
+    @Override
+    void writeStripe(OrcProto.StripeFooter.Builder builder,
+                     int requiredIndexEntries) throws IOException {
+      super.writeStripe(builder, requiredIndexEntries);
+      stream.flush();
+      recordPosition(rowIndexPosition);
+    }
+
+    @Override
+    void recordPosition(PositionRecorder recorder) throws IOException {
+      super.recordPosition(recorder);
+      stream.getPosition(recorder);
+    }
+  }
+
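+  /**
+   * Base writer for the string-family columns. Values are buffered in a
+   * red-black tree dictionary together with the per-row dictionary ids; at
+   * each row index stride (when stride checking is enabled) and again at
+   * stripe end, the ratio of distinct keys to non-null rows is compared
+   * against the configured threshold to decide between dictionary and
+   * direct encoding, and the buffered rows are then replayed to the
+   * appropriate streams.
+   */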
+  private static abstract class StringBaseTreeWriter extends TreeWriter {
+    private static final int INITIAL_DICTIONARY_SIZE = 4096;
+    private final OutStream stringOutput;
+    private final IntegerWriter lengthOutput;
+    private final IntegerWriter rowOutput;
+    protected final StringRedBlackTree dictionary =
+        new StringRedBlackTree(INITIAL_DICTIONARY_SIZE);
+    protected final DynamicIntArray rows = new DynamicIntArray();
+    protected final PositionedOutputStream directStreamOutput;
+    protected final IntegerWriter directLengthOutput;
+    private final List<OrcProto.RowIndexEntry> savedRowIndex =
+        new ArrayList<OrcProto.RowIndexEntry>();
+    private final boolean buildIndex;
+    private final List<Long> rowIndexValueCount = new ArrayList<Long>();
+    // If the number of keys in a dictionary is greater than this fraction of
+    // the total number of non-null rows, turn off dictionary encoding
+    private final double dictionaryKeySizeThreshold;
+    protected boolean useDictionaryEncoding;
+    private boolean isDirectV2 = true;
+    private boolean doneDictionaryCheck;
+    private final boolean strideDictionaryCheck;
+
+    StringBaseTreeWriter(int columnId,
+                     TypeDescription schema,
+                     StreamFactory writer,
+                     boolean nullable) throws IOException {
+      super(columnId, schema, writer, nullable);
+      this.isDirectV2 = isNewWriteFormat(writer);
+      stringOutput = writer.createStream(id,
+          OrcProto.Stream.Kind.DICTIONARY_DATA);
+      lengthOutput = createIntegerWriter(writer.createStream(id,
+          OrcProto.Stream.Kind.LENGTH), false, isDirectV2, writer);
+      rowOutput = createIntegerWriter(writer.createStream(id,
+          OrcProto.Stream.Kind.DATA), false, isDirectV2, writer);
+      recordPosition(rowIndexPosition);
+      rowIndexValueCount.add(0L);
+      buildIndex = writer.buildIndex();
+      directStreamOutput = writer.createStream(id, OrcProto.Stream.Kind.DATA);
+      directLengthOutput = createIntegerWriter(writer.createStream(id,
+          OrcProto.Stream.Kind.LENGTH), false, isDirectV2, writer);
+      Configuration conf = writer.getConfiguration();
+      dictionaryKeySizeThreshold =
+          OrcConf.DICTIONARY_KEY_SIZE_THRESHOLD.getDouble(conf);
+      strideDictionaryCheck =
+          OrcConf.ROW_INDEX_STRIDE_DICTIONARY_CHECK.getBoolean(conf);
+      useDictionaryEncoding =  dictionaryKeySizeThreshold >= 0.000001; // Epsilon.
+      doneDictionaryCheck = !useDictionaryEncoding;
+    }
+
+    private boolean checkDictionaryEncoding() {
+      if (!doneDictionaryCheck) {
+        // Set the flag indicating whether or not to use dictionary encoding
+        // based on whether or not the fraction of distinct keys over number of
+        // non-null rows is less than the configured threshold
+        float ratio = rows.size() > 0 ? (float) (dictionary.size()) / rows.size() : 0.0f;
+        useDictionaryEncoding = !isDirectV2 || ratio <= dictionaryKeySizeThreshold;
+        doneDictionaryCheck = true;
+      }
+      return useDictionaryEncoding;
+    }
+
+    @Override
+    void writeStripe(OrcProto.StripeFooter.Builder builder,
+                     int requiredIndexEntries) throws IOException {
+      // If the stripe has fewer rows than a row index stride (or stride
+      // dictionary checking is disabled), the dictionary check may not have
+      // happened yet, so do it again here.
+      checkDictionaryEncoding();
+
+      if (useDictionaryEncoding) {
+        flushDictionary();
+      } else {
+        // flush out any leftover entries from the dictionary
+        if (rows.size() > 0) {
+          flushDictionary();
+        }
+
+        // suppress the stream for every stripe if dictionary is disabled
+        stringOutput.suppress();
+      }
+
+      // we need to build the rowindex before calling super, since it
+      // writes it out.
+      super.writeStripe(builder, requiredIndexEntries);
+      stringOutput.flush();
+      lengthOutput.flush();
+      rowOutput.flush();
+      directStreamOutput.flush();
+      directLengthOutput.flush();
+      // reset all of the fields to be ready for the next stripe.
+      dictionary.clear();
+      savedRowIndex.clear();
+      rowIndexValueCount.clear();
+      recordPosition(rowIndexPosition);
+      rowIndexValueCount.add(0L);
+
+      if (!useDictionaryEncoding) {
+        // record the start positions of the first index stride of the next
+        // stripe, i.e. the beginning of the direct streams when dictionary
+        // encoding is disabled
+        recordDirectStreamPosition();
+      }
+    }
+
+    private void flushDictionary() throws IOException {
+      final int[] dumpOrder = new int[dictionary.size()];
+
+      if (useDictionaryEncoding) {
+        // Write the dictionary by traversing the red-black tree writing out
+        // the bytes and lengths; and creating the map from the original order
+        // to the final sorted order.
+
+        dictionary.visit(new StringRedBlackTree.Visitor() {
+          private int currentId = 0;
+          @Override
+          public void visit(StringRedBlackTree.VisitorContext context
+                           ) throws IOException {
+            context.writeBytes(stringOutput);
+            lengthOutput.write(context.getLength());
+            dumpOrder[context.getOriginalPosition()] = currentId++;
+          }
+        });
+      } else {
+        // for direct encoding, we don't want the dictionary data stream
+        stringOutput.suppress();
+      }
+      int length = rows.size();
+      int rowIndexEntry = 0;
+      OrcProto.RowIndex.Builder rowIndex = getRowIndex();
+      Text text = new Text();
+      // write the values translated into the dump order.
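+      // Note: the loop runs one past the last row so that row index entries
+      // recorded at the very end of the stripe (count == length) still get
+      // their stream positions filled in before the index is written.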
+      for(int i = 0; i <= length; ++i) {
+        // now that we are writing out the row values, we can finalize the
+        // row index
+        if (buildIndex) {
+          while (i == rowIndexValueCount.get(rowIndexEntry) &&
+              rowIndexEntry < savedRowIndex.size()) {
+            OrcProto.RowIndexEntry.Builder base =
+                savedRowIndex.get(rowIndexEntry++).toBuilder();
+            if (useDictionaryEncoding) {
+              rowOutput.getPosition(new RowIndexPositionRecorder(base));
+            } else {
+              PositionRecorder posn = new RowIndexPositionRecorder(base);
+              directStreamOutput.getPosition(posn);
+              directLengthOutput.getPosition(posn);
+            }
+            rowIndex.addEntry(base.build());
+          }
+        }
+        if (i != length) {
+          if (useDictionaryEncoding) {
+            rowOutput.write(dumpOrder[rows.get(i)]);
+          } else {
+            dictionary.getText(text, rows.get(i));
+            directStreamOutput.write(text.getBytes(), 0, text.getLength());
+            directLengthOutput.write(text.getLength());
+          }
+        }
+      }
+      rows.clear();
+    }
+
+    @Override
+    OrcProto.ColumnEncoding getEncoding() {
+      // Returns the encoding used for the last call to writeStripe
+      if (useDictionaryEncoding) {
+        if(isDirectV2) {
+          return OrcProto.ColumnEncoding.newBuilder().setKind(
+              OrcProto.ColumnEncoding.Kind.DICTIONARY_V2).
+              setDictionarySize(dictionary.size()).build();
+        }
+        return OrcProto.ColumnEncoding.newBuilder().setKind(
+            OrcProto.ColumnEncoding.Kind.DICTIONARY).
+            setDictionarySize(dictionary.size()).build();
+      } else {
+        if(isDirectV2) {
+          return OrcProto.ColumnEncoding.newBuilder().setKind(
+              OrcProto.ColumnEncoding.Kind.DIRECT_V2).build();
+        }
+        return OrcProto.ColumnEncoding.newBuilder().setKind(
+            OrcProto.ColumnEncoding.Kind.DIRECT).build();
+      }
+    }
+
+    /**
+     * This method doesn't call the super method, because unlike most of the
+     * other TreeWriters, this one can't record the position in the streams
+     * until the stripe is being flushed. Therefore it saves all of the entries
+     * and augments them with the final information as the stripe is written.
+     * @throws IOException
+     */
+    @Override
+    void createRowIndexEntry() throws IOException {
+      getStripeStatistics().merge(indexStatistics);
+      OrcProto.RowIndexEntry.Builder rowIndexEntry = getRowIndexEntry();
+      rowIndexEntry.setStatistics(indexStatistics.serialize());
+      indexStatistics.reset();
+      OrcProto.RowIndexEntry base = rowIndexEntry.build();
+      savedRowIndex.add(base);
+      rowIndexEntry.clear();
+      addBloomFilterEntry();
+      recordPosition(rowIndexPosition);
+      rowIndexValueCount.add(Long.valueOf(rows.size()));
+      if (strideDictionaryCheck) {
+        checkDictionaryEncoding();
+      }
+      if (!useDictionaryEncoding) {
+        if (rows.size() > 0) {
+          flushDictionary();
+          // just record the start positions of next index stride
+          recordDirectStreamPosition();
+        } else {
+          // record the start positions of next index stride
+          recordDirectStreamPosition();
+          getRowIndex().addEntry(base);
+        }
+      }
+    }
+
+    private void recordDirectStreamPosition() throws IOException {
+      directStreamOutput.getPosition(rowIndexPosition);
+      directLengthOutput.getPosition(rowIndexPosition);
+    }
+
+    @Override
+    long estimateMemory() {
+      return rows.getSizeInBytes() + dictionary.getSizeInBytes();
+    }
+  }
+
+  private static class StringTreeWriter extends StringBaseTreeWriter {
+    StringTreeWriter(int columnId,
+                   TypeDescription schema,
+                   StreamFactory writer,
+                   boolean nullable) throws IOException {
+      super(columnId, schema, writer, nullable);
+    }
+
+    @Override
+    void writeBatch(ColumnVector vector, int offset,
+                    int length) throws IOException {
+      super.writeBatch(vector, offset, length);
+      BytesColumnVector vec = (BytesColumnVector) vector;
+      if (vector.isRepeating) {
+        if (vector.noNulls || !vector.isNull[0]) {
+          if (useDictionaryEncoding) {
+            int id = dictionary.add(vec.vector[0], vec.start[0], vec.length[0]);
+            for(int i=0; i < length; ++i) {
+              rows.add(id);
+            }
+          } else {
+            for(int i=0; i < length; ++i) {
+              directStreamOutput.write(vec.vector[0], vec.start[0],
+                  vec.length[0]);
+              directLengthOutput.write(vec.length[0]);
+            }
+          }
+          indexStatistics.updateString(vec.vector[0], vec.start[0],
+              vec.length[0], length);
+          if (createBloomFilter) {
+            bloomFilter.addBytes(vec.vector[0], vec.start[0], vec.length[0]);
+          }
+        }
+      } else {
+        for(int i=0; i < length; ++i) {
+          if (vec.noNulls || !vec.isNull[i + offset]) {
+            if (useDictionaryEncoding) {
+              rows.add(dictionary.add(vec.vector[offset + i],
+                  vec.start[offset + i], vec.length[offset + i]));
+            } else {
+              directStreamOutput.write(vec.vector[offset + i],
+                  vec.start[offset + i], vec.length[offset + i]);
+              directLengthOutput.write(vec.length[offset + i]);
+            }
+            indexStatistics.updateString(vec.vector[offset + i],
+                vec.start[offset + i], vec.length[offset + i], 1);
+            if (createBloomFilter) {
+              bloomFilter.addBytes(vec.vector[offset + i],
+                  vec.start[offset + i], vec.length[offset + i]);
+            }
+          }
+        }
+      }
+    }
+  }
+
+  /**
+   * Under the covers, char is written to ORC the same way as string.
+   */
+  private static class CharTreeWriter extends StringBaseTreeWriter {
+    private final int itemLength;
+    private final byte[] padding;
+
+    CharTreeWriter(int columnId,
+        TypeDescription schema,
+        StreamFactory writer,
+        boolean nullable) throws IOException {
+      super(columnId, schema, writer, nullable);
+      itemLength = schema.getMaxLength();
+      padding = new byte[itemLength];
+    }
+
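+    // Values shorter than the declared char length are copied into the shared
+    // padding buffer and space-padded to itemLength; longer values are written
+    // truncated to itemLength bytes.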
+    @Override
+    void writeBatch(ColumnVector vector, int offset,
+                    int length) throws IOException {
+      super.writeBatch(vector, offset, length);
+      BytesColumnVector vec = (BytesColumnVector) vector;
+      if (vector.isRepeating) {
+        if (vector.noNulls || !vector.isNull[0]) {
+          byte[] ptr;
+          int ptrOffset;
+          if (vec.length[0] >= itemLength) {
+            ptr = vec.vector[0];
+            ptrOffset = vec.start[0];
+          } else {
+            ptr = padding;
+            ptrOffset = 0;
+            System.arraycopy(vec.vector[0], vec.start[0], ptr, 0,
+                vec.length[0]);
+            Arrays.fill(ptr, vec.length[0], itemLength, (byte) ' ');
+          }
+          if (useDictionaryEncoding) {
+            int id = dictionary.add(ptr, ptrOffset, itemLength);
+            for(int i=0; i < length; ++i) {
+              rows.add(id);
+            }
+          } else {
+            for(int i=0; i < length; ++i) {
+              directStreamOutput.write(ptr, ptrOffset, itemLength);
+              directLengthOutput.write(itemLength);
+            }
+          }
+          indexStatistics.updateString(ptr, ptrOffset, itemLength, length);
+          if (createBloomFilter) {
+            bloomFilter.addBytes(ptr, ptrOffset, itemLength);
+          }
+        }
+      } else {
+        for(int i=0; i < length; ++i) {
+          if (vec.noNulls || !vec.isNull[i + offset]) {
+            byte[] ptr;
+            int ptrOffset;
+            if (vec.length[offset + i] >= itemLength) {
+              ptr = vec.vector[offset + i];
+              ptrOffset = vec.start[offset + i];
+            } else {
+              // the value is shorter than the declared length, so copy and space-pad it
+              ptr = padding;
+              ptrOffset = 0;
+              System.arraycopy(vec.vector[offset + i], vec.start[offset + i],
+                  ptr, 0, vec.length[offset + i]);
+              Arrays.fill(ptr, vec.length[offset + i], itemLength, (byte) ' ');
+            }
+            if (useDictionaryEncoding) {
+              rows.add(dictionary.add(ptr, ptrOffset, itemLength));
+            } else {
+              directStreamOutput.write(ptr, ptrOffset, itemLength);
+              directLengthOutput.write(itemLength);
+            }
+            indexStatistics.updateString(ptr, ptrOffset, itemLength, 1);
+            if (createBloomFilter) {
+              bloomFilter.addBytes(ptr, ptrOffset, itemLength);
+            }
+          }
+        }
+      }
+    }
+  }
+
+  /**
+   * Under the covers, varchar is written to ORC the same way as string.
+   */
+  private static class VarcharTreeWriter extends StringBaseTreeWriter {
+    private final int maxLength;
+
+    VarcharTreeWriter(int columnId,
+        TypeDescription schema,
+        StreamFactory writer,
+        boolean nullable) throws IOException {
+      super(columnId, schema, writer, nullable);
+      maxLength = schema.getMaxLength();
+    }
+
+    @Override
+    void writeBatch(ColumnVector vector, int offset,
+                    int length) throws IOException {
+      super.writeBatch(vector, offset, length);
+      BytesColumnVector vec = (BytesColumnVector) vector;
+      if (vector.isRepeating) {
+        if (vector.noNulls || !vector.isNull[0]) {
+          int itemLength = Math.min(vec.length[0], maxLength);
+          if (useDictionaryEncoding) {
+            int id = dictionary.add(vec.vector[0], vec.start[0], itemLength);
+            for(int i=0; i < length; ++i) {
+              rows.add(id);
+            }
+          } else {
+            for(int i=0; i < length; ++i) {
+              directStreamOutput.write(vec.vector[0], vec.start[0],
+                  itemLength);
+              directLengthOutput.write(itemLength);
+            }
+          }
+          indexStatistics.updateString(vec.vector[0], vec.start[0],
+              itemLength, length);
+          if (createBloomFilter) {
+            bloomFilter.addBytes(vec.vector[0], vec.start[0], itemLength);
+          }
+        }
+      } else {
+        for(int i=0; i < length; ++i) {
+          if (vec.noNulls || !vec.isNull[i + offset]) {
+            int itemLength = Math.min(vec.length[offset + i], maxLength);
+            if (useDictionaryEncoding) {
+              rows.add(dictionary.add(vec.vector[offset + i],
+                  vec.start[offset + i], itemLength));
+            } else {
+              directStreamOutput.write(vec.vector[offset + i],
+                  vec.start[offset + i], itemLength);
+              directLengthOutput.write(itemLength);
+            }
+            indexStatistics.updateString(vec.vector[offset + i],
+                vec.start[offset + i], itemLength, 1);
+            if (createBloomFilter) {
+              bloomFilter.addBytes(vec.vector[offset + i],
+                  vec.start[offset + i], itemLength);
+            }
+          }
+        }
+      }
+    }
+  }
+
+  private static class BinaryTreeWriter extends TreeWriter {
+    private final PositionedOutputStream stream;
+    private final IntegerWriter length;
+    private boolean isDirectV2 = true;
+
+    BinaryTreeWriter(int columnId,
+                     TypeDescription schema,
+                     StreamFactory writer,
+                     boolean nullable) throws IOException {
+      super(columnId, schema, writer, nullable);
+      this.stream = writer.createStream(id,
+          OrcProto.Stream.Kind.DATA);
+      this.isDirectV2 = isNewWriteFormat(writer);
+      this.length = createIntegerWriter(writer.createStream(id,
+          OrcProto.Stream.Kind.LENGTH), false, isDirectV2, writer);
+      recordPosition(rowIndexPosition);
+    }
+
+    @Override
+    OrcProto.ColumnEncoding getEncoding() {
+      if (isDirectV2) {
+        return OrcProto.ColumnEncoding.newBuilder()
+            .setKind(OrcProto.ColumnEncoding.Kind.DIRECT_V2).build();
+      }
+      return OrcProto.ColumnEncoding.newBuilder()
+          .setKind(OrcProto.ColumnEncoding.Kind.DIRECT).build();
+    }
+
+    @Override
+    void writeBatch(ColumnVector vector, int offset,
+                    int length) throws IOException {
+      super.writeBatch(vector, offset, length);
+      BytesColumnVector vec = (BytesColumnVector) vector;
+      if (vector.isRepeating) {
+        if (vector.noNulls || !vector.isNull[0]) {
+          for(int i=0; i < length; ++i) {
+            stream.write(vec.vector[0], vec.start[0],
+                  vec.length[0]);
+            this.length.write(vec.length[0]);
+          }
+          indexStatistics.updateBinary(vec.vector[0], vec.start[0],
+              vec.length[0], length);
+          if (createBloomFilter) {
+            bloomFilter.addBytes(vec.vector[0], vec.start[0], vec.length[0]);
+          }
+        }
+      } else {
+        for(int i=0; i < length; ++i) {
+          if (vec.noNulls || !vec.isNull[i + offset]) {
+            stream.write(vec.vector[offset + i],
+                vec.start[offset + i], vec.length[offset + i]);
+            this.length.write(vec.length[offset + i]);
+            indexStatistics.updateBinary(vec.vector[offset + i],
+                vec.start[offset + i], vec.length[offset + i], 1);
+            if (createBloomFilter) {
+              bloomFilter.addBytes(vec.vector[offset + i],
+                  vec.start[offset + i], vec.length[offset + i]);
+            }
+          }
+        }
+      }
+    }
+
+
+    @Override
+    void writeStripe(OrcProto.StripeFooter.Builder builder,
+                     int requiredIndexEntries) throws IOException {
+      super.writeStripe(builder, requiredIndexEntries);
+      stream.flush();
+      length.flush();
+      recordPosition(rowIndexPosition);
+    }
+
+    @Override
+    void recordPosition(PositionRecorder recorder) throws IOException {
+      super.recordPosition(recorder);
+      stream.getPosition(recorder);
+      length.getPosition(recorder);
+    }
+  }
+
+  public static long MILLIS_PER_DAY = 24 * 60 * 60 * 1000;
+  public static long NANOS_PER_MILLI = 1000000;
+  public static final int MILLIS_PER_SECOND = 1000;
+  static final int NANOS_PER_SECOND = 1000000000;
+  public static final String BASE_TIMESTAMP_STRING = "2015-01-01 00:00:00";
+
+  private static class TimestampTreeWriter extends TreeWriter {
+    private final IntegerWriter seconds;
+    private final IntegerWriter nanos;
+    private final boolean isDirectV2;
+    private final long base_timestamp;
+
+    TimestampTreeWriter(int columnId,
+                     TypeDescription schema,
+                     StreamFactory writer,
+                     boolean nullable) throws IOException {
+      super(columnId, schema, writer, nullable);
+      this.isDirectV2 = isNewWriteFormat(writer);
+      this.seconds = createIntegerWriter(writer.createStream(id,
+          OrcProto.Stream.Kind.DATA), true, isDirectV2, writer);
+      this.nanos = createIntegerWriter(writer.createStream(id,
+          OrcProto.Stream.Kind.SECONDARY), false, isDirectV2, writer);
+      recordPosition(rowIndexPosition);
+      // Timestamp.valueOf interprets the base in the JVM default time zone,
+      // which lets unit tests exercise different time zones
+      this.base_timestamp = Timestamp.valueOf(BASE_TIMESTAMP_STRING).getTime() / MILLIS_PER_SECOND;
+      writer.useWriterTimeZone(true);
+    }
+
+    @Override
+    OrcProto.ColumnEncoding getEncoding() {
+      if (isDirectV2) {
+        return OrcProto.ColumnEncoding.newBuilder()
+            .setKind(OrcProto.ColumnEncoding.Kind.DIRECT_V2).build();
+      }
+      return OrcProto.ColumnEncoding.newBuilder()
+          .setKind(OrcProto.ColumnEncoding.Kind.DIRECT).build();
+    }
+
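+    // Timestamps are split across two streams: DATA holds seconds relative to
+    // the 2015-01-01 00:00:00 base, SECONDARY holds the nanoseconds
+    // compressed by formatNanos().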
+    @Override
+    void writeBatch(ColumnVector vector, int offset,
+                    int length) throws IOException {
+      super.writeBatch(vector, offset, length);
+      TimestampColumnVector vec = (TimestampColumnVector) vector;
+      Timestamp val;
+      if (vector.isRepeating) {
+        if (vector.noNulls || !vector.isNull[0]) {
+          val = vec.asScratchTimestamp(0);
+          long millis = val.getTime();
+          indexStatistics.updateTimestamp(millis);
+          if (createBloomFilter) {
+            bloomFilter.addLong(millis);
+          }
+          final long secs = millis / MILLIS_PER_SECOND - base_timestamp;
+          final long nano = formatNanos(val.getNanos());
+          for(int i=0; i < length; ++i) {
+            seconds.write(secs);
+            nanos.write(nano);
+          }
+        }
+      } else {
+        for(int i=0; i < length; ++i) {
+          if (vec.noNulls || !vec.isNull[i + offset]) {
+            val = vec.asScratchTimestamp(i + offset);
+            long millis = val.getTime();
+            long secs = millis / MILLIS_PER_SECOND - base_timestamp;
+            seconds.write(secs);
+            nanos.write(formatNanos(val.getNanos()));
+            indexStatistics.updateTimestamp(millis);
+            if (createBloomFilter) {
+              bloomFilter.addLong(millis);
+            }
+          }
+        }
+      }
+    }
+
+    @Override
+    void writeStripe(OrcProto.StripeFooter.Builder builder,
+                     int requiredIndexEntries) throws IOException {
+      super.writeStripe(builder, requiredIndexEntries);
+      seconds.flush();
+      nanos.flush();
+      recordPosition(rowIndexPosition);
+    }
+
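+    /**
+     * Encode nanoseconds for the SECONDARY stream: the low 3 bits record how
+     * many trailing decimal zeros were stripped (stored as count - 1, and only
+     * when at least two zeros are present), while the remaining bits hold the
+     * significant digits, so a reader can restore the value by multiplying by
+     * 10^(code + 1) whenever the code is non-zero.
+     */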
+    private static long formatNanos(int nanos) {
+      if (nanos == 0) {
+        return 0;
+      } else if (nanos % 100 != 0) {
+        return ((long) nanos) << 3;
+      } else {
+        nanos /= 100;
+        int trailingZeros = 1;
+        while (nanos % 10 == 0 && trailingZeros < 7) {
+          nanos /= 10;
+          trailingZeros += 1;
+        }
+        return ((long) nanos) << 3 | trailingZeros;
+      }
+    }
+
+    @Override
+    void recordPosition(PositionRecorder recorder) throws IOException {
+      super.recordPosition(recorder);
+      seconds.getPosition(recorder);
+      nanos.getPosition(recorder);
+    }
+  }
+
+  private static class DateTreeWriter extends TreeWriter {
+    private final IntegerWriter writer;
+    private final boolean isDirectV2;
+
+    DateTreeWriter(int columnId,
+                   TypeDescription schema,
+                   StreamFactory writer,
+                   boolean nullable) throws IOException {
+      super(columnId, schema, writer, nullable);
+      OutStream out = writer.createStream(id,
+          OrcProto.Stream.Kind.DATA);
+      this.isDirectV2 = isNewWriteFormat(writer);
+      this.writer = createIntegerWriter(out, true, isDirectV2, writer);
+      recordPosition(rowIndexPosition);
+    }
+
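+    // Date values arrive in the LongColumnVector as the day number (days
+    // since the UNIX epoch) and are written directly to the DATA stream.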
+    @Override
+    void writeBatch(ColumnVector vector, int offset,
+                    int length) throws IOException {
+      super.writeBatch(vector, offset, length);
+      LongColumnVector vec = (LongColumnVector) vector;
+      if (vector.isRepeating) {
+        if (vector.noNulls || !vector.isNull[0]) {
+          int value = (int) vec.vector[0];
+          indexStatistics.updateDate(value);
+          if (createBloomFilter) {
+            bloomFilter.addLong(value);
+          }
+          for(int i=0; i < length; ++i) {
+            writer.write(value);
+          }
+        }
+      } else {
+        for(int i=0; i < length; ++i) {
+          if (vec.noNulls || !vec.isNull[i + offset]) {
+            int value = (int) vec.vector[i + offset];
+            writer.write(value);
+            indexStatistics.updateDate(value);
+            if (createBloomFilter) {
+              bloomFilter.addLong(value);
+            }
+          }
+        }
+      }
+    }
+
+    @Override
+    void writeStripe(OrcProto.StripeFooter.Builder builder,
+                     int requiredIndexEntries) throws IOException {
+      super.writeStripe(builder, requiredIndexEntries);
+      writer.flush();
+      recordPosition(rowIndexPosition);
+    }
+
+    @Override
+    void recordPosition(PositionRecorder recorder) throws IOException {
+      super.recordPosition(recorder);
+      writer.getPosition(recorder);
+    }
+
+    @Override
+    OrcProto.ColumnEncoding getEncoding() {
+      if (isDirectV2) {
+        return OrcProto.ColumnEncoding.newBuilder()
+            .setKind(OrcProto.ColumnEncoding.Kind.DIRECT_V2).build();
+      }
+      return OrcProto.ColumnEncoding.newBuilder()
+          .setKind(OrcProto.ColumnEncoding.Kind.DIRECT).build();
+    }
+  }
+
+  private static class DecimalTreeWriter extends TreeWriter {
+    private final PositionedOutputStream valueStream;
+
+    // These scratch buffers allow us to serialize decimals much faster.
+    private final long[] scratchLongs;
+    private final byte[] scratchBuffer;
+
+    private final IntegerWriter scaleStream;
+    private final boolean isDirectV2;
+
+    DecimalTreeWriter(int columnId,
+                        TypeDescription schema,
+                        StreamFactory writer,
+                        boolean nullable) throws IOException {
+      super(columnId, schema, writer, nullable);
+      this.isDirectV2 = isNewWriteFormat(writer);
+      valueStream = writer.createStream(id, OrcProto.Stream.Kind.DATA);
+      scratchLongs = new long[HiveDecimal.SCRATCH_LONGS_LEN];
+      scratchBuffer = new byte[HiveDecimal.SCRATCH_BUFFER_LEN_TO_BYTES];
+      this.scaleStream = createIntegerWriter(writer.createStream(id,
+          OrcProto.Stream.Kind.SECONDARY), true, isDirectV2, writer);
+      recordPosition(rowIndexPosition);
+    }
+
+    @Override
+    OrcProto.ColumnEncoding getEncoding() {
+      if (isDirectV2) {
+        return OrcProto.ColumnEncoding.newBuilder()
+            .setKind(OrcProto.ColumnEncoding.Kind.DIRECT_V2).build();
+      }
+      return OrcProto.ColumnEncoding.newBuilder()
+          .setKind(OrcProto.ColumnEncoding.Kind.DIRECT).build();
+    }
+
+    @Override
+    void writeBatch(ColumnVector vector, int offset,
+                    int length) throws IOException {
+      super.writeBatch(vector, offset, length);
+      DecimalColumnVector vec = (DecimalColumnVector) vector;
+      if (vector.isRepeating) {
+        if (vector.noNulls || !vector.isNull[0]) {
+          HiveDecimalWritable value = vec.vector[0];
+          indexStatistics.updateDecimal(value);
+          if (createBloomFilter) {
+
+            // Use the HiveDecimalWritable toString() method with a scratch buffer for good
+            // performance when creating the String.  We need to use a String hash code and
+            // not a UTF-8 byte[] hash code in order to get the right hash code.
+            bloomFilter.addString(value.toString(scratchBuffer));
+          }
+          for(int i=0; i < length; ++i) {
+
+            // Use HiveDecimalWritable's fast serialization method, which emulates
+            // SerializationUtils.writeBigInteger.
+            value.serializationUtilsWrite(
+                valueStream,
+                scratchLongs);
+            scaleStream.write(value.scale());
+          }
+        }
+      } else {
+        for(int i=0; i < length; ++i) {
+          if (vec.noNulls || !vec.isNull[i + offset]) {
+            HiveDecimalWritable value = vec.vector[i + offset];
+            value.serializationUtilsWrite(
+                valueStream,
+                scratchLongs);
+            scaleStream.write(value.scale());
+            indexStatistics.updateDecimal(value);
+            if (createBloomFilter) {
+              bloomFilter.addString(value.toString(scratchBuffer));
+            }
+          }
+        }
+      }
+    }
+
+    @Override
+    void writeStripe(OrcProto.StripeFooter.Builder builder,
+                     int requiredIndexEntries) throws IOException {
+      super.writeStripe(builder, requiredIndexEntries);
+      valueStream.flush();
+      scaleStream.flush();
+      recordPosition(rowIndexPosition);
+    }
+
+    @Override
+    void recordPosition(PositionRecorder recorder) throws IOException {
+      super.recordPosition(recorder);
+      valueStream.getPosition(recorder);
+      scaleStream.getPosition(recorder);
+    }
+  }
+
+  private static class StructTreeWriter extends TreeWriter {
+    StructTreeWriter(int columnId,
+                     TypeDescription schema,
+                     StreamFactory writer,
+                     boolean nullable) throws IOException {
+      super(columnId, schema, writer, nullable);
+      List<TypeDescription> children = schema.getChildren();
+      childrenWriters = new TreeWriter[children.size()];
+      for(int i=0; i < childrenWriters.length; ++i) {
+        childrenWriters[i] = createTreeWriter(
+          children.get(i), writer,
+          true);
+      }
+      recordPosition(rowIndexPosition);
+    }
+
+    @Override
+    void writeRootBatch(VectorizedRowBatch batch, int offset,
+                        int length) throws IOException {
+      // update the statistics for the root column
+      indexStatistics.increment(length);
+      // I'm assuming that the root column isn't nullable so that I don't need
+      // to update isPresent.
+      for(int i=0; i < childrenWriters.length; ++i) {
+        childrenWriters[i].writeBatch(batch.cols[i], offset, length);
+      }
+    }
+
+    private static void writeFields(StructColumnVector vector,
+                                    TreeWriter[] childrenWriters,
+                                    int offset, int length) throws IOException {
+      for(int field=0; field < childrenWriters.length; ++field) {
+        childrenWriters[field].writeBatch(vector.fields[field], offset, length);
+      }
+    }
+
+    @Override
+    void writeBatch(ColumnVector vector, int offset,
+                    int length) throws IOException {
+      super.writeBatch(vector, offset, length);
+      StructColumnVector vec = (StructColumnVector) vector;
+      if (vector.isRepeating) {
+        if (vector.noNulls || !vector.isNull[0]) {
+          writeFields(vec, childrenWriters, offset, length);
+        }
+      } else if (vector.noNulls) {
+        writeFields(vec, childrenWriters, offset, length);
+      } else {
+        // write the records in runs
+        int currentRun = 0;
+        boolean started = false;
+        for(int i=0; i < length; ++i) {
+          if (!vec.isNull[i + offset]) {
+            if (!started) {
+              started = true;
+              currentRun = i;
+            }
+          } else if (started) {
+            started = false;
+            writeFields(vec, childrenWriters, offset + currentRun,
+                i - currentRun);
+          }
+        }
+        if (started) {
+          writeFields(vec, childrenWriters, offset + currentRun,
+              length - currentRun);
+        }
+      }
+    }
+
+    @Override
+    void writeStripe(OrcProto.StripeFooter.Builder builder,
+                     int requiredIndexEntries) throws IOException {
+      super.writeStripe(builder, requiredIndexEntries);
+      for(TreeWriter child: childrenWriters) {
+        child.writeStripe(builder, requiredIndexEntries);
+      }
+      recordPosition(rowIndexPosition);
+    }
+  }
+
+  private static class ListTreeWriter extends TreeWriter {
+    private final IntegerWriter lengths;
+    private final boolean isDirectV2;
+
+    ListTreeWriter(int columnId,
+                   TypeDescription schema,
+                   StreamFactory writer,
+                   boolean nullable) throws IOException {
+      super(columnId, schema, writer, nullable);
+      this.isDirectV2 = isNewWriteFormat(writer);
+      childrenWriters = new TreeWriter[1];
+      childrenWriters[0] =
+        createTreeWriter(schema.getChildren().get(0), writer, true);
+      lengths = createIntegerWriter(writer.createStream(columnId,
+          OrcProto.Stream.Kind.LENGTH), false, isDirectV2, writer);
+      recordPosition(rowIndexPosition);
+    }
+
+    @Override
+    OrcProto.ColumnEncoding getEncoding() {
+      if (isDirectV2) {
+        return OrcProto.ColumnEncoding.newBuilder()
+            .setKind(OrcProto.ColumnEncoding.Kind.DIRECT_V2).build();
+      }
+      return OrcProto.ColumnEncoding.newBuilder()
+          .setKind(OrcProto.ColumnEncoding.Kind.DIRECT).build();
+    }
+
+    @Override
+    void writeBatch(ColumnVector vector, int offset,
+                    int length) throws IOException {
+      super.writeBatch(vector, offset, length);
+      ListColumnVector vec = (ListColumnVector) vector;
+      if (vector.isRepeating) {
+        if (vector.noNulls || !vector.isNull[0]) {
+          int childOffset = (int) vec.offsets[0];
+          int childLength = (int) vec.lengths[0];
+          for(int i=0; i < length; ++i) {
+            lengths.write(childLength);
+            childrenWriters[0].writeBatch(vec.child, childOffset, childLength);
+          }
+          if (createBloomFilter) {
+            bloomFilter.addLong(childLength);
+          }
+        }
+      } else {
+        // write the elements in runs
+        int currentOffset = 0;
+        int currentLength = 0;
+        for(int i=0; i < length; ++i) {
+          if (!vec.isNull[i + offset]) {
+            int nextLength = (int) vec.lengths[offset + i];
+            int nextOffset = (int) vec.offsets[offset + i];
+            lengths.write(nextLength);
+            if (currentLength == 0) {
+              currentOffset = nextOffset;
+              currentLength = nextLength;
+            } else if (currentOffset + currentLength != nextOffset) {
+              childrenWriters[0].writeBatch(vec.child, currentOffset,
+                  currentLength);
+              currentOffset = nextOffset;
+              currentLength = nextLength;
+            } else {
+              currentLength += nextLength;
+            }
+          }
+        }
+        if (currentLength != 0) {
+          childrenWriters[0].writeBatch(vec.child, currentOffset,
+              currentLength);
+        }
+      }
+    }
+
+    @Override
+    void writeStripe(OrcProto.StripeFooter.Builder builder,
+                     int requiredIndexEntries) throws IOException {
+      super.writeStripe(builder, requiredIndexEntries);
+      lengths.flush();
+      for(TreeWriter child: childrenWriters) {
+        child.writeStripe(builder, requiredIndexEntries);
+      }
+      recordPosition(rowIndexPosition);
+    }
+
+    @Override
+    void recordPosition(PositionRecorder recorder) throws IOException {
+      super.recordPosition(recorder);
+      lengths.getPosition(recorder);
+    }
+  }
+
+  private static class MapTreeWriter extends TreeWriter {
+    private final IntegerWriter lengths;
+    private final boolean isDirectV2;
+
+    MapTreeWriter(int columnId,
+                  TypeDescription schema,
+                  StreamFactory writer,
+                  boolean nullable) throws IOException {
+      super(columnId, schema, writer, nullable);
+      this.isDirectV2 = isNewWriteFormat(writer);
+      childrenWriters = new TreeWriter[2];
+      List<TypeDescription> children = schema.getChildren();
+      childrenWriters[0] =
+        createTreeWriter(children.get(0), writer, true);
+      childrenWriters[1] =
+        createTreeWriter(children.get(1), writer, true);
+      lengths = createIntegerWriter(writer.createStream(columnId,
+          OrcProto.Stream.Kind.LENGTH), false, isDirectV2, writer);
+      recordPosition(rowIndexPosition);
+    }
+
+    @Override
+    OrcProto.ColumnEncoding getEncoding() {
+      if (isDirectV2) {
+        return OrcProto.ColumnEncoding.newBuilder()
+            .setKind(OrcProto.ColumnEncoding.Kind.DIRECT_V2).build();
+      }
+      return OrcProto.ColumnEncoding.newBuilder()
+          .setKind(OrcProto.ColumnEncoding.Kind.DIRECT).build();
+    }
+
+    @Override
+    void writeBatch(ColumnVector vector, int offset,
+                    int length) throws IOException {
+      super.writeBatch(vector, offset, length);
+      MapColumnVector vec = (MapColumnVector) vector;
+      if (vector.isRepeating) {
+        if (vector.noNulls || !vector.isNull[0]) {
+          int childOffset = (int) vec.offsets[0];
+          int childLength = (int) vec.lengths[0];
+          for(int i=0; i < length; ++i) {
+            lengths.write(childLength);
+            childrenWriters[0].writeBatch(vec.keys, childOffset, childLength);
+            childrenWriters[1].writeBatch(vec.values, childOffset, childLength);
+          }
+          if (createBloomFilter) {
+            bloomFilter.addLong(childLength);
+          }
+        }
+      } else {
+        // write the elements in runs
+        int currentOffset = 0;
+        int currentLength = 0;
+        for(int i=0; i < length; ++i) {
+          if (!vec.isNull[i + offset]) {
+            int nextLength = (int) vec.lengths[offset + i];
+            int nextOffset = (int) vec.offsets[offset + i];
+            lengths.write(nextLength);
+            if (currentLength == 0) {
+              currentOffset = nextOffset;
+              currentLength = nextLength;
+            } else if (currentOffset + currentLength != nextOffset) {
+              childrenWriters[0].writeBatch(vec.keys, currentOffset,
+                  currentLength);
+              childrenWriters[1].writeBatch(vec.values, currentOffset,
+                  currentLength);
+              currentOffset = nextOffset;
+              currentLength = nextLength;
+            } else {
+              currentLength += nextLength;
+            }
+          }
+        }
+        if (currentLength != 0) {
+          childrenWriters[0].writeBatch(vec.keys, currentOffset,
+              currentLength);
+          childrenWriters[1].writeBatch(vec.values, currentOffset,
+              currentLength);
+        }
+      }
+    }
+
+    @Override
+    void writeStripe(OrcProto.StripeFooter.Builder builder,
+                     int requiredIndexEntries) throws IOException {
+      super.writeStripe(builder, requiredIndexEntries);
+      lengths.flush();
+      for(TreeWriter child: childrenWriters) {
+        child.writeStripe(builder, requiredIndexEntries);
+      }
+      recordPosition(rowIndexPosition);
+    }
+
+    @Override
+    void recordPosition(PositionRecorder recorder) throws IOException {
+      super.recordPosition(recorder);
+      lengths.getPosition(recorder);
+    }
+  }
+
+  private static class UnionTreeWriter extends TreeWriter {
+    private final RunLengthByteWriter tags;
+
+    UnionTreeWriter(int columnId,
+                  TypeDescription schema,
+                  StreamFactory writer,
+                  boolean nullable) throws IOException {
+      super(columnId, schema, writer, nullable);
+      List<TypeDescription> children = schema.getChildren();
+      childrenWriters = new TreeWriter[children.size()];
+      for(int i=0; i < childrenWriters.length; ++i) {
+        childrenWriters[i] =
+            createTreeWriter(children.get(i), writer, true);
+      }
+      tags =
+        new RunLengthByteWriter(writer.createStream(columnId,
+            OrcProto.Stream.Kind.DATA));
+      recordPosition(rowIndexPosition);
+    }
+
+    @Override
+    void writeBatch(ColumnVector vector, int offset,
+                    int length) throws IOException {
+      super.writeBatch(vector, offset, length);
+      UnionColumnVector vec = (UnionColumnVector) vector;
+      if (vector.isRepeating) {
+        if (vector.noNulls || !vector.isNull[0]) {
+          byte tag = (byte) vec.tags[0];
+          for(int i=0; i < length; ++i) {
+            tags.write(tag);
+          }
+          if (createBloomFilter) {
+            bloomFilter.addLong(tag);
+          }
+          childrenWriters[tag].writeBatch(vec.fields[tag], offset, length);
+        }
+      } else {
+        // write the records in runs of the same tag
+        int[] currentStart = new int[vec.fields.length];
+        int[] currentLength = new int[vec.fields.length];
+        for(int i=0; i < length; ++i) {
+          // only need to deal with the non-nulls, since the nulls were dealt
+          // with in the super method.
+          if (vec.noNulls || !vec.isNull[i + offset]) {
+            byte tag = (byte) vec.tags[offset + i];
+            tags.write(tag);
+            if (currentLength[tag] == 0) {
+              // start a new sequence
+              currentStart[tag] = i + offset;
+              currentLength[tag] = 1;
+            } else if (currentStart[tag] + currentLength[tag] == i + offset) {
+              // ok, we are extending the current run for that tag.
+              currentLength[tag] += 1;
+            } else {
+              // otherwise, we need to close off the old run and start a new one
+              childrenWriters[tag].writeBatch(vec.fields[tag],
+                  currentStart[tag], currentLength[tag]);
+              currentStart[tag] = i + offset;
+              currentLength[tag] = 1;
+            }
+          }
+        }
+        // write out any left over sequences
+        for(int tag=0; tag < currentStart.length; ++tag) {
+          if (currentLength[tag] != 0) {
+            childrenWriters[tag].writeBatch(vec.fields[tag], currentStart[tag],
+                currentLength[tag]);
+          }
+        }
+      }
+    }
+
+    @Override
+    void writeStripe(OrcProto.StripeFooter.Builder builder,
+                     int requiredIndexEntries) throws IOException {
+      super.writeStripe(builder, requiredIndexEntries);
+      tags.flush();
+      for(TreeWriter child: childrenWriters) {
+        child.writeStripe(builder, requiredIndexEntries);
+      }
+      recordPosition(rowIndexPosition);
+    }
+
+    @Override
+    void recordPosition(PositionRecorder recorder) throws IOException {
+      super.recordPosition(recorder);
+      tags.getPosition(recorder);
+    }
+  }
+
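+  /**
+   * Create the appropriate TreeWriter for the given schema node. Column ids
+   * are handed out by the stream factory in a pre-order walk of the type
+   * tree, since each writer's constructor creates its children before the
+   * next sibling is visited.
+   */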
+  private static TreeWriter createTreeWriter(TypeDescription schema,
+                                             StreamFactory streamFactory,
+                                             boolean nullable) throws IOException {
+    switch (schema.getCategory()) {
+      case BOOLEAN:
+        return new BooleanTreeWriter(streamFactory.getNextColumnId(),
+            schema, streamFactory, nullable);
+      case BYTE:
+        return new ByteTreeWriter(streamFactory.getNextColumnId(),
+            schema, streamFactory, nullable);
+      case SHORT:
+      case INT:
+      case LONG:
+        return new IntegerTreeWriter(streamFactory.getNextColumnId(),
+            schema, streamFactory, nullable);
+      case FLOAT:
+        return new FloatTreeWriter(streamFactory.getNextColumnId(),
+            schema, streamFactory, nullable);
+      case DOUBLE:
+        return new DoubleTreeWriter(streamFactory.getNextColumnId(),
+            schema, streamFactory, nullable);
+      case STRING:
+        return new StringTreeWriter(streamFactory.getNextColumnId(),
+            schema, streamFactory, nullable);
+      case CHAR:
+        return new CharTreeWriter(streamFactory.getNextColumnId(),
+            schema, streamFactory, nullable);
+      case VARCHAR:
+        return new VarcharTreeWriter(streamFactory.getNextColumnId(),
+            schema, streamFactory, nullable);
+      case BINARY:
+        return new BinaryTreeWriter(streamFactory.getNextColumnId(),
+            schema, streamFactory, nullable);
+      case TIMESTAMP:
+        return new TimestampTreeWriter(streamFactory.getNextColumnId(),
+            schema, streamFactory, nullable);
+      case DATE:
+        return new DateTreeWriter(streamFactory.getNextColumnId(),
+            schema, streamFactory, nullable);
+      case DECIMAL:
+        return new DecimalTreeWriter(streamFactory.getNextColumnId(),
+            schema, streamFactory,  nullable);
+      case STRUCT:
+        return new StructTreeWriter(streamFactory.getNextColumnId(),
+            schema, streamFactory, nullable);
+      case MAP:
+        return new MapTreeWriter(streamFactory.getNextColumnId(),
+            schema, streamFactory, nullable);
+      case LIST:
+        return new ListTreeWriter(streamFactory.getNextColumnId(),
+            schema, streamFactory, nullable);
+      case UNION:
+        return new UnionTreeWriter(streamFactory.getNextColumnId(),
+            schema, streamFactory, nullable);
+      default:
+        throw new IllegalArgumentException("Bad category: " +
+            schema.getCategory());
+    }
+  }
+
+  private static void writeTypes(OrcProto.Footer.Builder builder,
+                                 TypeDescription schema) {
+    OrcProto.Type.Builder type = OrcProto.Type.newBuilder();
+    List<TypeDescription> children = OrcUtils.setTypeBuilderFromSchema(type, schema);
+    builder.addTypes(type);
+    if (children != null) {
+      for(TypeDescription child: children) {
+        writeTypes(builder, child);
+      }
+    }
+  }
+
+  @VisibleForTesting
+  public void ensureStream() throws IOException {
+    physWriter.initialize();
+  }
+
+  private void createRowIndexEntry() throws IOException {
+    treeWriter.createRowIndexEntry();
+    rowsInIndex = 0;
+  }
+
+  private void flushStripe() throws IOException {
+    ensureStream();
+    if (buildIndex && rowsInIndex != 0) {
+      createRowIndexEntry();
+    }
+    if (rowsInStripe != 0) {
+      if (callback != null) {
+        callback.preStripeWrite(callbackContext);
+      }
+      // finalize the data for the stripe
+      int requiredIndexEntries = rowIndexStride == 0 ? 0 :
+          (int) ((rowsInStripe + rowIndexStride - 1) / rowIndexStride);
+      OrcProto.StripeFooter.Builder builder = OrcProto.StripeFooter.newBuilder();
+      OrcProto.StripeInformation.Builder dirEntry = OrcProto.StripeInformation
+          .newBuilder().setNumberOfRows(rowsInStripe);
+      treeWriter.writeStripe(builder, requiredIndexEntries);
+      physWriter.finalizeStripe(builder, dirEntry);
+      stripes.add(dirEntry.build());
+      rowCount += rowsInStripe;
+      rowsInStripe = 0;
+    }
+  }
+
+  private long computeRawDataSize() {
+    return getRawDataSize(treeWriter, schema);
+  }
+
+  private long getRawDataSize(TreeWriter child,
+                              TypeDescription schema) {
+    long total = 0;
+    long numVals = child.fileStatistics.getNumberOfValues();
+    switch (schema.getCategory()) {
+      case BOOLEAN:
+      case BYTE:
+      case SHORT:
+      case INT:
+      case FLOAT:
+        return numVals * JavaDataModel.get().primitive1();
+      case LONG:
+      case DOUBLE:
+        return numVals * JavaDataModel.get().primitive2();
+      case STRING:
+      case VARCHAR:
+      case CHAR:
+        // ORC strings are converted to java Strings, so use JavaDataModel to
+        // compute the overall size of the strings
+        StringColumnStatistics scs = (StringColumnStatistics) child.fileStatistics;
+        numVals = numVals == 0 ? 1 : numVals;
+        int avgStringLen = (int) (scs.getSum() / numVals);
+        return numVals * JavaDataModel.get().lengthForStringOfLength(avgStringLen);
+      case DECIMAL:
+        return numVals * JavaDataModel.get().lengthOfDecimal();
+      case DATE:
+        return numVals * JavaDataModel.get().lengthOfDate();
+      case BINARY:
+        // get total length of binary blob
+        BinaryColumnStatistics bcs = (BinaryColumnStatistics) child.fileStatistics;
+        return bcs.getSum();
+      case TIMESTAMP:
+        return numVals * JavaDataModel.get().lengthOfTimestamp();
+      case LIST:
+      case MAP:
+      case UNION:
+      case STRUCT: {
+        TreeWriter[] childWriters = child.getChildrenWriters();
+        List<TypeDescription> childTypes = schema.getChildren();
+        for (int i=0; i < childWriters.length; ++i) {
+          total += getRawDataSize(childWriters[i], childTypes.get(i));
+        }
+        break;
+      }
+      default:
+        LOG.debug("Unknown type category: " + schema.getCategory());
+        break;
+    }
+    return total;
+  }
+
+  private void writeFileStatistics(OrcProto.Footer.Builder builder,
+                                   TreeWriter writer) throws IOException {
+    builder.addStatistics(writer.fileStatistics.serialize());
+    for(TreeWriter child: writer.getChildrenWriters()) {
+      writeFileStatistics(builder, child);
+    }
+  }
+
+  private void writeMetadata() throws IOException {
+    ensureStream();
+    OrcProto.Metadata.Builder builder = OrcProto.Metadata.newBuilder();
+    for(OrcProto.StripeStatistics.Builder ssb : treeWriter.stripeStatsBuilders) {
+      builder.addStripeStats(ssb.build());
+    }
+
+    physWriter.writeFileMetadata(builder);
+  }
+
+  private void writeFooter() throws IOException {
+    ensureStream();
+    OrcProto.Footer.Builder builder = OrcProto.Footer.newBuilder();
+    builder.setNumberOfRows(rowCount);
+    builder.setRowIndexStride(rowIndexStride);
+    // populate raw data size
+    rawDataSize = computeRawDataSize();
+    // serialize the types
+    writeTypes(builder, schema);
+    // add the stripe information
+    for(OrcProto.StripeInformation stripe: stripes) {
+      builder.addStripes(stripe);
+    }
+    // add the column statistics
+    writeFileStatistics(builder, treeWriter);
+    // add all of the user metadata
+    for(Map.Entry<String, ByteString> entry: userMetadata.entrySet()) {
+      builder.addMetadata(OrcProto.UserMetadataItem.newBuilder()
+        .setName(entry.getKey()).setValue(entry.getValue()));
+    }
+    physWriter.writeFileFooter(builder);
+  }
+
+  private void writePostScript() throws IOException {
+    OrcProto.PostScript.Builder builder =
+      OrcProto.PostScript.newBuilder()
+        .setMagic(OrcFile.MAGIC)
+        .addVersion(version.getMajor())
+        .addVersion(version.getMinor())
+        .setWriterVersion(OrcFile.CURRENT_WRITER.getId());
+    physWriter.writePostScript(builder);
+  }
+
+  private long estimateStripeSize() {
+    return physWriter.estimateMemory() + treeWriter.estimateMemory();
+  }
+
+  @Override
+  public TypeDescription getSchema() {
+    return schema;
+  }
+
+  @Override
+  public void addUserMetadata(String name, ByteBuffer value) {
+    userMetadata.put(name, ByteString.copyFrom(value));
+  }
+
+  @Override
+  public void addRowBatch(VectorizedRowBatch batch) throws IOException {
+    if (buildIndex) {
+      // Batch the writes up to the rowIndexStride so that we can get the
+      // right size indexes.
+      int posn = 0;
+      while (posn < batch.size) {
+        int chunkSize = Math.min(batch.size - posn,
+            rowIndexStride - rowsInIndex);
+        treeWriter.writeRootBatch(batch, posn, chunkSize);
+        posn += chunkSize;
+        rowsInIndex += chunkSize;
+        rowsInStripe += chunkSize;
+        if (rowsInIndex >= rowIndexStride) {
+          createRowIndexEntry();
+        }
+      }
+    } else {
+      rowsInStripe += batch.size;
+      treeWriter.writeRootBatch(batch, 0, batch.size);
+    }
+    if (path != null) {
+      memoryManager.addedRow(batch.size);
+    }
+  }
+
+  @Override
+  public void close() throws IOException {
+    if (callback != null) {
+      callback.preFooterWrite(callbackContext);
+    }
+    // remove us from the memory manager so that we don't get any callbacks
+    if (path != null) {
+      memoryManager.removeWriter(path);
+    }
+    // actually close the file
+    flushStripe();
+    writeMetadata();
+    writeFooter();
+    writePostScript();
+    physWriter.close();
+  }
+
+  /**
+   * Raw data size is computed when writing the file footer, so the value is
+   * available only after the writer has been closed.
+   */
+  @Override
+  public long getRawDataSize() {
+    return rawDataSize;
+  }
+
+  /**
+   * The row count is updated when stripes are flushed. For an accurate
+   * count, call this method after the writer has been closed.
+   */
+  @Override
+  public long getNumberOfRows() {
+    return rowCount;
+  }
+
+  @Override
+  public long writeIntermediateFooter() throws IOException {
+    // flush any buffered rows
+    flushStripe();
+    // write a footer
+    if (stripesAtLastFlush != stripes.size()) {
+      if (callback != null) {
+        callback.preFooterWrite(callbackContext);
+      }
+      writeMetadata();
+      writeFooter();
+      writePostScript();
+      stripesAtLastFlush = stripes.size();
+      physWriter.flush();
+    }
+    return physWriter.getRawWriterPosition();
+  }
+
+  @Override
+  public void appendStripe(byte[] stripe, int offset, int length,
+      StripeInformation stripeInfo,
+      OrcProto.StripeStatistics stripeStatistics) throws IOException {
+    checkArgument(stripe != null, "Stripe must not be null");
+    checkArgument(length <= stripe.length,
+        "Specified length must not be greater than the specified array length");
+    checkArgument(stripeInfo != null, "Stripe information must not be null");
+    checkArgument(stripeStatistics != null,
+        "Stripe statistics must not be null");
+
+    ensureStream();
+    OrcProto.StripeInformation.Builder dirEntry = OrcProto.StripeInformation.newBuilder();
+    physWriter.appendRawStripe(stripe, offset, length, dirEntry);
+
+    rowsInStripe = stripeStatistics.getColStats(0).getNumberOfValues();
+    rowCount += rowsInStripe;
+
+    // since we have already written the stripe, just update stripe statistics
+    treeWriter.stripeStatsBuilders.add(stripeStatistics.toBuilder());
+
+    // update file level statistics
+    updateFileStatistics(stripeStatistics);
+
+    // update stripe information
+    stripes.add(dirEntry.setNumberOfRows(rowsInStripe)
+        .setIndexLength(stripeInfo.getIndexLength())
+        .setDataLength(stripeInfo.getDataLength())
+        .setFooterLength(stripeInfo.getFooterLength())
+        .build());
+
+    // reset it after writing the stripe
+    rowsInStripe = 0;
+  }
+
+  private void updateFileStatistics(OrcProto.StripeStatistics stripeStatistics) {
+    List<OrcProto.ColumnStatistics> cs = stripeStatistics.getColSt

<TRUNCATED>

[33/37] hive git commit: HIVE-17118. Move the hive-orc source files to make the package names unique.

Posted by om...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/StripeInformation.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/StripeInformation.java b/orc/src/java/org/apache/hive/orc/StripeInformation.java
new file mode 100644
index 0000000..b8dfc60
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/StripeInformation.java
@@ -0,0 +1,59 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc;
+
+/**
+ * Information about the stripes in an ORC file that is provided by the Reader.
+ */
+public interface StripeInformation {
+  /**
+   * Get the byte offset of the start of the stripe.
+   * @return the bytes from the start of the file
+   */
+  long getOffset();
+
+  /**
+   * Get the total length of the stripe in bytes.
+   * @return the number of bytes in the stripe
+   */
+  long getLength();
+
+  /**
+   * Get the length of the stripe's indexes.
+   * @return the number of bytes in the index
+   */
+  long getIndexLength();
+
+  /**
+   * Get the length of the stripe's data.
+   * @return the number of bytes in the stripe
+   */
+  long getDataLength();
+
+  /**
+   * Get the length of the stripe's tail section, which contains the stripe footer.
+   * @return the number of bytes in the tail
+   */
+  long getFooterLength();
+
+  /**
+   * Get the number of rows in the stripe.
+   * @return a count of the number of rows
+   */
+  long getNumberOfRows();
+}
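
For orientation, a minimal usage sketch of this interface. It assumes a Reader
obtained from OrcFile.createReader(...) whose getStripes() returns these
objects; neither of those classes is part of this hunk.

import org.apache.hive.orc.Reader;
import org.apache.hive.orc.StripeInformation;

class StripeDump {
  // Print one line per stripe; all offsets and lengths are in bytes, and the
  // index, data and footer lengths together make up the stripe length.
  static void dump(Reader reader) {
    for (StripeInformation stripe : reader.getStripes()) {
      System.out.println("offset=" + stripe.getOffset()
          + " length=" + stripe.getLength()
          + " index=" + stripe.getIndexLength()
          + " data=" + stripe.getDataLength()
          + " footer=" + stripe.getFooterLength()
          + " rows=" + stripe.getNumberOfRows());
    }
  }
}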

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/StripeStatistics.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/StripeStatistics.java b/orc/src/java/org/apache/hive/orc/StripeStatistics.java
new file mode 100644
index 0000000..c704dd9
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/StripeStatistics.java
@@ -0,0 +1,44 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc;
+
+import org.apache.hive.orc.impl.ColumnStatisticsImpl;
+
+import java.util.List;
+
+public class StripeStatistics {
+  private final List<OrcProto.ColumnStatistics> cs;
+
+  public StripeStatistics(List<OrcProto.ColumnStatistics> list) {
+    this.cs = list;
+  }
+
+  /**
+   * Return the list of column statistics for this stripe.
+   *
+   * @return column stats
+   */
+  public ColumnStatistics[] getColumnStatistics() {
+    ColumnStatistics[] result = new ColumnStatistics[cs.size()];
+    for (int i = 0; i < result.length; ++i) {
+      result[i] = ColumnStatisticsImpl.deserialize(cs.get(i));
+    }
+    return result;
+  }
+}
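
A hedged sketch of how the proto-to-API conversion above is used. OrcProto,
ColumnStatistics and ColumnStatisticsImpl live elsewhere in this package and
are not shown in this hunk; the builder fields used here are assumed.

import java.util.Collections;
import org.apache.hive.orc.ColumnStatistics;
import org.apache.hive.orc.OrcProto;
import org.apache.hive.orc.StripeStatistics;

class StripeStatsExample {
  public static void main(String[] args) {
    // A bare column-statistics message carrying only a value count.
    OrcProto.ColumnStatistics proto = OrcProto.ColumnStatistics.newBuilder()
        .setNumberOfValues(42)
        .build();
    StripeStatistics stats =
        new StripeStatistics(Collections.singletonList(proto));
    // getColumnStatistics() deserializes each proto entry into the API type.
    ColumnStatistics[] cols = stats.getColumnStatistics();
    System.out.println(cols[0].getNumberOfValues());   // 42
  }
}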

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/TimestampColumnStatistics.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/TimestampColumnStatistics.java b/orc/src/java/org/apache/hive/orc/TimestampColumnStatistics.java
new file mode 100644
index 0000000..55a7e42
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/TimestampColumnStatistics.java
@@ -0,0 +1,38 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc;
+
+import java.sql.Timestamp;
+
+/**
+ * Statistics for Timestamp columns.
+ */
+public interface TimestampColumnStatistics extends ColumnStatistics {
+  /**
+   * Get the minimum value for the column.
+   * @return minimum value
+   */
+  Timestamp getMinimum();
+
+  /**
+   * Get the maximum value for the column.
+   * @return maximum value
+   */
+  Timestamp getMaximum();
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/TypeDescription.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/TypeDescription.java b/orc/src/java/org/apache/hive/orc/TypeDescription.java
new file mode 100644
index 0000000..f4ed908
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/TypeDescription.java
@@ -0,0 +1,870 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc;
+
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.UnionColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * This is the description of the types in an ORC file.
+ */
+public class TypeDescription
+    implements Comparable<TypeDescription>, Serializable {
+  private static final int MAX_PRECISION = 38;
+  private static final int MAX_SCALE = 38;
+  private static final int DEFAULT_PRECISION = 38;
+  private static final int DEFAULT_SCALE = 10;
+  private static final int DEFAULT_LENGTH = 256;
+
+  @Override
+  public int compareTo(TypeDescription other) {
+    if (this == other) {
+      return 0;
+    } else if (other == null) {
+      return -1;
+    } else {
+      int result = category.compareTo(other.category);
+      if (result == 0) {
+        switch (category) {
+          case CHAR:
+          case VARCHAR:
+            return maxLength - other.maxLength;
+          case DECIMAL:
+            if (precision != other.precision) {
+              return precision - other.precision;
+            }
+            return scale - other.scale;
+          case UNION:
+          case LIST:
+          case MAP:
+            if (children.size() != other.children.size()) {
+              return children.size() - other.children.size();
+            }
+            for(int c=0; result == 0 && c < children.size(); ++c) {
+              result = children.get(c).compareTo(other.children.get(c));
+            }
+            break;
+          case STRUCT:
+            if (children.size() != other.children.size()) {
+              return children.size() - other.children.size();
+            }
+            for(int c=0; result == 0 && c < children.size(); ++c) {
+              result = fieldNames.get(c).compareTo(other.fieldNames.get(c));
+              if (result == 0) {
+                result = children.get(c).compareTo(other.children.get(c));
+              }
+            }
+            break;
+          default:
+            // PASS
+        }
+      }
+      return result;
+    }
+  }
+
+  public enum Category {
+    BOOLEAN("boolean", true),
+    BYTE("tinyint", true),
+    SHORT("smallint", true),
+    INT("int", true),
+    LONG("bigint", true),
+    FLOAT("float", true),
+    DOUBLE("double", true),
+    STRING("string", true),
+    DATE("date", true),
+    TIMESTAMP("timestamp", true),
+    BINARY("binary", true),
+    DECIMAL("decimal", true),
+    VARCHAR("varchar", true),
+    CHAR("char", true),
+    LIST("array", false),
+    MAP("map", false),
+    STRUCT("struct", false),
+    UNION("uniontype", false);
+
+    Category(String name, boolean isPrimitive) {
+      this.name = name;
+      this.isPrimitive = isPrimitive;
+    }
+
+    final boolean isPrimitive;
+    final String name;
+
+    public boolean isPrimitive() {
+      return isPrimitive;
+    }
+
+    public String getName() {
+      return name;
+    }
+  }
+
+  public static TypeDescription createBoolean() {
+    return new TypeDescription(Category.BOOLEAN);
+  }
+
+  public static TypeDescription createByte() {
+    return new TypeDescription(Category.BYTE);
+  }
+
+  public static TypeDescription createShort() {
+    return new TypeDescription(Category.SHORT);
+  }
+
+  public static TypeDescription createInt() {
+    return new TypeDescription(Category.INT);
+  }
+
+  public static TypeDescription createLong() {
+    return new TypeDescription(Category.LONG);
+  }
+
+  public static TypeDescription createFloat() {
+    return new TypeDescription(Category.FLOAT);
+  }
+
+  public static TypeDescription createDouble() {
+    return new TypeDescription(Category.DOUBLE);
+  }
+
+  public static TypeDescription createString() {
+    return new TypeDescription(Category.STRING);
+  }
+
+  public static TypeDescription createDate() {
+    return new TypeDescription(Category.DATE);
+  }
+
+  public static TypeDescription createTimestamp() {
+    return new TypeDescription(Category.TIMESTAMP);
+  }
+
+  public static TypeDescription createBinary() {
+    return new TypeDescription(Category.BINARY);
+  }
+
+  public static TypeDescription createDecimal() {
+    return new TypeDescription(Category.DECIMAL);
+  }
+
+  static class StringPosition {
+    final String value;
+    int position;
+    final int length;
+
+    StringPosition(String value) {
+      this.value = value;
+      position = 0;
+      length = value.length();
+    }
+
+    @Override
+    public String toString() {
+      StringBuilder buffer = new StringBuilder();
+      buffer.append('\'');
+      buffer.append(value.substring(0, position));
+      buffer.append('^');
+      buffer.append(value.substring(position));
+      buffer.append('\'');
+      return buffer.toString();
+    }
+  }
+
+  static Category parseCategory(StringPosition source) {
+    int start = source.position;
+    while (source.position < source.length) {
+      char ch = source.value.charAt(source.position);
+      if (!Character.isLetter(ch)) {
+        break;
+      }
+      source.position += 1;
+    }
+    if (source.position != start) {
+      String word = source.value.substring(start, source.position).toLowerCase();
+      for (Category cat : Category.values()) {
+        if (cat.getName().equals(word)) {
+          return cat;
+        }
+      }
+    }
+    throw new IllegalArgumentException("Can't parse category at " + source);
+  }
+
+  static int parseInt(StringPosition source) {
+    int start = source.position;
+    int result = 0;
+    while (source.position < source.length) {
+      char ch = source.value.charAt(source.position);
+      if (!Character.isDigit(ch)) {
+        break;
+      }
+      result = result * 10 + (ch - '0');
+      source.position += 1;
+    }
+    if (source.position == start) {
+      throw new IllegalArgumentException("Missing integer at " + source);
+    }
+    return result;
+  }
+
+  static String parseName(StringPosition source) {
+    int start = source.position;
+    while (source.position < source.length) {
+      char ch = source.value.charAt(source.position);
+      if (!Character.isLetterOrDigit(ch) && ch != '.' && ch != '_') {
+        break;
+      }
+      source.position += 1;
+    }
+    if (source.position == start) {
+      throw new IllegalArgumentException("Missing name at " + source);
+    }
+    return source.value.substring(start, source.position);
+  }
+
+  static void requireChar(StringPosition source, char required) {
+    if (source.position >= source.length ||
+        source.value.charAt(source.position) != required) {
+      throw new IllegalArgumentException("Missing required char '" +
+          required + "' at " + source);
+    }
+    source.position += 1;
+  }
+
+  static boolean consumeChar(StringPosition source, char ch) {
+    boolean result = source.position < source.length &&
+        source.value.charAt(source.position) == ch;
+    if (result) {
+      source.position += 1;
+    }
+    return result;
+  }
+
+  static void parseUnion(TypeDescription type, StringPosition source) {
+    requireChar(source, '<');
+    do {
+      type.addUnionChild(parseType(source));
+    } while (consumeChar(source, ','));
+    requireChar(source, '>');
+  }
+
+  static void parseStruct(TypeDescription type, StringPosition source) {
+    requireChar(source, '<');
+    do {
+      String fieldName = parseName(source);
+      requireChar(source, ':');
+      type.addField(fieldName, parseType(source));
+    } while (consumeChar(source, ','));
+    requireChar(source, '>');
+  }
+
+  static TypeDescription parseType(StringPosition source) {
+    TypeDescription result = new TypeDescription(parseCategory(source));
+    switch (result.getCategory()) {
+      case BINARY:
+      case BOOLEAN:
+      case BYTE:
+      case DATE:
+      case DOUBLE:
+      case FLOAT:
+      case INT:
+      case LONG:
+      case SHORT:
+      case STRING:
+      case TIMESTAMP:
+        break;
+      case CHAR:
+      case VARCHAR:
+        requireChar(source, '(');
+        result.withMaxLength(parseInt(source));
+        requireChar(source, ')');
+        break;
+      case DECIMAL: {
+        requireChar(source, '(');
+        int precision = parseInt(source);
+        requireChar(source, ',');
+        result.withScale(parseInt(source));
+        result.withPrecision(precision);
+        requireChar(source, ')');
+        break;
+      }
+      case LIST:
+        requireChar(source, '<');
+        result.children.add(parseType(source));
+        requireChar(source, '>');
+        break;
+      case MAP:
+        requireChar(source, '<');
+        result.children.add(parseType(source));
+        requireChar(source, ',');
+        result.children.add(parseType(source));
+        requireChar(source, '>');
+        break;
+      case UNION:
+        parseUnion(result, source);
+        break;
+      case STRUCT:
+        parseStruct(result, source);
+        break;
+      default:
+        throw new IllegalArgumentException("Unknown type " +
+            result.getCategory() + " at " + source);
+    }
+    return result;
+  }
+
+  /**
+   * Parse a TypeDescription from a Hive type name. This is the inverse
+   * of TypeDescription.toString()
+   * @param typeName the name of the type
+   * @return a new TypeDescription or null if typeName was null
+   * @throws IllegalArgumentException if the string is badly formed
+   */
+  public static TypeDescription fromString(String typeName) {
+    if (typeName == null) {
+      return null;
+    }
+    StringPosition source = new StringPosition(typeName);
+    TypeDescription result = parseType(source);
+    if (source.position != source.length) {
+      throw new IllegalArgumentException("Extra characters at " + source);
+    }
+    return result;
+  }
+
+  /**
+   * For decimal types, set the precision.
+   * @param precision the new precision
+   * @return this
+   */
+  public TypeDescription withPrecision(int precision) {
+    if (category != Category.DECIMAL) {
+      throw new IllegalArgumentException("precision is only allowed on decimal"+
+         " and not " + category.name);
+    } else if (precision < 1 || precision > MAX_PRECISION || scale > precision){
+      throw new IllegalArgumentException("precision " + precision +
+          " is out of range 1 .. " + scale);
+    }
+    this.precision = precision;
+    return this;
+  }
+
+  /**
+   * For decimal types, set the scale.
+   * @param scale the new scale
+   * @return this
+   */
+  public TypeDescription withScale(int scale) {
+    if (category != Category.DECIMAL) {
+      throw new IllegalArgumentException("scale is only allowed on decimal"+
+          " and not " + category.name);
+    } else if (scale < 0 || scale > MAX_SCALE || scale > precision) {
+      throw new IllegalArgumentException("scale is out of range at " + scale);
+    }
+    this.scale = scale;
+    return this;
+  }
+
+  public static TypeDescription createVarchar() {
+    return new TypeDescription(Category.VARCHAR);
+  }
+
+  public static TypeDescription createChar() {
+    return new TypeDescription(Category.CHAR);
+  }
+
+  /**
+   * Set the maximum length for char and varchar types.
+   * @param maxLength the maximum value
+   * @return this
+   */
+  public TypeDescription withMaxLength(int maxLength) {
+    if (category != Category.VARCHAR && category != Category.CHAR) {
+      throw new IllegalArgumentException("maxLength is only allowed on char" +
+                   " and varchar and not " + category.name);
+    }
+    this.maxLength = maxLength;
+    return this;
+  }
+
+  public static TypeDescription createList(TypeDescription childType) {
+    TypeDescription result = new TypeDescription(Category.LIST);
+    result.children.add(childType);
+    childType.parent = result;
+    return result;
+  }
+
+  public static TypeDescription createMap(TypeDescription keyType,
+                                          TypeDescription valueType) {
+    TypeDescription result = new TypeDescription(Category.MAP);
+    result.children.add(keyType);
+    result.children.add(valueType);
+    keyType.parent = result;
+    valueType.parent = result;
+    return result;
+  }
+
+  public static TypeDescription createUnion() {
+    return new TypeDescription(Category.UNION);
+  }
+
+  public static TypeDescription createStruct() {
+    return new TypeDescription(Category.STRUCT);
+  }
+
+  /**
+   * Add a child to a union type.
+   * @param child a new child type to add
+   * @return the union type.
+   */
+  public TypeDescription addUnionChild(TypeDescription child) {
+    if (category != Category.UNION) {
+      throw new IllegalArgumentException("Can only add types to union type" +
+          " and not " + category);
+    }
+    children.add(child);
+    child.parent = this;
+    return this;
+  }
+
+  /**
+   * Add a field to a struct type as it is built.
+   * @param field the field name
+   * @param fieldType the type of the field
+   * @return the struct type
+   */
+  public TypeDescription addField(String field, TypeDescription fieldType) {
+    if (category != Category.STRUCT) {
+      throw new IllegalArgumentException("Can only add fields to struct type" +
+          " and not " + category);
+    }
+    fieldNames.add(field);
+    children.add(fieldType);
+    fieldType.parent = this;
+    return this;
+  }
+
+  /**
+   * Get the id for this type.
+   * The first call will cause all of the ids in the tree to be assigned, so
+   * it should not be called before the type is completely built.
+   * @return the sequential id
+   */
+  public int getId() {
+    // if the id hasn't been assigned, assign all of the ids from the root
+    if (id == -1) {
+      TypeDescription root = this;
+      while (root.parent != null) {
+        root = root.parent;
+      }
+      root.assignIds(0);
+    }
+    return id;
+  }
+
+  public TypeDescription clone() {
+    TypeDescription result = new TypeDescription(category);
+    result.maxLength = maxLength;
+    result.precision = precision;
+    result.scale = scale;
+    if (fieldNames != null) {
+      result.fieldNames.addAll(fieldNames);
+    }
+    if (children != null) {
+      for(TypeDescription child: children) {
+        TypeDescription clone = child.clone();
+        clone.parent = result;
+        result.children.add(clone);
+      }
+    }
+    return result;
+  }
+
+  @Override
+  public int hashCode() {
+    long result = category.ordinal() * 4241 + maxLength + precision * 13 + scale;
+    if (children != null) {
+      for(TypeDescription child: children) {
+        result = result * 6959 + child.hashCode();
+      }
+    }
+    return (int) result;
+  }
+
+  @Override
+  public boolean equals(Object other) {
+    if (other == null || !(other instanceof TypeDescription)) {
+      return false;
+    }
+    if (other == this) {
+      return true;
+    }
+    TypeDescription castOther = (TypeDescription) other;
+    if (category != castOther.category ||
+        maxLength != castOther.maxLength ||
+        scale != castOther.scale ||
+        precision != castOther.precision) {
+      return false;
+    }
+    if (children != null) {
+      if (children.size() != castOther.children.size()) {
+        return false;
+      }
+      for (int i = 0; i < children.size(); ++i) {
+        if (!children.get(i).equals(castOther.children.get(i))) {
+          return false;
+        }
+      }
+    }
+    if (category == Category.STRUCT) {
+      for(int i=0; i < fieldNames.size(); ++i) {
+        if (!fieldNames.get(i).equals(castOther.fieldNames.get(i))) {
+          return false;
+        }
+      }
+    }
+    return true;
+  }
+
+  /**
+   * Get the maximum id assigned to this type or its children.
+   * The first call will cause all of the ids in the tree to be assigned, so
+   * it should not be called before the type is completely built.
+   * @return the maximum id assigned under this type
+   */
+  public int getMaximumId() {
+    // if the id hasn't been assigned, assign all of the ids from the root
+    if (maxId == -1) {
+      TypeDescription root = this;
+      while (root.parent != null) {
+        root = root.parent;
+      }
+      root.assignIds(0);
+    }
+    return maxId;
+  }
+
+  private ColumnVector createColumn(int maxSize) {
+    switch (category) {
+      case BOOLEAN:
+      case BYTE:
+      case SHORT:
+      case INT:
+      case LONG:
+      case DATE:
+        return new LongColumnVector(maxSize);
+      case TIMESTAMP:
+        return new TimestampColumnVector(maxSize);
+      case FLOAT:
+      case DOUBLE:
+        return new DoubleColumnVector(maxSize);
+      case DECIMAL:
+        return new DecimalColumnVector(maxSize, precision, scale);
+      case STRING:
+      case BINARY:
+      case CHAR:
+      case VARCHAR:
+        return new BytesColumnVector(maxSize);
+      case STRUCT: {
+        ColumnVector[] fieldVector = new ColumnVector[children.size()];
+        for(int i=0; i < fieldVector.length; ++i) {
+          fieldVector[i] = children.get(i).createColumn(maxSize);
+        }
+        return new StructColumnVector(maxSize,
+                fieldVector);
+      }
+      case UNION: {
+        ColumnVector[] fieldVector = new ColumnVector[children.size()];
+        for(int i=0; i < fieldVector.length; ++i) {
+          fieldVector[i] = children.get(i).createColumn(maxSize);
+        }
+        return new UnionColumnVector(maxSize,
+            fieldVector);
+      }
+      case LIST:
+        return new ListColumnVector(maxSize,
+            children.get(0).createColumn(maxSize));
+      case MAP:
+        return new MapColumnVector(maxSize,
+            children.get(0).createColumn(maxSize),
+            children.get(1).createColumn(maxSize));
+      default:
+        throw new IllegalArgumentException("Unknown type " + category);
+    }
+  }
+
+  public VectorizedRowBatch createRowBatch(int maxSize) {
+    VectorizedRowBatch result;
+    if (category == Category.STRUCT) {
+      result = new VectorizedRowBatch(children.size(), maxSize);
+      for(int i=0; i < result.cols.length; ++i) {
+        result.cols[i] = children.get(i).createColumn(maxSize);
+      }
+    } else {
+      result = new VectorizedRowBatch(1, maxSize);
+      result.cols[0] = createColumn(maxSize);
+    }
+    result.reset();
+    return result;
+  }
+
+  public VectorizedRowBatch createRowBatch() {
+    return createRowBatch(VectorizedRowBatch.DEFAULT_SIZE);
+  }
+
+  /**
+   * Get the kind of this type.
+   * @return get the category for this type.
+   */
+  public Category getCategory() {
+    return category;
+  }
+
+  /**
+   * Get the maximum length of the type. Only used for char and varchar types.
+   * @return the maximum length of the string type
+   */
+  public int getMaxLength() {
+    return maxLength;
+  }
+
+  /**
+   * Get the precision of the decimal type.
+   * @return the number of digits for the precision.
+   */
+  public int getPrecision() {
+    return precision;
+  }
+
+  /**
+   * Get the scale of the decimal type.
+   * @return the number of digits for the scale.
+   */
+  public int getScale() {
+    return scale;
+  }
+
+  /**
+   * For struct types, get the list of field names.
+   * @return the list of field names.
+   */
+  public List<String> getFieldNames() {
+    return Collections.unmodifiableList(fieldNames);
+  }
+
+  /**
+   * Get the subtypes of this type.
+   * @return the list of children types
+   */
+  public List<TypeDescription> getChildren() {
+    return children == null ? null : Collections.unmodifiableList(children);
+  }
+
+  /**
+   * Assign ids to all of the nodes under this one.
+   * @param startId the lowest id to assign
+   * @return the next available id
+   */
+  private int assignIds(int startId) {
+    id = startId++;
+    if (children != null) {
+      for (TypeDescription child : children) {
+        startId = child.assignIds(startId);
+      }
+    }
+    maxId = startId - 1;
+    return startId;
+  }
+
+  private TypeDescription(Category category) {
+    this.category = category;
+    if (category.isPrimitive) {
+      children = null;
+    } else {
+      children = new ArrayList<>();
+    }
+    if (category == Category.STRUCT) {
+      fieldNames = new ArrayList<>();
+    } else {
+      fieldNames = null;
+    }
+  }
+
+  private int id = -1;
+  private int maxId = -1;
+  private TypeDescription parent;
+  private final Category category;
+  private final List<TypeDescription> children;
+  private final List<String> fieldNames;
+  private int maxLength = DEFAULT_LENGTH;
+  private int precision = DEFAULT_PRECISION;
+  private int scale = DEFAULT_SCALE;
+
+  public void printToBuffer(StringBuilder buffer) {
+    buffer.append(category.name);
+    switch (category) {
+      case DECIMAL:
+        buffer.append('(');
+        buffer.append(precision);
+        buffer.append(',');
+        buffer.append(scale);
+        buffer.append(')');
+        break;
+      case CHAR:
+      case VARCHAR:
+        buffer.append('(');
+        buffer.append(maxLength);
+        buffer.append(')');
+        break;
+      case LIST:
+      case MAP:
+      case UNION:
+        buffer.append('<');
+        for(int i=0; i < children.size(); ++i) {
+          if (i != 0) {
+            buffer.append(',');
+          }
+          children.get(i).printToBuffer(buffer);
+        }
+        buffer.append('>');
+        break;
+      case STRUCT:
+        buffer.append('<');
+        for(int i=0; i < children.size(); ++i) {
+          if (i != 0) {
+            buffer.append(',');
+          }
+          buffer.append(fieldNames.get(i));
+          buffer.append(':');
+          children.get(i).printToBuffer(buffer);
+        }
+        buffer.append('>');
+        break;
+      default:
+        break;
+    }
+  }
+
+  public String toString() {
+    StringBuilder buffer = new StringBuilder();
+    printToBuffer(buffer);
+    return buffer.toString();
+  }
+
+  private void printJsonToBuffer(String prefix, StringBuilder buffer,
+                                 int indent) {
+    for(int i=0; i < indent; ++i) {
+      buffer.append(' ');
+    }
+    buffer.append(prefix);
+    buffer.append("{\"category\": \"");
+    buffer.append(category.name);
+    buffer.append("\", \"id\": ");
+    buffer.append(getId());
+    buffer.append(", \"max\": ");
+    buffer.append(maxId);
+    switch (category) {
+      case DECIMAL:
+        buffer.append(", \"precision\": ");
+        buffer.append(precision);
+        buffer.append(", \"scale\": ");
+        buffer.append(scale);
+        break;
+      case CHAR:
+      case VARCHAR:
+        buffer.append(", \"length\": ");
+        buffer.append(maxLength);
+        break;
+      case LIST:
+      case MAP:
+      case UNION:
+        buffer.append(", \"children\": [");
+        for(int i=0; i < children.size(); ++i) {
+          buffer.append('\n');
+          children.get(i).printJsonToBuffer("", buffer, indent + 2);
+          if (i != children.size() - 1) {
+            buffer.append(',');
+          }
+        }
+        buffer.append("]");
+        break;
+      case STRUCT:
+        buffer.append(", \"fields\": [");
+        for(int i=0; i < children.size(); ++i) {
+          buffer.append('\n');
+          children.get(i).printJsonToBuffer("\"" + fieldNames.get(i) + "\": ",
+              buffer, indent + 2);
+          if (i != children.size() - 1) {
+            buffer.append(',');
+          }
+        }
+        buffer.append(']');
+        break;
+      default:
+        break;
+    }
+    buffer.append('}');
+  }
+
+  public String toJson() {
+    StringBuilder buffer = new StringBuilder();
+    printJsonToBuffer("", buffer, 0);
+    return buffer.toString();
+  }
+
+  /**
+   * Locate a subtype by its id.
+   * @param goal the column id to look for
+   * @return the subtype
+   */
+  public TypeDescription findSubtype(int goal) {
+    // call getId method to make sure the ids are assigned
+    int id = getId();
+    if (goal < id || goal > maxId) {
+      throw new IllegalArgumentException("Unknown type id " + id + " in " +
+          toJson());
+    }
+    if (goal == id) {
+      return this;
+    } else {
+      TypeDescription prev = null;
+      for(TypeDescription next: children) {
+        if (next.id > goal) {
+          return prev.findSubtype(goal);
+        }
+        prev = next;
+      }
+      return prev.findSubtype(goal);
+    }
+  }
+}
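
A small hedged sketch tying together the factory/builder methods, the string
grammar parsed by fromString above, and createRowBatch. It uses only methods
defined in this file plus VectorizedRowBatch; the field names and types are
made up for illustration.

import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hive.orc.TypeDescription;

class TypeDescriptionExample {
  public static void main(String[] args) {
    // Built programmatically with the factory/builder methods above.
    TypeDescription built = TypeDescription.createStruct()
        .addField("id", TypeDescription.createLong())
        .addField("name", TypeDescription.createVarchar().withMaxLength(64))
        .addField("price", TypeDescription.createDecimal()
            .withPrecision(10).withScale(2));

    // Parsed from the equivalent Hive type name; fromString is the inverse of
    // toString(), so the two descriptions compare equal.
    TypeDescription parsed = TypeDescription.fromString(
        "struct<id:bigint,name:varchar(64),price:decimal(10,2)>");
    System.out.println(built.equals(parsed));          // true
    System.out.println(parsed.toString());             // round-trips the string

    // One column vector per struct field, sized with the default batch size.
    VectorizedRowBatch batch = parsed.createRowBatch();
    System.out.println(batch.cols.length);             // 3
  }
}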

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/Writer.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/Writer.java b/orc/src/java/org/apache/hive/orc/Writer.java
new file mode 100644
index 0000000..c4c2147
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/Writer.java
@@ -0,0 +1,110 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc;
+
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.List;
+
+/**
+ * The interface for writing ORC files.
+ */
+public interface Writer {
+
+  /**
+   * Get the schema for this writer
+   * @return the file schema
+   */
+  TypeDescription getSchema();
+
+  /**
+   * Add arbitrary meta-data to the ORC file. This may be called at any point
+   * until the Writer is closed. If the same key is passed a second time, the
+   * second value will replace the first.
+   * @param key a key to label the data with.
+   * @param value the contents of the metadata.
+   */
+  void addUserMetadata(String key, ByteBuffer value);
+
+  /**
+   * Add a row batch to the ORC file.
+   * @param batch the rows to add
+   */
+  void addRowBatch(VectorizedRowBatch batch) throws IOException;
+
+  /**
+   * Flush all of the buffers and close the file. No methods on this writer
+   * should be called afterwards.
+   * @throws IOException
+   */
+  void close() throws IOException;
+
+  /**
+   * Return the deserialized data size. Raw data size is computed when
+   * writing the file footer. Hence the raw data size value is available only
+   * after the writer has been closed.
+   *
+   * @return raw data size
+   */
+  long getRawDataSize();
+
+  /**
+   * Return the number of rows in the file. Row count is updated when flushing
+   * the stripes. To get an accurate row count, call this method after
+   * closing the writer.
+   *
+   * @return row count
+   */
+  long getNumberOfRows();
+
+  /**
+   * Write an intermediate footer on the file such that if the file is
+   * truncated to the returned offset, it would be a valid ORC file.
+   * @return the offset that would be a valid end location for an ORC file
+   */
+  long writeIntermediateFooter() throws IOException;
+
+  /**
+   * Fast stripe append to an ORC file. This interface is used for fast merging
+   * of ORC files. When merging, each stripe of the file being merged is passed
+   * in binary form along with its stripe information and stripe statistics.
+   * After appending the last stripe of a file, use appendUserMetadata() to
+   * append any user metadata.
+   * @param stripe - stripe as byte array
+   * @param offset - offset within byte array
+   * @param length - length of stripe within byte array
+   * @param stripeInfo - stripe information
+   * @param stripeStatistics - stripe statistics (Protobuf objects can be
+   *                         merged directly)
+   * @throws IOException
+   */
+  public void appendStripe(byte[] stripe, int offset, int length,
+      StripeInformation stripeInfo,
+      OrcProto.StripeStatistics stripeStatistics) throws IOException;
+
+  /**
+   * When fast stripe append is used for merging ORC stripes, after appending
+   * the last stripe from a file, this interface must be used to merge any
+   * user metadata.
+   * @param userMetadata - user metadata
+   */
+  public void appendUserMetadata(List<OrcProto.UserMetadataItem> userMetadata);
+}
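
A hedged end-to-end sketch of the write loop this interface implies. OrcFile,
its createWriter factory and its WriterOptions are assumed from the same
package and are not part of this hunk; only the Writer and TypeDescription
calls are shown in this diff.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hive.orc.OrcFile;
import org.apache.hive.orc.TypeDescription;
import org.apache.hive.orc.Writer;

class WriterExample {
  static void write(Path path, Configuration conf) throws IOException {
    TypeDescription schema = TypeDescription.fromString("struct<x:bigint>");
    // OrcFile.createWriter/writerOptions are assumed here, not in this hunk.
    Writer writer = OrcFile.createWriter(path,
        OrcFile.writerOptions(conf).setSchema(schema));
    VectorizedRowBatch batch = schema.createRowBatch();
    LongColumnVector x = (LongColumnVector) batch.cols[0];
    for (long row = 0; row < 10000; ++row) {
      x.vector[batch.size++] = row;
      if (batch.size == VectorizedRowBatch.DEFAULT_SIZE) {
        writer.addRowBatch(batch);   // flush a full batch
        batch.reset();
      }
    }
    if (batch.size != 0) {
      writer.addRowBatch(batch);     // flush the final partial batch
    }
    writer.close();  // row count and raw data size are valid only after close()
  }
}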

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/impl/AcidStats.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/impl/AcidStats.java b/orc/src/java/org/apache/hive/orc/impl/AcidStats.java
new file mode 100644
index 0000000..aff9659
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/impl/AcidStats.java
@@ -0,0 +1,60 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc.impl;
+
+/**
+ * Statistics about the ACID operations in an ORC file
+ */
+public class AcidStats {
+  public long inserts;
+  public long updates;
+  public long deletes;
+
+  public AcidStats() {
+    inserts = 0;
+    updates = 0;
+    deletes = 0;
+  }
+
+  public AcidStats(String serialized) {
+    String[] parts = serialized.split(",");
+    inserts = Long.parseLong(parts[0]);
+    updates = Long.parseLong(parts[1]);
+    deletes = Long.parseLong(parts[2]);
+  }
+
+  public String serialize() {
+    StringBuilder builder = new StringBuilder();
+    builder.append(inserts);
+    builder.append(",");
+    builder.append(updates);
+    builder.append(",");
+    builder.append(deletes);
+    return builder.toString();
+  }
+
+  @Override
+  public String toString() {
+    StringBuilder builder = new StringBuilder();
+    builder.append(" inserts: ").append(inserts);
+    builder.append(" updates: ").append(updates);
+    builder.append(" deletes: ").append(deletes);
+    return builder.toString();
+  }
+}
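
A minimal round-trip sketch of the comma-separated encoding implemented above;
it uses only the class shown here and the JDK.

import org.apache.hive.orc.impl.AcidStats;

class AcidStatsExample {
  public static void main(String[] args) {
    AcidStats stats = new AcidStats();
    stats.inserts = 100;
    stats.updates = 5;
    stats.deletes = 2;
    String wire = stats.serialize();        // "100,5,2"
    AcidStats copy = new AcidStats(wire);   // parsed back field by field
    System.out.println(copy);               // " inserts: 100 updates: 5 deletes: 2"
  }
}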

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/impl/BitFieldReader.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/impl/BitFieldReader.java b/orc/src/java/org/apache/hive/orc/impl/BitFieldReader.java
new file mode 100644
index 0000000..264061d
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/impl/BitFieldReader.java
@@ -0,0 +1,214 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc.impl;
+
+import java.io.EOFException;
+import java.io.IOException;
+
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+
+public class BitFieldReader {
+  private final RunLengthByteReader input;
+  /** The number of bits in one item. Non-test code always uses 1. */
+  private final int bitSize;
+  private int current;
+  private int bitsLeft;
+  private final int mask;
+
+  public BitFieldReader(InStream input,
+      int bitSize) throws IOException {
+    this.input = new RunLengthByteReader(input);
+    this.bitSize = bitSize;
+    mask = (1 << bitSize) - 1;
+  }
+
+  public void setInStream(InStream inStream) {
+    this.input.setInStream(inStream);
+  }
+
+  private void readByte() throws IOException {
+    if (input.hasNext()) {
+      current = 0xff & input.next();
+      bitsLeft = 8;
+    } else {
+      throw new EOFException("Read past end of bit field from " + this);
+    }
+  }
+
+  public int next() throws IOException {
+    int result = 0;
+    int bitsLeftToRead = bitSize;
+    while (bitsLeftToRead > bitsLeft) {
+      result <<= bitsLeft;
+      result |= current & ((1 << bitsLeft) - 1);
+      bitsLeftToRead -= bitsLeft;
+      readByte();
+    }
+    if (bitsLeftToRead > 0) {
+      result <<= bitsLeftToRead;
+      bitsLeft -= bitsLeftToRead;
+      result |= (current >>> bitsLeft) & ((1 << bitsLeftToRead) - 1);
+    }
+    return result & mask;
+  }
+
+  /**
+   * Unlike the integer readers, where runs are encoded explicitly, here we have to read ahead
+   * to figure out whether we have a run. Given that runs in booleans are likely, it's worth it.
+   * However, it means we'd need to keep track of how many bytes we read, and next/nextVector won't
+   * work anymore once this is called. That is trivial to fix, but the two are never interspersed.
+   */
+  private boolean lastRunValue;
+  private int lastRunLength = -1;
+  private void readNextRun(int maxRunLength) throws IOException {
+    assert bitSize == 1;
+    if (lastRunLength > 0) return; // last run is not exhausted yet
+    if (bitsLeft == 0) {
+      readByte();
+    }
+    // First take care of the partial bits.
+    boolean hasVal = false;
+    int runLength = 0;
+    if (bitsLeft != 8) {
+      int partialBitsMask = (1 << bitsLeft) - 1;
+      int partialBits = current & partialBitsMask;
+      if (partialBits == partialBitsMask || partialBits == 0) {
+        lastRunValue = (partialBits == partialBitsMask);
+        if (maxRunLength <= bitsLeft) {
+          lastRunLength = maxRunLength;
+          return;
+        }
+        maxRunLength -= bitsLeft;
+        hasVal = true;
+        runLength = bitsLeft;
+        bitsLeft = 0;
+      } else {
+        // There's no run in partial bits. Return whatever we have.
+        int prefixBitsCount = 32 - bitsLeft;
+        runLength = Integer.numberOfLeadingZeros(partialBits) - prefixBitsCount;
+        lastRunValue = (runLength > 0);
+        lastRunLength = Math.min(maxRunLength, lastRunValue ? runLength :
+          (Integer.numberOfLeadingZeros(~(partialBits | ~partialBitsMask)) - prefixBitsCount));
+        return;
+      }
+      assert bitsLeft == 0;
+      readByte();
+    }
+    if (!hasVal) {
+      lastRunValue = ((current >> 7) == 1);
+      hasVal = true;
+    }
+    // Read full bytes until the run ends.
+    assert bitsLeft == 8;
+    while (maxRunLength >= 8
+        && ((lastRunValue && (current == 0xff)) || (!lastRunValue && (current == 0)))) {
+      runLength += 8;
+      maxRunLength -= 8;
+      readByte();
+    }
+    if (maxRunLength > 0) {
+      int extraBits = Integer.numberOfLeadingZeros(
+          lastRunValue ? (~(current | ~255)) : current) - 24;
+      bitsLeft -= extraBits;
+      runLength += extraBits;
+    }
+    lastRunLength = runLength;
+  }
+
+  public void nextVector(LongColumnVector previous,
+                         long previousLen) throws IOException {
+    previous.isRepeating = true;
+    for (int i = 0; i < previousLen; i++) {
+      if (previous.noNulls || !previous.isNull[i]) {
+        previous.vector[i] = next();
+      } else {
+        // The default value of null for int types in vectorized
+        // processing is 1, so set that if the value is null
+        previous.vector[i] = 1;
+      }
+
+      // The default value for nulls in vectorization for int types is 1,
+      // and since a non-null value can also be 1, we also need to check isNull
+      // when determining the isRepeating flag.
+      if (previous.isRepeating
+          && i > 0
+          && ((previous.vector[0] != previous.vector[i]) ||
+          (previous.isNull[0] != previous.isNull[i]))) {
+        previous.isRepeating = false;
+      }
+    }
+  }
+
+  public void seek(PositionProvider index) throws IOException {
+    input.seek(index);
+    int consumed = (int) index.getNext();
+    if (consumed > 8) {
+      throw new IllegalArgumentException("Seek past end of byte at " +
+          consumed + " in " + input);
+    } else if (consumed != 0) {
+      readByte();
+      bitsLeft = 8 - consumed;
+    } else {
+      bitsLeft = 0;
+    }
+  }
+
+  public void skip(long items) throws IOException {
+    long totalBits = bitSize * items;
+    if (bitsLeft >= totalBits) {
+      bitsLeft -= totalBits;
+    } else {
+      totalBits -= bitsLeft;
+      input.skip(totalBits / 8);
+      current = input.next();
+      bitsLeft = (int) (8 - (totalBits % 8));
+    }
+  }
+
+  @Override
+  public String toString() {
+    return "bit reader current: " + current + " bits left: " + bitsLeft +
+        " bit size: " + bitSize + " from " + input;
+  }
+
+  boolean hasFullByte() {
+    return bitsLeft == 8 || bitsLeft == 0;
+  }
+
+  int peekOneBit() throws IOException {
+    assert bitSize == 1;
+    if (bitsLeft == 0) {
+      readByte();
+    }
+    return (current >>> (bitsLeft - 1)) & 1;
+  }
+
+  int peekFullByte() throws IOException {
+    assert bitSize == 1;
+    assert bitsLeft == 8 || bitsLeft == 0;
+    if (bitsLeft == 0) {
+      readByte();
+    }
+    return current;
+  }
+
+  void skipInCurrentByte(int bits) throws IOException {
+    assert bitsLeft >= bits;
+    bitsLeft -= bits;
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/impl/BitFieldWriter.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/impl/BitFieldWriter.java b/orc/src/java/org/apache/hive/orc/impl/BitFieldWriter.java
new file mode 100644
index 0000000..962f035
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/impl/BitFieldWriter.java
@@ -0,0 +1,69 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc.impl;
+
+import java.io.IOException;
+
+public class BitFieldWriter {
+  private RunLengthByteWriter output;
+  private final int bitSize;
+  private byte current = 0;
+  private int bitsLeft = 8;
+
+  public BitFieldWriter(PositionedOutputStream output,
+                 int bitSize) throws IOException {
+    this.output = new RunLengthByteWriter(output);
+    this.bitSize = bitSize;
+  }
+
+  private void writeByte() throws IOException {
+    output.write(current);
+    current = 0;
+    bitsLeft = 8;
+  }
+
+  public void flush() throws IOException {
+    if (bitsLeft != 8) {
+      writeByte();
+    }
+    output.flush();
+  }
+
+  public void write(int value) throws IOException {
+    int bitsToWrite = bitSize;
+    while (bitsToWrite > bitsLeft) {
+      // add the bits to the bottom of the current word
+      current |= value >>> (bitsToWrite - bitsLeft);
+      // subtract out the bits we just added
+      bitsToWrite -= bitsLeft;
+      // zero out the bits above bitsToWrite
+      value &= (1 << bitsToWrite) - 1;
+      writeByte();
+    }
+    bitsLeft -= bitsToWrite;
+    current |= value << bitsLeft;
+    if (bitsLeft == 0) {
+      writeByte();
+    }
+  }
+
+  public void getPosition(PositionRecorder recorder) throws IOException {
+    output.getPosition(recorder);
+    recorder.addPosition(8 - bitsLeft);
+  }
+}
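
To make the MSB-first packing in write() above concrete, a standalone sketch of
the same arithmetic against a plain ByteArrayOutputStream. There is no ORC
stream plumbing here, and the bit width and values are made up for illustration.

import java.io.ByteArrayOutputStream;

class BitPackSketch {
  public static void main(String[] args) {
    int bitSize = 3;                  // plays the role of BitFieldWriter.bitSize
    int current = 0;
    int bitsLeft = 8;
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    for (int v : new int[]{5, 1, 7, 2}) {        // 3-bit values: 101 001 111 010
      int value = v;
      int bitsToWrite = bitSize;
      while (bitsToWrite > bitsLeft) {
        current |= value >>> (bitsToWrite - bitsLeft); // top bits fill this byte
        bitsToWrite -= bitsLeft;
        value &= (1 << bitsToWrite) - 1;               // keep the remaining bits
        out.write(current);
        current = 0;
        bitsLeft = 8;
      }
      bitsLeft -= bitsToWrite;
      current |= value << bitsLeft;                    // remaining bits, MSB-first
      if (bitsLeft == 0) {
        out.write(current);
        current = 0;
        bitsLeft = 8;
      }
    }
    if (bitsLeft != 8) {
      out.write(current);                              // flush the partial byte
    }
    for (byte b : out.toByteArray()) {
      System.out.printf("%02X ", b);                   // prints: A7 A0
    }
  }
}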

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/impl/BufferChunk.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/impl/BufferChunk.java b/orc/src/java/org/apache/hive/orc/impl/BufferChunk.java
new file mode 100644
index 0000000..0f59b5d
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/impl/BufferChunk.java
@@ -0,0 +1,85 @@
+package org.apache.hive.orc.impl;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.hadoop.hive.common.io.DiskRange;
+import org.apache.hadoop.hive.common.io.DiskRangeList;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.nio.ByteBuffer;
+
+/**
+ * The sections of a stripe that we have read.
+ * This might not match the disk ranges exactly: one disk range can be split
+ * into multiple buffer chunks, depending on DFS block boundaries.
+ */
+public class BufferChunk extends DiskRangeList {
+
+  private static final Logger LOG =
+      LoggerFactory.getLogger(BufferChunk.class);
+  final ByteBuffer chunk;
+
+  public BufferChunk(ByteBuffer chunk, long offset) {
+    super(offset, offset + chunk.remaining());
+    this.chunk = chunk;
+  }
+
+  public ByteBuffer getChunk() {
+    return chunk;
+  }
+
+  @Override
+  public boolean hasData() {
+    return chunk != null;
+  }
+
+  @Override
+  public final String toString() {
+    boolean makesSense = chunk.remaining() == (end - offset);
+    return "data range [" + offset + ", " + end + "), size: " + chunk.remaining()
+        + (makesSense ? "" : "(!)") + " type: " +
+        (chunk.isDirect() ? "direct" : "array-backed");
+  }
+
+  @Override
+  public DiskRange sliceAndShift(long offset, long end, long shiftBy) {
+    assert offset <= end && offset >= this.offset && end <= this.end;
+    assert offset + shiftBy >= 0;
+    ByteBuffer sliceBuf = chunk.slice();
+    int newPos = (int) (offset - this.offset);
+    int newLimit = newPos + (int) (end - offset);
+    try {
+      sliceBuf.position(newPos);
+      sliceBuf.limit(newLimit);
+    } catch (Throwable t) {
+      LOG.error("Failed to slice buffer chunk with range" + " [" + this.offset + ", " + this.end
+          + "), position: " + chunk.position() + " limit: " + chunk.limit() + ", "
+          + (chunk.isDirect() ? "direct" : "array") + "; to [" + offset + ", " + end + ") "
+          + t.getClass());
+      throw new RuntimeException(t);
+    }
+    return new BufferChunk(sliceBuf, offset + shiftBy);
+  }
+
+  @Override
+  public ByteBuffer getData() {
+    return chunk;
+  }
+}
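
A small hedged sketch of the offset bookkeeping in sliceAndShift: wrap a
100-byte buffer that begins at file offset 1000, then carve out the bytes at
[1010, 1020) and re-base them to start at offset 10. DiskRange comes from
hive-common and is not part of this hunk; the numbers are made up.

import java.nio.ByteBuffer;
import org.apache.hadoop.hive.common.io.DiskRange;
import org.apache.hive.orc.impl.BufferChunk;

class BufferChunkExample {
  public static void main(String[] args) {
    ByteBuffer data = ByteBuffer.allocate(100);       // 100 bytes of stripe data
    BufferChunk chunk = new BufferChunk(data, 1000);  // covers file range [1000, 1100)
    // Slice the 10 bytes at file offsets [1010, 1020) and shift them to [10, 20).
    DiskRange slice = chunk.sliceAndShift(1010, 1020, -1000);
    System.out.println(slice);                        // re-based data range [10, 20)
    System.out.println(slice.getData().remaining());  // 10
  }
}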

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/impl/ColumnStatisticsImpl.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/impl/ColumnStatisticsImpl.java b/orc/src/java/org/apache/hive/orc/impl/ColumnStatisticsImpl.java
new file mode 100644
index 0000000..9cf725d
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/impl/ColumnStatisticsImpl.java
@@ -0,0 +1,1101 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc.impl;
+
+import java.sql.Date;
+import java.sql.Timestamp;
+
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.WritableComparator;
+import org.apache.hive.orc.BinaryColumnStatistics;
+import org.apache.hive.orc.BooleanColumnStatistics;
+import org.apache.hive.orc.ColumnStatistics;
+import org.apache.hive.orc.DecimalColumnStatistics;
+import org.apache.hive.orc.DoubleColumnStatistics;
+import org.apache.hive.orc.IntegerColumnStatistics;
+import org.apache.hive.orc.TimestampColumnStatistics;
+import org.apache.hive.orc.TypeDescription;
+import org.apache.hive.orc.DateColumnStatistics;
+import org.apache.hive.orc.OrcProto;
+import org.apache.hive.orc.StringColumnStatistics;
+
+public class ColumnStatisticsImpl implements ColumnStatistics {
+
+  private static final class BooleanStatisticsImpl extends ColumnStatisticsImpl
+      implements BooleanColumnStatistics {
+    private long trueCount = 0;
+
+    BooleanStatisticsImpl(OrcProto.ColumnStatistics stats) {
+      super(stats);
+      OrcProto.BucketStatistics bkt = stats.getBucketStatistics();
+      trueCount = bkt.getCount(0);
+    }
+
+    BooleanStatisticsImpl() {
+    }
+
+    @Override
+    public void reset() {
+      super.reset();
+      trueCount = 0;
+    }
+
+    @Override
+    public void updateBoolean(boolean value, int repetitions) {
+      if (value) {
+        trueCount += repetitions;
+      }
+    }
+
+    @Override
+    public void merge(ColumnStatisticsImpl other) {
+      if (other instanceof BooleanStatisticsImpl) {
+        BooleanStatisticsImpl bkt = (BooleanStatisticsImpl) other;
+        trueCount += bkt.trueCount;
+      } else {
+        if (isStatsExists() && trueCount != 0) {
+          throw new IllegalArgumentException("Incompatible merging of boolean column statistics");
+        }
+      }
+      super.merge(other);
+    }
+
+    @Override
+    public OrcProto.ColumnStatistics.Builder serialize() {
+      OrcProto.ColumnStatistics.Builder builder = super.serialize();
+      OrcProto.BucketStatistics.Builder bucket =
+        OrcProto.BucketStatistics.newBuilder();
+      bucket.addCount(trueCount);
+      builder.setBucketStatistics(bucket);
+      return builder;
+    }
+
+    @Override
+    public long getFalseCount() {
+      return getNumberOfValues() - trueCount;
+    }
+
+    @Override
+    public long getTrueCount() {
+      return trueCount;
+    }
+
+    @Override
+    public String toString() {
+      return super.toString() + " true: " + trueCount;
+    }
+  }
+
+  private static final class IntegerStatisticsImpl extends ColumnStatisticsImpl
+      implements IntegerColumnStatistics {
+
+    private long minimum = Long.MAX_VALUE;
+    private long maximum = Long.MIN_VALUE;
+    private long sum = 0;
+    private boolean hasMinimum = false;
+    private boolean overflow = false;
+
+    IntegerStatisticsImpl() {
+    }
+
+    IntegerStatisticsImpl(OrcProto.ColumnStatistics stats) {
+      super(stats);
+      OrcProto.IntegerStatistics intStat = stats.getIntStatistics();
+      if (intStat.hasMinimum()) {
+        hasMinimum = true;
+        minimum = intStat.getMinimum();
+      }
+      if (intStat.hasMaximum()) {
+        maximum = intStat.getMaximum();
+      }
+      if (intStat.hasSum()) {
+        sum = intStat.getSum();
+      } else {
+        overflow = true;
+      }
+    }
+
+    @Override
+    public void reset() {
+      super.reset();
+      hasMinimum = false;
+      minimum = Long.MAX_VALUE;
+      maximum = Long.MIN_VALUE;
+      sum = 0;
+      overflow = false;
+    }
+
+    @Override
+    public void updateInteger(long value, int repetitions) {
+      if (!hasMinimum) {
+        hasMinimum = true;
+        minimum = value;
+        maximum = value;
+      } else if (value < minimum) {
+        minimum = value;
+      } else if (value > maximum) {
+        maximum = value;
+      }
+      if (!overflow) {
+        boolean wasPositive = sum >= 0;
+        sum += value * repetitions;
+        if ((value >= 0) == wasPositive) {
+          overflow = (sum >= 0) != wasPositive;
+        }
+      }
+    }
+
+    @Override
+    public void merge(ColumnStatisticsImpl other) {
+      if (other instanceof IntegerStatisticsImpl) {
+        IntegerStatisticsImpl otherInt = (IntegerStatisticsImpl) other;
+        if (!hasMinimum) {
+          hasMinimum = otherInt.hasMinimum;
+          minimum = otherInt.minimum;
+          maximum = otherInt.maximum;
+        } else if (otherInt.hasMinimum) {
+          if (otherInt.minimum < minimum) {
+            minimum = otherInt.minimum;
+          }
+          if (otherInt.maximum > maximum) {
+            maximum = otherInt.maximum;
+          }
+        }
+
+        overflow |= otherInt.overflow;
+        if (!overflow) {
+          boolean wasPositive = sum >= 0;
+          sum += otherInt.sum;
+          if ((otherInt.sum >= 0) == wasPositive) {
+            overflow = (sum >= 0) != wasPositive;
+          }
+        }
+      } else {
+        if (isStatsExists() && hasMinimum) {
+          throw new IllegalArgumentException("Incompatible merging of integer column statistics");
+        }
+      }
+      super.merge(other);
+    }
+
+    @Override
+    public OrcProto.ColumnStatistics.Builder serialize() {
+      OrcProto.ColumnStatistics.Builder builder = super.serialize();
+      OrcProto.IntegerStatistics.Builder intb =
+        OrcProto.IntegerStatistics.newBuilder();
+      if (hasMinimum) {
+        intb.setMinimum(minimum);
+        intb.setMaximum(maximum);
+      }
+      if (!overflow) {
+        intb.setSum(sum);
+      }
+      builder.setIntStatistics(intb);
+      return builder;
+    }
+
+    @Override
+    public long getMinimum() {
+      return minimum;
+    }
+
+    @Override
+    public long getMaximum() {
+      return maximum;
+    }
+
+    @Override
+    public boolean isSumDefined() {
+      return !overflow;
+    }
+
+    @Override
+    public long getSum() {
+      return sum;
+    }
+
+    @Override
+    public String toString() {
+      StringBuilder buf = new StringBuilder(super.toString());
+      if (hasMinimum) {
+        buf.append(" min: ");
+        buf.append(minimum);
+        buf.append(" max: ");
+        buf.append(maximum);
+      }
+      if (!overflow) {
+        buf.append(" sum: ");
+        buf.append(sum);
+      }
+      return buf.toString();
+    }
+  }
+
+  private static final class DoubleStatisticsImpl extends ColumnStatisticsImpl
+       implements DoubleColumnStatistics {
+    private boolean hasMinimum = false;
+    private double minimum = Double.MAX_VALUE;
+    private double maximum = Double.MIN_VALUE;
+    private double sum = 0;
+
+    DoubleStatisticsImpl() {
+    }
+
+    DoubleStatisticsImpl(OrcProto.ColumnStatistics stats) {
+      super(stats);
+      OrcProto.DoubleStatistics dbl = stats.getDoubleStatistics();
+      if (dbl.hasMinimum()) {
+        hasMinimum = true;
+        minimum = dbl.getMinimum();
+      }
+      if (dbl.hasMaximum()) {
+        maximum = dbl.getMaximum();
+      }
+      if (dbl.hasSum()) {
+        sum = dbl.getSum();
+      }
+    }
+
+    @Override
+    public void reset() {
+      super.reset();
+      hasMinimum = false;
+      minimum = Double.MAX_VALUE;
+      maximum = Double.MIN_VALUE;
+      sum = 0;
+    }
+
+    @Override
+    public void updateDouble(double value) {
+      if (!hasMinimum) {
+        hasMinimum = true;
+        minimum = value;
+        maximum = value;
+      } else if (value < minimum) {
+        minimum = value;
+      } else if (value > maximum) {
+        maximum = value;
+      }
+      sum += value;
+    }
+
+    @Override
+    public void merge(ColumnStatisticsImpl other) {
+      if (other instanceof DoubleStatisticsImpl) {
+        DoubleStatisticsImpl dbl = (DoubleStatisticsImpl) other;
+        if (!hasMinimum) {
+          hasMinimum = dbl.hasMinimum;
+          minimum = dbl.minimum;
+          maximum = dbl.maximum;
+        } else if (dbl.hasMinimum) {
+          if (dbl.minimum < minimum) {
+            minimum = dbl.minimum;
+          }
+          if (dbl.maximum > maximum) {
+            maximum = dbl.maximum;
+          }
+        }
+        sum += dbl.sum;
+      } else {
+        if (isStatsExists() && hasMinimum) {
+          throw new IllegalArgumentException("Incompatible merging of double column statistics");
+        }
+      }
+      super.merge(other);
+    }
+
+    @Override
+    public OrcProto.ColumnStatistics.Builder serialize() {
+      OrcProto.ColumnStatistics.Builder builder = super.serialize();
+      OrcProto.DoubleStatistics.Builder dbl =
+        OrcProto.DoubleStatistics.newBuilder();
+      if (hasMinimum) {
+        dbl.setMinimum(minimum);
+        dbl.setMaximum(maximum);
+      }
+      dbl.setSum(sum);
+      builder.setDoubleStatistics(dbl);
+      return builder;
+    }
+
+    @Override
+    public double getMinimum() {
+      return minimum;
+    }
+
+    @Override
+    public double getMaximum() {
+      return maximum;
+    }
+
+    @Override
+    public double getSum() {
+      return sum;
+    }
+
+    @Override
+    public String toString() {
+      StringBuilder buf = new StringBuilder(super.toString());
+      if (hasMinimum) {
+        buf.append(" min: ");
+        buf.append(minimum);
+        buf.append(" max: ");
+        buf.append(maximum);
+      }
+      buf.append(" sum: ");
+      buf.append(sum);
+      return buf.toString();
+    }
+  }
+
+  protected static final class StringStatisticsImpl extends ColumnStatisticsImpl
+      implements StringColumnStatistics {
+    private Text minimum = null;
+    private Text maximum = null;
+    private long sum = 0;
+
+    StringStatisticsImpl() {
+    }
+
+    StringStatisticsImpl(OrcProto.ColumnStatistics stats) {
+      super(stats);
+      OrcProto.StringStatistics str = stats.getStringStatistics();
+      if (str.hasMaximum()) {
+        maximum = new Text(str.getMaximum());
+      }
+      if (str.hasMinimum()) {
+        minimum = new Text(str.getMinimum());
+      }
+      if(str.hasSum()) {
+        sum = str.getSum();
+      }
+    }
+
+    @Override
+    public void reset() {
+      super.reset();
+      minimum = null;
+      maximum = null;
+      sum = 0;
+    }
+
+    @Override
+    public void updateString(Text value) {
+      if (minimum == null) {
+        maximum = minimum = new Text(value);
+      } else if (minimum.compareTo(value) > 0) {
+        minimum = new Text(value);
+      } else if (maximum.compareTo(value) < 0) {
+        maximum = new Text(value);
+      }
+      sum += value.getLength();
+    }
+
+    @Override
+    public void updateString(byte[] bytes, int offset, int length,
+                             int repetitions) {
+      if (minimum == null) {
+        maximum = minimum = new Text();
+        maximum.set(bytes, offset, length);
+      } else if (WritableComparator.compareBytes(minimum.getBytes(), 0,
+          minimum.getLength(), bytes, offset, length) > 0) {
+        minimum = new Text();
+        minimum.set(bytes, offset, length);
+      } else if (WritableComparator.compareBytes(maximum.getBytes(), 0,
+          maximum.getLength(), bytes, offset, length) < 0) {
+        maximum = new Text();
+        maximum.set(bytes, offset, length);
+      }
+      sum += length * repetitions;
+    }
+
+    @Override
+    public void merge(ColumnStatisticsImpl other) {
+      if (other instanceof StringStatisticsImpl) {
+        StringStatisticsImpl str = (StringStatisticsImpl) other;
+        if (minimum == null) {
+          if (str.minimum != null) {
+            maximum = new Text(str.getMaximum());
+            minimum = new Text(str.getMinimum());
+          } else {
+            /* both are empty */
+            maximum = minimum = null;
+          }
+        } else if (str.minimum != null) {
+          if (minimum.compareTo(str.minimum) > 0) {
+            minimum = new Text(str.getMinimum());
+          }
+          if (maximum.compareTo(str.maximum) < 0) {
+            maximum = new Text(str.getMaximum());
+          }
+        }
+        sum += str.sum;
+      } else {
+        if (isStatsExists() && minimum != null) {
+          throw new IllegalArgumentException("Incompatible merging of string column statistics");
+        }
+      }
+      super.merge(other);
+    }
+
+    @Override
+    public OrcProto.ColumnStatistics.Builder serialize() {
+      OrcProto.ColumnStatistics.Builder result = super.serialize();
+      OrcProto.StringStatistics.Builder str =
+        OrcProto.StringStatistics.newBuilder();
+      if (getNumberOfValues() != 0) {
+        str.setMinimum(getMinimum());
+        str.setMaximum(getMaximum());
+        str.setSum(sum);
+      }
+      result.setStringStatistics(str);
+      return result;
+    }
+
+    @Override
+    public String getMinimum() {
+      return minimum == null ? null : minimum.toString();
+    }
+
+    @Override
+    public String getMaximum() {
+      return maximum == null ? null : maximum.toString();
+    }
+
+    @Override
+    public long getSum() {
+      return sum;
+    }
+
+    @Override
+    public String toString() {
+      StringBuilder buf = new StringBuilder(super.toString());
+      if (getNumberOfValues() != 0) {
+        buf.append(" min: ");
+        buf.append(getMinimum());
+        buf.append(" max: ");
+        buf.append(getMaximum());
+        buf.append(" sum: ");
+        buf.append(sum);
+      }
+      return buf.toString();
+    }
+  }
+
+  protected static final class BinaryStatisticsImpl extends ColumnStatisticsImpl implements
+      BinaryColumnStatistics {
+
+    private long sum = 0;
+
+    BinaryStatisticsImpl() {
+    }
+
+    BinaryStatisticsImpl(OrcProto.ColumnStatistics stats) {
+      super(stats);
+      OrcProto.BinaryStatistics binStats = stats.getBinaryStatistics();
+      if (binStats.hasSum()) {
+        sum = binStats.getSum();
+      }
+    }
+
+    @Override
+    public void reset() {
+      super.reset();
+      sum = 0;
+    }
+
+    @Override
+    public void updateBinary(BytesWritable value) {
+      sum += value.getLength();
+    }
+
+    @Override
+    public void updateBinary(byte[] bytes, int offset, int length,
+                             int repetitions) {
+      sum += length * repetitions;
+    }
+
+    @Override
+    public void merge(ColumnStatisticsImpl other) {
+      if (other instanceof BinaryColumnStatistics) {
+        BinaryStatisticsImpl bin = (BinaryStatisticsImpl) other;
+        sum += bin.sum;
+      } else {
+        if (isStatsExists() && sum != 0) {
+          throw new IllegalArgumentException("Incompatible merging of binary column statistics");
+        }
+      }
+      super.merge(other);
+    }
+
+    @Override
+    public long getSum() {
+      return sum;
+    }
+
+    @Override
+    public OrcProto.ColumnStatistics.Builder serialize() {
+      OrcProto.ColumnStatistics.Builder result = super.serialize();
+      OrcProto.BinaryStatistics.Builder bin = OrcProto.BinaryStatistics.newBuilder();
+      bin.setSum(sum);
+      result.setBinaryStatistics(bin);
+      return result;
+    }
+
+    @Override
+    public String toString() {
+      StringBuilder buf = new StringBuilder(super.toString());
+      if (getNumberOfValues() != 0) {
+        buf.append(" sum: ");
+        buf.append(sum);
+      }
+      return buf.toString();
+    }
+  }
+
+  private static final class DecimalStatisticsImpl extends ColumnStatisticsImpl
+      implements DecimalColumnStatistics {
+
+    // These objects are mutable for better performance.
+    private HiveDecimalWritable minimum = null;
+    private HiveDecimalWritable maximum = null;
+    private HiveDecimalWritable sum = new HiveDecimalWritable(0);
+
+    DecimalStatisticsImpl() {
+    }
+
+    DecimalStatisticsImpl(OrcProto.ColumnStatistics stats) {
+      super(stats);
+      OrcProto.DecimalStatistics dec = stats.getDecimalStatistics();
+      if (dec.hasMaximum()) {
+        maximum = new HiveDecimalWritable(dec.getMaximum());
+      }
+      if (dec.hasMinimum()) {
+        minimum = new HiveDecimalWritable(dec.getMinimum());
+      }
+      if (dec.hasSum()) {
+        sum = new HiveDecimalWritable(dec.getSum());
+      } else {
+        sum = null;
+      }
+    }
+
+    @Override
+    public void reset() {
+      super.reset();
+      minimum = null;
+      maximum = null;
+      sum = new HiveDecimalWritable(0);
+    }
+
+    @Override
+    public void updateDecimal(HiveDecimalWritable value) {
+      if (minimum == null) {
+        minimum = new HiveDecimalWritable(value);
+        maximum = new HiveDecimalWritable(value);
+      } else if (minimum.compareTo(value) > 0) {
+        minimum.set(value);
+      } else if (maximum.compareTo(value) < 0) {
+        maximum.set(value);
+      }
+      if (sum != null) {
+        sum.mutateAdd(value);
+      }
+    }
+
+    @Override
+    public void merge(ColumnStatisticsImpl other) {
+      if (other instanceof DecimalStatisticsImpl) {
+        DecimalStatisticsImpl dec = (DecimalStatisticsImpl) other;
+        if (minimum == null) {
+          minimum = (dec.minimum != null ? new HiveDecimalWritable(dec.minimum) : null);
+          maximum = (dec.maximum != null ? new HiveDecimalWritable(dec.maximum) : null);
+          sum = dec.sum;
+        } else if (dec.minimum != null) {
+          if (minimum.compareTo(dec.minimum) > 0) {
+            minimum.set(dec.minimum);
+          }
+          if (maximum.compareTo(dec.maximum) < 0) {
+            maximum.set(dec.maximum);
+          }
+          if (sum == null || dec.sum == null) {
+            sum = null;
+          } else {
+            sum.mutateAdd(dec.sum);
+          }
+        }
+      } else {
+        if (isStatsExists() && minimum != null) {
+          throw new IllegalArgumentException("Incompatible merging of decimal column statistics");
+        }
+      }
+      super.merge(other);
+    }
+
+    @Override
+    public OrcProto.ColumnStatistics.Builder serialize() {
+      OrcProto.ColumnStatistics.Builder result = super.serialize();
+      OrcProto.DecimalStatistics.Builder dec =
+          OrcProto.DecimalStatistics.newBuilder();
+      if (getNumberOfValues() != 0 && minimum != null) {
+        dec.setMinimum(minimum.toString());
+        dec.setMaximum(maximum.toString());
+      }
+      // Check isSet for overflow.
+      if (sum != null && sum.isSet()) {
+        dec.setSum(sum.toString());
+      }
+      result.setDecimalStatistics(dec);
+      return result;
+    }
+
+    @Override
+    public HiveDecimal getMinimum() {
+      return minimum == null ? null : minimum.getHiveDecimal();
+    }
+
+    @Override
+    public HiveDecimal getMaximum() {
+      return maximum == null ? null : maximum.getHiveDecimal();
+    }
+
+    @Override
+    public HiveDecimal getSum() {
+      return sum == null ? null : sum.getHiveDecimal();
+    }
+
+    @Override
+    public String toString() {
+      StringBuilder buf = new StringBuilder(super.toString());
+      if (getNumberOfValues() != 0) {
+        buf.append(" min: ");
+        buf.append(minimum);
+        buf.append(" max: ");
+        buf.append(maximum);
+        if (sum != null) {
+          buf.append(" sum: ");
+          buf.append(sum);
+        }
+      }
+      return buf.toString();
+    }
+  }
+
+  private static final class DateStatisticsImpl extends ColumnStatisticsImpl
+      implements DateColumnStatistics {
+    private Integer minimum = null;
+    private Integer maximum = null;
+
+    DateStatisticsImpl() {
+    }
+
+    DateStatisticsImpl(OrcProto.ColumnStatistics stats) {
+      super(stats);
+      OrcProto.DateStatistics dateStats = stats.getDateStatistics();
+      // min,max values serialized/deserialized as int (days since epoch)
+      if (dateStats.hasMaximum()) {
+        maximum = dateStats.getMaximum();
+      }
+      if (dateStats.hasMinimum()) {
+        minimum = dateStats.getMinimum();
+      }
+    }
+
+    @Override
+    public void reset() {
+      super.reset();
+      minimum = null;
+      maximum = null;
+    }
+
+    @Override
+    public void updateDate(DateWritable value) {
+      if (minimum == null) {
+        minimum = value.getDays();
+        maximum = value.getDays();
+      } else if (minimum > value.getDays()) {
+        minimum = value.getDays();
+      } else if (maximum < value.getDays()) {
+        maximum = value.getDays();
+      }
+    }
+
+    @Override
+    public void updateDate(int value) {
+      if (minimum == null) {
+        minimum = value;
+        maximum = value;
+      } else if (minimum > value) {
+        minimum = value;
+      } else if (maximum < value) {
+        maximum = value;
+      }
+    }
+
+    @Override
+    public void merge(ColumnStatisticsImpl other) {
+      if (other instanceof DateStatisticsImpl) {
+        DateStatisticsImpl dateStats = (DateStatisticsImpl) other;
+        if (minimum == null) {
+          minimum = dateStats.minimum;
+          maximum = dateStats.maximum;
+        } else if (dateStats.minimum != null) {
+          if (minimum > dateStats.minimum) {
+            minimum = dateStats.minimum;
+          }
+          if (maximum < dateStats.maximum) {
+            maximum = dateStats.maximum;
+          }
+        }
+      } else {
+        if (isStatsExists() && minimum != null) {
+          throw new IllegalArgumentException("Incompatible merging of date column statistics");
+        }
+      }
+      super.merge(other);
+    }
+
+    @Override
+    public OrcProto.ColumnStatistics.Builder serialize() {
+      OrcProto.ColumnStatistics.Builder result = super.serialize();
+      OrcProto.DateStatistics.Builder dateStats =
+          OrcProto.DateStatistics.newBuilder();
+      if (getNumberOfValues() != 0 && minimum != null) {
+        dateStats.setMinimum(minimum);
+        dateStats.setMaximum(maximum);
+      }
+      result.setDateStatistics(dateStats);
+      return result;
+    }
+
+    private transient final DateWritable minDate = new DateWritable();
+    private transient final DateWritable maxDate = new DateWritable();
+
+    @Override
+    public Date getMinimum() {
+      if (minimum == null) {
+        return null;
+      }
+      minDate.set(minimum);
+      return minDate.get();
+    }
+
+    @Override
+    public Date getMaximum() {
+      if (maximum == null) {
+        return null;
+      }
+      maxDate.set(maximum);
+      return maxDate.get();
+    }
+
+    @Override
+    public String toString() {
+      StringBuilder buf = new StringBuilder(super.toString());
+      if (getNumberOfValues() != 0) {
+        buf.append(" min: ");
+        buf.append(getMinimum());
+        buf.append(" max: ");
+        buf.append(getMaximum());
+      }
+      return buf.toString();
+    }
+  }
+
+  private static final class TimestampStatisticsImpl extends ColumnStatisticsImpl
+      implements TimestampColumnStatistics {
+    private Long minimum = null;
+    private Long maximum = null;
+
+    TimestampStatisticsImpl() {
+    }
+
+    TimestampStatisticsImpl(OrcProto.ColumnStatistics stats) {
+      super(stats);
+      OrcProto.TimestampStatistics timestampStats = stats.getTimestampStatistics();
+      // min,max values serialized/deserialized as long (milliseconds since epoch)
+      if (timestampStats.hasMaximum()) {
+        maximum = timestampStats.getMaximum();
+      }
+      if (timestampStats.hasMinimum()) {
+        minimum = timestampStats.getMinimum();
+      }
+    }
+
+    @Override
+    public void reset() {
+      super.reset();
+      minimum = null;
+      maximum = null;
+    }
+
+    @Override
+    public void updateTimestamp(Timestamp value) {
+      if (minimum == null) {
+        minimum = value.getTime();
+        maximum = value.getTime();
+      } else if (minimum > value.getTime()) {
+        minimum = value.getTime();
+      } else if (maximum < value.getTime()) {
+        maximum = value.getTime();
+      }
+    }
+
+    @Override
+    public void updateTimestamp(long value) {
+      if (minimum == null) {
+        minimum = value;
+        maximum = value;
+      } else if (minimum > value) {
+        minimum = value;
+      } else if (maximum < value) {
+        maximum = value;
+      }
+    }
+
+    @Override
+    public void merge(ColumnStatisticsImpl other) {
+      if (other instanceof TimestampStatisticsImpl) {
+        TimestampStatisticsImpl timestampStats = (TimestampStatisticsImpl) other;
+        if (minimum == null) {
+          minimum = timestampStats.minimum;
+          maximum = timestampStats.maximum;
+        } else if (timestampStats.minimum != null) {
+          if (minimum > timestampStats.minimum) {
+            minimum = timestampStats.minimum;
+          }
+          if (maximum < timestampStats.maximum) {
+            maximum = timestampStats.maximum;
+          }
+        }
+      } else {
+        if (isStatsExists() && minimum != null) {
+          throw new IllegalArgumentException("Incompatible merging of timestamp column statistics");
+        }
+      }
+      super.merge(other);
+    }
+
+    @Override
+    public OrcProto.ColumnStatistics.Builder serialize() {
+      OrcProto.ColumnStatistics.Builder result = super.serialize();
+      OrcProto.TimestampStatistics.Builder timestampStats = OrcProto.TimestampStatistics
+          .newBuilder();
+      if (getNumberOfValues() != 0 && minimum != null) {
+        timestampStats.setMinimum(minimum);
+        timestampStats.setMaximum(maximum);
+      }
+      result.setTimestampStatistics(timestampStats);
+      return result;
+    }
+
+    @Override
+    public Timestamp getMinimum() {
+      return minimum == null ? null : new Timestamp(minimum);
+    }
+
+    @Override
+    public Timestamp getMaximum() {
+      return maximum == null ? null : new Timestamp(maximum);
+    }
+
+    @Override
+    public String toString() {
+      StringBuilder buf = new StringBuilder(super.toString());
+      if (getNumberOfValues() != 0) {
+        buf.append(" min: ");
+        buf.append(getMinimum());
+        buf.append(" max: ");
+        buf.append(getMaximum());
+      }
+      return buf.toString();
+    }
+  }
+
+  private long count = 0;
+  private boolean hasNull = false;
+
+  ColumnStatisticsImpl(OrcProto.ColumnStatistics stats) {
+    if (stats.hasNumberOfValues()) {
+      count = stats.getNumberOfValues();
+    }
+
+    if (stats.hasHasNull()) {
+      hasNull = stats.getHasNull();
+    } else {
+      hasNull = true;
+    }
+  }
+
+  ColumnStatisticsImpl() {
+  }
+
+  public void increment() {
+    count += 1;
+  }
+
+  public void increment(int count) {
+    this.count += count;
+  }
+
+  public void setNull() {
+    hasNull = true;
+  }
+
+  public void updateBoolean(boolean value, int repetitions) {
+    throw new UnsupportedOperationException("Can't update boolean");
+  }
+
+  public void updateInteger(long value, int repetitions) {
+    throw new UnsupportedOperationException("Can't update integer");
+  }
+
+  public void updateDouble(double value) {
+    throw new UnsupportedOperationException("Can't update double");
+  }
+
+  public void updateString(Text value) {
+    throw new UnsupportedOperationException("Can't update string");
+  }
+
+  public void updateString(byte[] bytes, int offset, int length,
+                           int repetitions) {
+    throw new UnsupportedOperationException("Can't update string");
+  }
+
+  public void updateBinary(BytesWritable value) {
+    throw new UnsupportedOperationException("Can't update binary");
+  }
+
+  public void updateBinary(byte[] bytes, int offset, int length,
+                           int repetitions) {
+    throw new UnsupportedOperationException("Can't update binary");
+  }
+
+  public void updateDecimal(HiveDecimalWritable value) {
+    throw new UnsupportedOperationException("Can't update decimal");
+  }
+
+  public void updateDate(DateWritable value) {
+    throw new UnsupportedOperationException("Can't update date");
+  }
+
+  public void updateDate(int value) {
+    throw new UnsupportedOperationException("Can't update date");
+  }
+
+  public void updateTimestamp(Timestamp value) {
+    throw new UnsupportedOperationException("Can't update timestamp");
+  }
+
+  public void updateTimestamp(long value) {
+    throw new UnsupportedOperationException("Can't update timestamp");
+  }
+
+  public boolean isStatsExists() {
+    return count > 0 || hasNull;
+  }
+
+  public void merge(ColumnStatisticsImpl stats) {
+    count += stats.count;
+    hasNull |= stats.hasNull;
+  }
+
+  public void reset() {
+    count = 0;
+    hasNull = false;
+  }
+
+  @Override
+  public long getNumberOfValues() {
+    return count;
+  }
+
+  @Override
+  public boolean hasNull() {
+    return hasNull;
+  }
+
+  @Override
+  public String toString() {
+    return "count: " + count + " hasNull: " + hasNull;
+  }
+
+  public OrcProto.ColumnStatistics.Builder serialize() {
+    OrcProto.ColumnStatistics.Builder builder =
+      OrcProto.ColumnStatistics.newBuilder();
+    builder.setNumberOfValues(count);
+    builder.setHasNull(hasNull);
+    return builder;
+  }
+
+  public static ColumnStatisticsImpl create(TypeDescription schema) {
+    switch (schema.getCategory()) {
+      case BOOLEAN:
+        return new BooleanStatisticsImpl();
+      case BYTE:
+      case SHORT:
+      case INT:
+      case LONG:
+        return new IntegerStatisticsImpl();
+      case FLOAT:
+      case DOUBLE:
+        return new DoubleStatisticsImpl();
+      case STRING:
+      case CHAR:
+      case VARCHAR:
+        return new StringStatisticsImpl();
+      case DECIMAL:
+        return new DecimalStatisticsImpl();
+      case DATE:
+        return new DateStatisticsImpl();
+      case TIMESTAMP:
+        return new TimestampStatisticsImpl();
+      case BINARY:
+        return new BinaryStatisticsImpl();
+      default:
+        return new ColumnStatisticsImpl();
+    }
+  }
+
+  public static ColumnStatisticsImpl deserialize(OrcProto.ColumnStatistics stats) {
+    if (stats.hasBucketStatistics()) {
+      return new BooleanStatisticsImpl(stats);
+    } else if (stats.hasIntStatistics()) {
+      return new IntegerStatisticsImpl(stats);
+    } else if (stats.hasDoubleStatistics()) {
+      return new DoubleStatisticsImpl(stats);
+    } else if (stats.hasStringStatistics()) {
+      return new StringStatisticsImpl(stats);
+    } else if (stats.hasDecimalStatistics()) {
+      return new DecimalStatisticsImpl(stats);
+    } else if (stats.hasDateStatistics()) {
+      return new DateStatisticsImpl(stats);
+    } else if (stats.hasTimestampStatistics()) {
+      return new TimestampStatisticsImpl(stats);
+    } else if(stats.hasBinaryStatistics()) {
+      return new BinaryStatisticsImpl(stats);
+    } else {
+      return new ColumnStatisticsImpl(stats);
+    }
+  }
+}
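
[Editorial aside, not part of the patch above.] A minimal sketch of how the statistics classes in this file are typically driven: create a subclass via the factory from a TypeDescription, call increment()/updateXxx() per value, merge per-stripe stats into file-level stats, and round-trip through the protobuf builder. It assumes the moved class ends up in package org.apache.hive.orc.impl and keeps the public create/merge/serialize/deserialize entry points shown above; the schema, values, and the class name ColumnStatsSketch are invented for illustration.

// Illustrative sketch only; assumes org.apache.hive.orc.impl.ColumnStatisticsImpl
// after the package move described by this commit.
import org.apache.hive.orc.OrcProto;
import org.apache.hive.orc.TypeDescription;
import org.apache.hive.orc.impl.ColumnStatisticsImpl;

public class ColumnStatsSketch {
  public static void main(String[] args) {
    // Writer side: one stats object per column, chosen by the factory.
    ColumnStatisticsImpl stripe1 = ColumnStatisticsImpl.create(TypeDescription.createLong());
    stripe1.increment();
    stripe1.updateInteger(10, 1);      // min=10, max=10, sum=10
    stripe1.increment();
    stripe1.updateInteger(-3, 1);      // min=-3, max=10, sum=7

    ColumnStatisticsImpl stripe2 = ColumnStatisticsImpl.create(TypeDescription.createLong());
    stripe2.increment();
    stripe2.updateInteger(42, 1);

    // File-level stats are the merge of the per-stripe stats; merging stats
    // of an incompatible column type throws IllegalArgumentException.
    ColumnStatisticsImpl fileStats = ColumnStatisticsImpl.create(TypeDescription.createLong());
    fileStats.merge(stripe1);
    fileStats.merge(stripe2);

    // serialize() builds the protobuf written into the file footer;
    // deserialize() picks the matching subclass back out of it.
    OrcProto.ColumnStatistics proto = fileStats.serialize().build();
    ColumnStatisticsImpl roundTrip = ColumnStatisticsImpl.deserialize(proto);
    System.out.println(roundTrip);     // expected: count: 3 hasNull: false min: -3 max: 42 sum: 49
  }
}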


[03/37] hive git commit: HIVE-17118. Move the hive-orc source files to make the package names unique.

Posted by om...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/orc/impl/TestBitFieldReader.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/orc/impl/TestBitFieldReader.java b/orc/src/test/org/apache/orc/impl/TestBitFieldReader.java
deleted file mode 100644
index e4c6f6b..0000000
--- a/orc/src/test/org/apache/orc/impl/TestBitFieldReader.java
+++ /dev/null
@@ -1,145 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc.impl;
-
-import static junit.framework.Assert.assertEquals;
-
-import java.nio.ByteBuffer;
-
-import org.apache.orc.CompressionCodec;
-import org.junit.Test;
-
-public class TestBitFieldReader {
-
-  public void runSeekTest(CompressionCodec codec) throws Exception {
-    TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
-    final int COUNT = 16384;
-    BitFieldWriter out = new BitFieldWriter(
-        new OutStream("test", 500, codec, collect), 1);
-    TestInStream.PositionCollector[] positions =
-        new TestInStream.PositionCollector[COUNT];
-    for(int i=0; i < COUNT; ++i) {
-      positions[i] = new TestInStream.PositionCollector();
-      out.getPosition(positions[i]);
-      // test runs, non-runs
-      if (i < COUNT / 2) {
-        out.write(i & 1);
-      } else {
-        out.write((i/3) & 1);
-      }
-    }
-    out.flush();
-    ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
-    collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
-    inBuf.flip();
-    BitFieldReader in = new BitFieldReader(InStream.create("test",
-        new ByteBuffer[]{inBuf}, new long[]{0}, inBuf.remaining(),
-        codec, 500), 1);
-    for(int i=0; i < COUNT; ++i) {
-      int x = in.next();
-      if (i < COUNT / 2) {
-        assertEquals(i & 1, x);
-      } else {
-        assertEquals((i/3) & 1, x);
-      }
-    }
-    for(int i=COUNT-1; i >= 0; --i) {
-      in.seek(positions[i]);
-      int x = in.next();
-      if (i < COUNT / 2) {
-        assertEquals(i & 1, x);
-      } else {
-        assertEquals((i/3) & 1, x);
-      }
-    }
-  }
-
-  @Test
-  public void testUncompressedSeek() throws Exception {
-    runSeekTest(null);
-  }
-
-  @Test
-  public void testCompressedSeek() throws Exception {
-    runSeekTest(new ZlibCodec());
-  }
-
-  @Test
-  public void testBiggerItems() throws Exception {
-    TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
-    final int COUNT = 16384;
-    BitFieldWriter out = new BitFieldWriter(
-        new OutStream("test", 500, null, collect), 3);
-    for(int i=0; i < COUNT; ++i) {
-      // test runs, non-runs
-      if (i < COUNT / 2) {
-        out.write(i & 7);
-      } else {
-        out.write((i/3) & 7);
-      }
-    }
-    out.flush();
-    ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
-    collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
-    inBuf.flip();
-    BitFieldReader in = new BitFieldReader(InStream.create("test",
-        new ByteBuffer[]{inBuf}, new long[]{0}, inBuf.remaining(),
-        null, 500), 3);
-    for(int i=0; i < COUNT; ++i) {
-      int x = in.next();
-      if (i < COUNT / 2) {
-        assertEquals(i & 7, x);
-      } else {
-        assertEquals((i/3) & 7, x);
-      }
-    }
-  }
-
-  @Test
-  public void testSkips() throws Exception {
-    TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
-    BitFieldWriter out = new BitFieldWriter(
-        new OutStream("test", 100, null, collect), 1);
-    final int COUNT = 16384;
-    for(int i=0; i < COUNT; ++i) {
-      if (i < COUNT/2) {
-        out.write(i & 1);
-      } else {
-        out.write((i/3) & 1);
-      }
-    }
-    out.flush();
-    ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
-    collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
-    inBuf.flip();
-    BitFieldReader in = new BitFieldReader(InStream.create("test", new ByteBuffer[]{inBuf},
-        new long[]{0}, inBuf.remaining(), null, 100), 1);
-    for(int i=0; i < COUNT; i += 5) {
-      int x = (int) in.next();
-      if (i < COUNT/2) {
-        assertEquals(i & 1, x);
-      } else {
-        assertEquals((i/3) & 1, x);
-      }
-      if (i < COUNT - 5) {
-        in.skip(4);
-      }
-      in.skip(0);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/orc/impl/TestBitPack.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/orc/impl/TestBitPack.java b/orc/src/test/org/apache/orc/impl/TestBitPack.java
deleted file mode 100644
index f2d3d64..0000000
--- a/orc/src/test/org/apache/orc/impl/TestBitPack.java
+++ /dev/null
@@ -1,279 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc.impl;
-
-import static org.junit.Assert.assertArrayEquals;
-import static org.junit.Assert.assertEquals;
-
-import java.io.File;
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.Collections;
-import java.util.Random;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TestName;
-
-import com.google.common.primitives.Longs;
-
-public class TestBitPack {
-
-  private static final int SIZE = 100;
-  private static Random rand = new Random(100);
-  Path workDir = new Path(System.getProperty("test.tmp.dir", "target" + File.separator + "test"
-      + File.separator + "tmp"));
-
-  Configuration conf;
-  FileSystem fs;
-  Path testFilePath;
-
-  @Rule
-  public TestName testCaseName = new TestName();
-
-  @Before
-  public void openFileSystem() throws Exception {
-    conf = new Configuration();
-    fs = FileSystem.getLocal(conf);
-    testFilePath = new Path(workDir, "TestOrcFile." + testCaseName.getMethodName() + ".orc");
-    fs.delete(testFilePath, false);
-  }
-
-  private long[] deltaEncode(long[] inp) {
-    long[] output = new long[inp.length];
-    SerializationUtils utils = new SerializationUtils();
-    for (int i = 0; i < inp.length; i++) {
-      output[i] = utils.zigzagEncode(inp[i]);
-    }
-    return output;
-  }
-
-  private long nextLong(Random rng, long n) {
-    long bits, val;
-    do {
-      bits = (rng.nextLong() << 1) >>> 1;
-      val = bits % n;
-    } while (bits - val + (n - 1) < 0L);
-    return val;
-  }
-
-  private void runTest(int numBits) throws IOException {
-    long[] inp = new long[SIZE];
-    for (int i = 0; i < SIZE; i++) {
-      long val = 0;
-      if (numBits <= 32) {
-        if (numBits == 1) {
-          val = -1 * rand.nextInt(2);
-        } else {
-          val = rand.nextInt((int) Math.pow(2, numBits - 1));
-        }
-      } else {
-        val = nextLong(rand, (long) Math.pow(2, numBits - 2));
-      }
-      if (val % 2 == 0) {
-        val = -val;
-      }
-      inp[i] = val;
-    }
-    long[] deltaEncoded = deltaEncode(inp);
-    long minInput = Collections.min(Longs.asList(deltaEncoded));
-    long maxInput = Collections.max(Longs.asList(deltaEncoded));
-    long rangeInput = maxInput - minInput;
-    SerializationUtils utils = new SerializationUtils();
-    int fixedWidth = utils.findClosestNumBits(rangeInput);
-    TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
-    OutStream output = new OutStream("test", SIZE, null, collect);
-    utils.writeInts(deltaEncoded, 0, deltaEncoded.length, fixedWidth, output);
-    output.flush();
-    ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
-    collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
-    inBuf.flip();
-    long[] buff = new long[SIZE];
-    utils.readInts(buff, 0, SIZE, fixedWidth, InStream.create("test", new ByteBuffer[] { inBuf },
-        new long[] { 0 }, inBuf.remaining(), null, SIZE));
-    for (int i = 0; i < SIZE; i++) {
-      buff[i] = utils.zigzagDecode(buff[i]);
-    }
-    assertEquals(numBits, fixedWidth);
-    assertArrayEquals(inp, buff);
-  }
-
-  @Test
-  public void test01BitPacking1Bit() throws IOException {
-    runTest(1);
-  }
-
-  @Test
-  public void test02BitPacking2Bit() throws IOException {
-    runTest(2);
-  }
-
-  @Test
-  public void test03BitPacking3Bit() throws IOException {
-    runTest(3);
-  }
-
-  @Test
-  public void test04BitPacking4Bit() throws IOException {
-    runTest(4);
-  }
-
-  @Test
-  public void test05BitPacking5Bit() throws IOException {
-    runTest(5);
-  }
-
-  @Test
-  public void test06BitPacking6Bit() throws IOException {
-    runTest(6);
-  }
-
-  @Test
-  public void test07BitPacking7Bit() throws IOException {
-    runTest(7);
-  }
-
-  @Test
-  public void test08BitPacking8Bit() throws IOException {
-    runTest(8);
-  }
-
-  @Test
-  public void test09BitPacking9Bit() throws IOException {
-    runTest(9);
-  }
-
-  @Test
-  public void test10BitPacking10Bit() throws IOException {
-    runTest(10);
-  }
-
-  @Test
-  public void test11BitPacking11Bit() throws IOException {
-    runTest(11);
-  }
-
-  @Test
-  public void test12BitPacking12Bit() throws IOException {
-    runTest(12);
-  }
-
-  @Test
-  public void test13BitPacking13Bit() throws IOException {
-    runTest(13);
-  }
-
-  @Test
-  public void test14BitPacking14Bit() throws IOException {
-    runTest(14);
-  }
-
-  @Test
-  public void test15BitPacking15Bit() throws IOException {
-    runTest(15);
-  }
-
-  @Test
-  public void test16BitPacking16Bit() throws IOException {
-    runTest(16);
-  }
-
-  @Test
-  public void test17BitPacking17Bit() throws IOException {
-    runTest(17);
-  }
-
-  @Test
-  public void test18BitPacking18Bit() throws IOException {
-    runTest(18);
-  }
-
-  @Test
-  public void test19BitPacking19Bit() throws IOException {
-    runTest(19);
-  }
-
-  @Test
-  public void test20BitPacking20Bit() throws IOException {
-    runTest(20);
-  }
-
-  @Test
-  public void test21BitPacking21Bit() throws IOException {
-    runTest(21);
-  }
-
-  @Test
-  public void test22BitPacking22Bit() throws IOException {
-    runTest(22);
-  }
-
-  @Test
-  public void test23BitPacking23Bit() throws IOException {
-    runTest(23);
-  }
-
-  @Test
-  public void test24BitPacking24Bit() throws IOException {
-    runTest(24);
-  }
-
-  @Test
-  public void test26BitPacking26Bit() throws IOException {
-    runTest(26);
-  }
-
-  @Test
-  public void test28BitPacking28Bit() throws IOException {
-    runTest(28);
-  }
-
-  @Test
-  public void test30BitPacking30Bit() throws IOException {
-    runTest(30);
-  }
-
-  @Test
-  public void test32BitPacking32Bit() throws IOException {
-    runTest(32);
-  }
-
-  @Test
-  public void test40BitPacking40Bit() throws IOException {
-    runTest(40);
-  }
-
-  @Test
-  public void test48BitPacking48Bit() throws IOException {
-    runTest(48);
-  }
-
-  @Test
-  public void test56BitPacking56Bit() throws IOException {
-    runTest(56);
-  }
-
-  @Test
-  public void test64BitPacking64Bit() throws IOException {
-    runTest(64);
-  }
-}
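
[Editorial aside, not part of the patch.] The deleted TestBitPack above delta-encodes its input with SerializationUtils.zigzagEncode before handing it to the fixed-width bit packer. For readers unfamiliar with that step, here is the standard zigzag mapping written from first principles rather than quoted from the ORC code; the class and method names are invented for the sketch. Small positive and negative longs both map to small unsigned values, which is what lets the packer get away with few bits per value.

// Standalone zigzag illustration; not the ORC source, just the transform it relies on.
public class ZigzagSketch {
  // encode: interleave sign and magnitude, so 0, -1, 1, -2, 2 map to 0, 1, 2, 3, 4
  static long zigzagEncode(long n) {
    return (n << 1) ^ (n >> 63);   // arithmetic shift smears the sign bit across the word
  }
  // decode: undo the interleaving
  static long zigzagDecode(long z) {
    return (z >>> 1) ^ -(z & 1);   // logical shift, then restore the sign from the low bit
  }
  public static void main(String[] args) {
    long[] samples = {0, -1, 1, -2, 2, 1000, -1000, Long.MAX_VALUE, Long.MIN_VALUE};
    for (long n : samples) {
      long z = zigzagEncode(n);
      if (zigzagDecode(z) != n) {
        throw new AssertionError("round trip failed for " + n);
      }
      System.out.println(n + " -> " + Long.toUnsignedString(z));
    }
  }
}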

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/orc/impl/TestColumnStatisticsImpl.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/orc/impl/TestColumnStatisticsImpl.java b/orc/src/test/org/apache/orc/impl/TestColumnStatisticsImpl.java
deleted file mode 100644
index 6165526..0000000
--- a/orc/src/test/org/apache/orc/impl/TestColumnStatisticsImpl.java
+++ /dev/null
@@ -1,64 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc.impl;
-
-import org.apache.hadoop.hive.serde2.io.DateWritable;
-import org.apache.orc.OrcProto;
-import org.apache.orc.TypeDescription;
-import org.junit.Test;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
-
-public class TestColumnStatisticsImpl {
-
-  @Test
-  public void testUpdateDate() throws Exception {
-    ColumnStatisticsImpl stat = ColumnStatisticsImpl.create(TypeDescription.createDate());
-    DateWritable date = new DateWritable(16400);
-    stat.increment();
-    stat.updateDate(date);
-    assertDateStatistics(stat, 1, 16400, 16400);
-
-    date.set(16410);
-    stat.increment();
-    stat.updateDate(date);
-    assertDateStatistics(stat, 2, 16400, 16410);
-
-    date.set(16420);
-    stat.increment();
-    stat.updateDate(date);
-    assertDateStatistics(stat, 3, 16400, 16420);
-  }
-
-  private void assertDateStatistics(ColumnStatisticsImpl stat, int count, int minimum, int maximum) {
-    OrcProto.ColumnStatistics.Builder builder = stat.serialize();
-
-    assertEquals(count, builder.getNumberOfValues());
-    assertTrue(builder.hasDateStatistics());
-    assertFalse(builder.hasStringStatistics());
-
-    OrcProto.DateStatistics protoStat = builder.getDateStatistics();
-    assertTrue(protoStat.hasMinimum());
-    assertEquals(minimum, protoStat.getMinimum());
-    assertTrue(protoStat.hasMaximum());
-    assertEquals(maximum, protoStat.getMaximum());
-  }
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/orc/impl/TestDataReaderProperties.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/orc/impl/TestDataReaderProperties.java b/orc/src/test/org/apache/orc/impl/TestDataReaderProperties.java
deleted file mode 100644
index 46546b0..0000000
--- a/orc/src/test/org/apache/orc/impl/TestDataReaderProperties.java
+++ /dev/null
@@ -1,86 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc.impl;
-
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.orc.CompressionCodec;
-import org.apache.orc.CompressionKind;
-import org.junit.Test;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNull;
-import static org.mockito.Mockito.mock;
-
-public class TestDataReaderProperties {
-
-  private FileSystem mockedFileSystem = mock(FileSystem.class);
-  private Path mockedPath = mock(Path.class);
-  private boolean mockedZeroCopy = false;
-
-  @Test
-  public void testCompleteBuild() {
-    DataReaderProperties properties = DataReaderProperties.builder()
-      .withFileSystem(mockedFileSystem)
-      .withPath(mockedPath)
-      .withCompression(CompressionKind.ZLIB)
-      .withZeroCopy(mockedZeroCopy)
-      .build();
-    assertEquals(mockedFileSystem, properties.getFileSystem());
-    assertEquals(mockedPath, properties.getPath());
-    assertEquals(CompressionKind.ZLIB, properties.getCompression());
-    assertEquals(mockedZeroCopy, properties.getZeroCopy());
-  }
-
-  @Test
-  public void testMissingNonRequiredArgs() {
-    DataReaderProperties properties = DataReaderProperties.builder()
-      .withFileSystem(mockedFileSystem)
-      .withPath(mockedPath)
-      .build();
-    assertEquals(mockedFileSystem, properties.getFileSystem());
-    assertEquals(mockedPath, properties.getPath());
-    assertNull(properties.getCompression());
-    assertFalse(properties.getZeroCopy());
-  }
-
-  @Test(expected = java.lang.NullPointerException.class)
-  public void testEmptyBuild() {
-    DataReaderProperties.builder().build();
-  }
-
-  @Test(expected = java.lang.NullPointerException.class)
-  public void testMissingPath() {
-    DataReaderProperties.builder()
-      .withFileSystem(mockedFileSystem)
-      .withCompression(CompressionKind.NONE)
-      .withZeroCopy(mockedZeroCopy)
-      .build();
-  }
-
-  @Test(expected = java.lang.NullPointerException.class)
-  public void testMissingFileSystem() {
-    DataReaderProperties.builder()
-      .withPath(mockedPath)
-      .withCompression(CompressionKind.NONE)
-      .withZeroCopy(mockedZeroCopy)
-      .build();
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/orc/impl/TestDynamicArray.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/orc/impl/TestDynamicArray.java b/orc/src/test/org/apache/orc/impl/TestDynamicArray.java
deleted file mode 100644
index af583f7..0000000
--- a/orc/src/test/org/apache/orc/impl/TestDynamicArray.java
+++ /dev/null
@@ -1,90 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc.impl;
-
-import java.util.Random;
-
-import org.apache.orc.impl.DynamicByteArray;
-import org.apache.orc.impl.DynamicIntArray;
-import org.junit.Test;
-
-import static org.junit.Assert.assertEquals;
-
-public class TestDynamicArray {
-
-  @Test
-  public void testByteArray() throws Exception {
-    DynamicByteArray dba = new DynamicByteArray(3, 10);
-    dba.add((byte) 0);
-    dba.add((byte) 1);
-    dba.set(3, (byte) 3);
-    dba.set(2, (byte) 2);
-    dba.add((byte) 4);
-    assertEquals("{0,1,2,3,4}", dba.toString());
-    assertEquals(5, dba.size());
-    byte[] val;
-    val = new byte[0];
-    assertEquals(0, dba.compare(val, 0, 0, 2, 0));
-    assertEquals(-1, dba.compare(val, 0, 0, 2, 1));
-    val = new byte[]{3,42};
-    assertEquals(1, dba.compare(val, 0, 1, 2, 0));
-    assertEquals(1, dba.compare(val, 0, 1, 2, 1));
-    assertEquals(0, dba.compare(val, 0, 1, 3, 1));
-    assertEquals(-1, dba.compare(val, 0, 1, 3, 2));
-    assertEquals(1, dba.compare(val, 0, 2, 3, 1));
-    val = new byte[256];
-    for(int b=-128; b < 128; ++b) {
-      dba.add((byte) b);
-      val[b+128] = (byte) b;
-    }
-    assertEquals(0, dba.compare(val, 0, 256, 5, 256));
-    assertEquals(1, dba.compare(val, 0, 1, 0, 1));
-    assertEquals(1, dba.compare(val, 254, 1, 0, 1));
-    assertEquals(1, dba.compare(val, 120, 1, 64, 1));
-    val = new byte[1024];
-    Random rand = new Random(1701);
-    for(int i = 0; i < val.length; ++i) {
-      rand.nextBytes(val);
-    }
-    dba.add(val, 0, 1024);
-    assertEquals(1285, dba.size());
-    assertEquals(0, dba.compare(val, 0, 1024, 261, 1024));
-  }
-
-  @Test
-  public void testIntArray() throws Exception {
-    DynamicIntArray dia = new DynamicIntArray(10);
-    for(int i=0; i < 10000; ++i) {
-      dia.add(2*i);
-    }
-    assertEquals(10000, dia.size());
-    for(int i=0; i < 10000; ++i) {
-      assertEquals(2*i, dia.get(i));
-    }
-    dia.clear();
-    assertEquals(0, dia.size());
-    dia.add(3);
-    dia.add(12);
-    dia.add(65);
-    assertEquals("{3,12,65}", dia.toString());
-    for(int i=0; i < 5; ++i) {
-      dia.increment(i, 3);
-    }
-    assertEquals("{6,15,68,3,3}", dia.toString());
-  }
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/orc/impl/TestInStream.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/orc/impl/TestInStream.java b/orc/src/test/org/apache/orc/impl/TestInStream.java
deleted file mode 100644
index 9e65345..0000000
--- a/orc/src/test/org/apache/orc/impl/TestInStream.java
+++ /dev/null
@@ -1,314 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc.impl;
-
-import static junit.framework.Assert.assertEquals;
-import static junit.framework.Assert.fail;
-
-import java.io.DataInputStream;
-import java.io.DataOutput;
-import java.io.DataOutputStream;
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.orc.CompressionCodec;
-import org.junit.Test;
-
-public class TestInStream {
-
-  static class OutputCollector implements OutStream.OutputReceiver {
-    DynamicByteArray buffer = new DynamicByteArray();
-
-    @Override
-    public void output(ByteBuffer buffer) throws IOException {
-      this.buffer.add(buffer.array(), buffer.arrayOffset() + buffer.position(),
-          buffer.remaining());
-    }
-  }
-
-  static class PositionCollector
-      implements PositionProvider, PositionRecorder {
-    private List<Long> positions = new ArrayList<Long>();
-    private int index = 0;
-
-    @Override
-    public long getNext() {
-      return positions.get(index++);
-    }
-
-    @Override
-    public void addPosition(long offset) {
-      positions.add(offset);
-    }
-
-    public void reset() {
-      index = 0;
-    }
-
-    @Override
-    public String toString() {
-      StringBuilder builder = new StringBuilder("position: ");
-      for(int i=0; i < positions.size(); ++i) {
-        if (i != 0) {
-          builder.append(", ");
-        }
-        builder.append(positions.get(i));
-      }
-      return builder.toString();
-    }
-  }
-
-  @Test
-  public void testUncompressed() throws Exception {
-    OutputCollector collect = new OutputCollector();
-    OutStream out = new OutStream("test", 100, null, collect);
-    PositionCollector[] positions = new PositionCollector[1024];
-    for(int i=0; i < 1024; ++i) {
-      positions[i] = new PositionCollector();
-      out.getPosition(positions[i]);
-      out.write(i);
-    }
-    out.flush();
-    assertEquals(1024, collect.buffer.size());
-    for(int i=0; i < 1024; ++i) {
-      assertEquals((byte) i, collect.buffer.get(i));
-    }
-    ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
-    collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
-    inBuf.flip();
-    InStream in = InStream.create("test", new ByteBuffer[]{inBuf},
-        new long[]{0}, inBuf.remaining(), null, 100);
-    assertEquals("uncompressed stream test position: 0 length: 1024" +
-                 " range: 0 offset: 0 limit: 0",
-                 in.toString());
-    for(int i=0; i < 1024; ++i) {
-      int x = in.read();
-      assertEquals(i & 0xff, x);
-    }
-    for(int i=1023; i >= 0; --i) {
-      in.seek(positions[i]);
-      assertEquals(i & 0xff, in.read());
-    }
-  }
-
-  @Test
-  public void testCompressed() throws Exception {
-    OutputCollector collect = new OutputCollector();
-    CompressionCodec codec = new ZlibCodec();
-    OutStream out = new OutStream("test", 300, codec, collect);
-    PositionCollector[] positions = new PositionCollector[1024];
-    for(int i=0; i < 1024; ++i) {
-      positions[i] = new PositionCollector();
-      out.getPosition(positions[i]);
-      out.write(i);
-    }
-    out.flush();
-    assertEquals("test", out.toString());
-    assertEquals(961, collect.buffer.size());
-    ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
-    collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
-    inBuf.flip();
-    InStream in = InStream.create("test", new ByteBuffer[]{inBuf},
-        new long[]{0}, inBuf.remaining(), codec, 300);
-    assertEquals("compressed stream test position: 0 length: 961 range: 0" +
-                 " offset: 0 limit: 0 range 0 = 0 to 961",
-                 in.toString());
-    for(int i=0; i < 1024; ++i) {
-      int x = in.read();
-      assertEquals(i & 0xff, x);
-    }
-    assertEquals(0, in.available());
-    for(int i=1023; i >= 0; --i) {
-      in.seek(positions[i]);
-      assertEquals(i & 0xff, in.read());
-    }
-  }
-
-  @Test
-  public void testCorruptStream() throws Exception {
-    OutputCollector collect = new OutputCollector();
-    CompressionCodec codec = new ZlibCodec();
-    OutStream out = new OutStream("test", 500, codec, collect);
-    PositionCollector[] positions = new PositionCollector[1024];
-    for(int i=0; i < 1024; ++i) {
-      positions[i] = new PositionCollector();
-      out.getPosition(positions[i]);
-      out.write(i);
-    }
-    out.flush();
-
-    // now try to read the stream with a buffer that is too small
-    ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
-    collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
-    inBuf.flip();
-    InStream in = InStream.create("test", new ByteBuffer[]{inBuf},
-        new long[]{0}, inBuf.remaining(), codec, 100);
-    byte[] contents = new byte[1024];
-    try {
-      in.read(contents);
-      fail();
-    } catch(IllegalArgumentException iae) {
-      // EXPECTED
-    }
-
-    // make a corrupted header
-    inBuf.clear();
-    inBuf.put((byte) 32);
-    inBuf.put((byte) 0);
-    inBuf.flip();
-    in = InStream.create("test2", new ByteBuffer[]{inBuf}, new long[]{0},
-        inBuf.remaining(), codec, 300);
-    try {
-      in.read();
-      fail();
-    } catch (IllegalStateException ise) {
-      // EXPECTED
-    }
-  }
-
-  @Test
-  public void testDisjointBuffers() throws Exception {
-    OutputCollector collect = new OutputCollector();
-    CompressionCodec codec = new ZlibCodec();
-    OutStream out = new OutStream("test", 400, codec, collect);
-    PositionCollector[] positions = new PositionCollector[1024];
-    DataOutput stream = new DataOutputStream(out);
-    for(int i=0; i < 1024; ++i) {
-      positions[i] = new PositionCollector();
-      out.getPosition(positions[i]);
-      stream.writeInt(i);
-    }
-    out.flush();
-    assertEquals("test", out.toString());
-    assertEquals(1674, collect.buffer.size());
-    ByteBuffer[] inBuf = new ByteBuffer[3];
-    inBuf[0] = ByteBuffer.allocate(500);
-    inBuf[1] = ByteBuffer.allocate(1200);
-    inBuf[2] = ByteBuffer.allocate(500);
-    collect.buffer.setByteBuffer(inBuf[0], 0, 483);
-    collect.buffer.setByteBuffer(inBuf[1], 483, 1625 - 483);
-    collect.buffer.setByteBuffer(inBuf[2], 1625, 1674 - 1625);
-
-    for(int i=0; i < inBuf.length; ++i) {
-      inBuf[i].flip();
-    }
-    InStream in = InStream.create("test", inBuf,
-        new long[]{0,483, 1625}, 1674, codec, 400);
-    assertEquals("compressed stream test position: 0 length: 1674 range: 0" +
-                 " offset: 0 limit: 0 range 0 = 0 to 483;" +
-                 "  range 1 = 483 to 1142;  range 2 = 1625 to 49",
-                 in.toString());
-    DataInputStream inStream = new DataInputStream(in);
-    for(int i=0; i < 1024; ++i) {
-      int x = inStream.readInt();
-      assertEquals(i, x);
-    }
-    assertEquals(0, in.available());
-    for(int i=1023; i >= 0; --i) {
-      in.seek(positions[i]);
-      assertEquals(i, inStream.readInt());
-    }
-
-    in = InStream.create("test", new ByteBuffer[]{inBuf[1], inBuf[2]},
-        new long[]{483, 1625}, 1674, codec, 400);
-    inStream = new DataInputStream(in);
-    positions[303].reset();
-    in.seek(positions[303]);
-    for(int i=303; i < 1024; ++i) {
-      assertEquals(i, inStream.readInt());
-    }
-
-    in = InStream.create("test", new ByteBuffer[]{inBuf[0], inBuf[2]},
-        new long[]{0, 1625}, 1674, codec, 400);
-    inStream = new DataInputStream(in);
-    positions[1001].reset();
-    for(int i=0; i < 300; ++i) {
-      assertEquals(i, inStream.readInt());
-    }
-    in.seek(positions[1001]);
-    for(int i=1001; i < 1024; ++i) {
-      assertEquals(i, inStream.readInt());
-    }
-  }
-
-  @Test
-  public void testUncompressedDisjointBuffers() throws Exception {
-    OutputCollector collect = new OutputCollector();
-    OutStream out = new OutStream("test", 400, null, collect);
-    PositionCollector[] positions = new PositionCollector[1024];
-    DataOutput stream = new DataOutputStream(out);
-    for(int i=0; i < 1024; ++i) {
-      positions[i] = new PositionCollector();
-      out.getPosition(positions[i]);
-      stream.writeInt(i);
-    }
-    out.flush();
-    assertEquals("test", out.toString());
-    assertEquals(4096, collect.buffer.size());
-    ByteBuffer[] inBuf = new ByteBuffer[3];
-    inBuf[0] = ByteBuffer.allocate(1100);
-    inBuf[1] = ByteBuffer.allocate(2200);
-    inBuf[2] = ByteBuffer.allocate(1100);
-    collect.buffer.setByteBuffer(inBuf[0], 0, 1024);
-    collect.buffer.setByteBuffer(inBuf[1], 1024, 2048);
-    collect.buffer.setByteBuffer(inBuf[2], 3072, 1024);
-
-    for(int i=0; i < inBuf.length; ++i) {
-      inBuf[i].flip();
-    }
-    InStream in = InStream.create("test", inBuf,
-        new long[]{0, 1024, 3072}, 4096, null, 400);
-    assertEquals("uncompressed stream test position: 0 length: 4096" +
-                 " range: 0 offset: 0 limit: 0",
-                 in.toString());
-    DataInputStream inStream = new DataInputStream(in);
-    for(int i=0; i < 1024; ++i) {
-      int x = inStream.readInt();
-      assertEquals(i, x);
-    }
-    assertEquals(0, in.available());
-    for(int i=1023; i >= 0; --i) {
-      in.seek(positions[i]);
-      assertEquals(i, inStream.readInt());
-    }
-
-    in = InStream.create("test", new ByteBuffer[]{inBuf[1], inBuf[2]},
-        new long[]{1024, 3072}, 4096, null, 400);
-    inStream = new DataInputStream(in);
-    positions[256].reset();
-    in.seek(positions[256]);
-    for(int i=256; i < 1024; ++i) {
-      assertEquals(i, inStream.readInt());
-    }
-
-    in = InStream.create("test", new ByteBuffer[]{inBuf[0], inBuf[2]},
-        new long[]{0, 3072}, 4096, null, 400);
-    inStream = new DataInputStream(in);
-    positions[768].reset();
-    for(int i=0; i < 256; ++i) {
-      assertEquals(i, inStream.readInt());
-    }
-    in.seek(positions[768]);
-    for(int i=768; i < 1024; ++i) {
-      assertEquals(i, inStream.readInt());
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/orc/impl/TestIntegerCompressionReader.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/orc/impl/TestIntegerCompressionReader.java b/orc/src/test/org/apache/orc/impl/TestIntegerCompressionReader.java
deleted file mode 100644
index 399f35e..0000000
--- a/orc/src/test/org/apache/orc/impl/TestIntegerCompressionReader.java
+++ /dev/null
@@ -1,130 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc.impl;
-
-import static junit.framework.Assert.assertEquals;
-
-import java.nio.ByteBuffer;
-import java.util.Random;
-
-import org.apache.orc.CompressionCodec;
-import org.junit.Test;
-
-public class TestIntegerCompressionReader {
-
-  public void runSeekTest(CompressionCodec codec) throws Exception {
-    TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
-    RunLengthIntegerWriterV2 out = new RunLengthIntegerWriterV2(
-        new OutStream("test", 1000, codec, collect), true);
-    TestInStream.PositionCollector[] positions =
-        new TestInStream.PositionCollector[4096];
-    Random random = new Random(99);
-    int[] junk = new int[2048];
-    for(int i=0; i < junk.length; ++i) {
-      junk[i] = random.nextInt();
-    }
-    for(int i=0; i < 4096; ++i) {
-      positions[i] = new TestInStream.PositionCollector();
-      out.getPosition(positions[i]);
-      // test runs, incrementing runs, non-runs
-      if (i < 1024) {
-        out.write(i/4);
-      } else if (i < 2048) {
-        out.write(2*i);
-      } else {
-        out.write(junk[i-2048]);
-      }
-    }
-    out.flush();
-    ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
-    collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
-    inBuf.flip();
-    RunLengthIntegerReaderV2 in =
-      new RunLengthIntegerReaderV2(InStream.create
-                                   ("test", new ByteBuffer[]{inBuf},
-                                    new long[]{0}, inBuf.remaining(),
-                                    codec, 1000), true, false);
-    for(int i=0; i < 2048; ++i) {
-      int x = (int) in.next();
-      if (i < 1024) {
-        assertEquals(i/4, x);
-      } else if (i < 2048) {
-        assertEquals(2*i, x);
-      } else {
-        assertEquals(junk[i-2048], x);
-      }
-    }
-    for(int i=2047; i >= 0; --i) {
-      in.seek(positions[i]);
-      int x = (int) in.next();
-      if (i < 1024) {
-        assertEquals(i/4, x);
-      } else if (i < 2048) {
-        assertEquals(2*i, x);
-      } else {
-        assertEquals(junk[i-2048], x);
-      }
-    }
-  }
-
-  @Test
-  public void testUncompressedSeek() throws Exception {
-    runSeekTest(null);
-  }
-
-  @Test
-  public void testCompressedSeek() throws Exception {
-    runSeekTest(new ZlibCodec());
-  }
-
-  @Test
-  public void testSkips() throws Exception {
-    TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
-    RunLengthIntegerWriterV2 out = new RunLengthIntegerWriterV2(
-        new OutStream("test", 100, null, collect), true);
-    for(int i=0; i < 2048; ++i) {
-      if (i < 1024) {
-        out.write(i);
-      } else {
-        out.write(256 * i);
-      }
-    }
-    out.flush();
-    ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
-    collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
-    inBuf.flip();
-    RunLengthIntegerReaderV2 in =
-      new RunLengthIntegerReaderV2(InStream.create("test",
-                                                   new ByteBuffer[]{inBuf},
-                                                   new long[]{0},
-                                                   inBuf.remaining(),
-                                                   null, 100), true, false);
-    for(int i=0; i < 2048; i += 10) {
-      int x = (int) in.next();
-      if (i < 1024) {
-        assertEquals(i, x);
-      } else {
-        assertEquals(256 * i, x);
-      }
-      if (i < 2038) {
-        in.skip(9);
-      }
-      in.skip(0);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/orc/impl/TestMemoryManager.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/orc/impl/TestMemoryManager.java b/orc/src/test/org/apache/orc/impl/TestMemoryManager.java
deleted file mode 100644
index f48c545..0000000
--- a/orc/src/test/org/apache/orc/impl/TestMemoryManager.java
+++ /dev/null
@@ -1,133 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc.impl;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.orc.impl.MemoryManager;
-import org.hamcrest.BaseMatcher;
-import org.hamcrest.Description;
-import org.junit.Test;
-import org.mockito.Matchers;
-import org.mockito.Mockito;
-
-import java.lang.management.ManagementFactory;
-
-import static junit.framework.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.verify;
-
-/**
- * Test the ORC memory manager.
- */
-public class TestMemoryManager {
-  private static final double ERROR = 0.000001;
-
-  private static class NullCallback implements MemoryManager.Callback {
-    public boolean checkMemory(double newScale) {
-      return false;
-    }
-  }
-
-  @Test
-  public void testBasics() throws Exception {
-    Configuration conf = new Configuration();
-    MemoryManager mgr = new MemoryManager(conf);
-    NullCallback callback = new NullCallback();
-    long poolSize = mgr.getTotalMemoryPool();
-    assertEquals(Math.round(ManagementFactory.getMemoryMXBean().
-        getHeapMemoryUsage().getMax() * 0.5d), poolSize);
-    assertEquals(1.0, mgr.getAllocationScale(), 0.00001);
-    mgr.addWriter(new Path("p1"), 1000, callback);
-    assertEquals(1.0, mgr.getAllocationScale(), 0.00001);
-    mgr.addWriter(new Path("p1"), poolSize / 2, callback);
-    assertEquals(1.0, mgr.getAllocationScale(), 0.00001);
-    mgr.addWriter(new Path("p2"), poolSize / 2, callback);
-    assertEquals(1.0, mgr.getAllocationScale(), 0.00001);
-    mgr.addWriter(new Path("p3"), poolSize / 2, callback);
-    assertEquals(0.6666667, mgr.getAllocationScale(), 0.00001);
-    mgr.addWriter(new Path("p4"), poolSize / 2, callback);
-    assertEquals(0.5, mgr.getAllocationScale(), 0.000001);
-    mgr.addWriter(new Path("p4"), 3 * poolSize / 2, callback);
-    assertEquals(0.3333333, mgr.getAllocationScale(), 0.000001);
-    mgr.removeWriter(new Path("p1"));
-    mgr.removeWriter(new Path("p2"));
-    assertEquals(0.5, mgr.getAllocationScale(), 0.00001);
-    mgr.removeWriter(new Path("p4"));
-    assertEquals(1.0, mgr.getAllocationScale(), 0.00001);
-  }
-
-  @Test
-  public void testConfig() throws Exception {
-    Configuration conf = new Configuration();
-    conf.set("hive.exec.orc.memory.pool", "0.9");
-    MemoryManager mgr = new MemoryManager(conf);
-    long mem =
-        ManagementFactory.getMemoryMXBean().getHeapMemoryUsage().getMax();
-    System.err.print("Memory = " + mem);
-    long pool = mgr.getTotalMemoryPool();
-    assertTrue("Pool too small: " + pool, mem * 0.899 < pool);
-    assertTrue("Pool too big: " + pool, pool < mem * 0.901);
-  }
-
-  private static class DoubleMatcher extends BaseMatcher<Double> {
-    final double expected;
-    final double error;
-    DoubleMatcher(double expected, double error) {
-      this.expected = expected;
-      this.error = error;
-    }
-
-    @Override
-    public boolean matches(Object val) {
-      double dbl = (Double) val;
-      return Math.abs(dbl - expected) <= error;
-    }
-
-    @Override
-    public void describeTo(Description description) {
-      description.appendText("not sufficiently close to ");
-      description.appendText(Double.toString(expected));
-    }
-  }
-
-  private static DoubleMatcher closeTo(double value, double error) {
-    return new DoubleMatcher(value, error);
-  }
-
-  @Test
-  public void testCallback() throws Exception {
-    Configuration conf = new Configuration();
-    MemoryManager mgr = new MemoryManager(conf);
-    long pool = mgr.getTotalMemoryPool();
-    MemoryManager.Callback[] calls = new MemoryManager.Callback[20];
-    for(int i=0; i < calls.length; ++i) {
-      calls[i] = Mockito.mock(MemoryManager.Callback.class);
-      mgr.addWriter(new Path(Integer.toString(i)), pool/4, calls[i]);
-    }
-    // add enough rows to get the memory manager to check the limits
-    for(int i=0; i < 10000; ++i) {
-      mgr.addedRow(1);
-    }
-    for(int call=0; call < calls.length; ++call) {
-      Mockito.verify(calls[call], Mockito.times(2))
-          .checkMemory(Matchers.doubleThat(closeTo(0.2, ERROR)));
-    }
-  }
-}
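
For reference, the allocation-scale values asserted in testBasics above are consistent with the pool being shared proportionally once the writers' combined requests exceed it. Below is a minimal sketch, assuming scale = min(1.0, totalPool / totalRequested) and that re-adding the same path replaces its earlier request; it is not the Hive MemoryManager implementation, and the pool size used is an arbitrary placeholder.

    import java.util.LinkedHashMap;
    import java.util.Map;

    public class AllocationScaleSketch {
      // Hypothetical pool size; the real pool is a fraction of the JVM heap.
      static final long POOL = 1_000_000L;

      // Assumed rule: scale = min(1.0, pool / totalRequested). A Map keyed by
      // path models the "re-adding a path replaces its request" behavior.
      static double scale(Map<String, Long> writers) {
        long total = 0;
        for (long request : writers.values()) {
          total += request;
        }
        return total <= POOL ? 1.0 : (double) POOL / total;
      }

      public static void main(String[] args) {
        Map<String, Long> writers = new LinkedHashMap<>();
        writers.put("p1", POOL / 2);
        writers.put("p2", POOL / 2);
        writers.put("p3", POOL / 2);        // 1.5 * pool requested
        System.out.println(scale(writers)); // 0.666...
        writers.put("p4", 3 * POOL / 2);    // 3 * pool requested
        System.out.println(scale(writers)); // 0.333...
        writers.remove("p1");
        writers.remove("p2");               // 2 * pool requested
        System.out.println(scale(writers)); // 0.5
      }
    }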

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/orc/impl/TestOrcWideTable.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/orc/impl/TestOrcWideTable.java b/orc/src/test/org/apache/orc/impl/TestOrcWideTable.java
deleted file mode 100644
index efa3ffb..0000000
--- a/orc/src/test/org/apache/orc/impl/TestOrcWideTable.java
+++ /dev/null
@@ -1,64 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc.impl;
-
-import static org.junit.Assert.assertEquals;
-
-import java.io.IOException;
-
-import org.junit.Test;
-
-public class TestOrcWideTable {
-
-  @Test
-  public void testBufferSizeFor1Col() throws IOException {
-    assertEquals(128 * 1024, PhysicalFsWriter.getEstimatedBufferSize(512 * 1024 * 1024,
-        1, 128*1024));
-  }
-
-  @Test
-  public void testBufferSizeFor50Col() throws IOException {
-    assertEquals(256 * 1024, PhysicalFsWriter.getEstimatedBufferSize(256 * 1024 * 1024,
-        50, 256*1024));
-  }
-
-  @Test
-  public void testBufferSizeFor1000Col() throws IOException {
-    assertEquals(32 * 1024, PhysicalFsWriter.getEstimatedBufferSize(512 * 1024 * 1024,
-        1000, 128*1024));
-  }
-
-  @Test
-  public void testBufferSizeFor2000Col() throws IOException {
-    assertEquals(16 * 1024, PhysicalFsWriter.getEstimatedBufferSize(512 * 1024 * 1024,
-        2000, 256*1024));
-  }
-
-  @Test
-  public void testBufferSizeFor4000Col() throws IOException {
-    assertEquals(8 * 1024, PhysicalFsWriter.getEstimatedBufferSize(512 * 1024 * 1024,
-        4000, 256*1024));
-  }
-
-  @Test
-  public void testBufferSizeFor25000Col() throws IOException {
-    assertEquals(4 * 1024, PhysicalFsWriter.getEstimatedBufferSize(512 * 1024 * 1024,
-        25000, 256*1024));
-  }
-}
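
The buffer sizes asserted above can be reproduced by a simple rule of thumb: spread the stripe size over roughly 20 streams per column, snap to the closest power-of-two size between 4K and 256K, and cap at the requested buffer size. The sketch below is an inference from the asserted values only, not the verbatim PhysicalFsWriter.getEstimatedBufferSize code.

    public class BufferSizeSketch {
      // Assumed rule: stripeSize / (20 * columns), snapped to the closest
      // power-of-two size in [4K, 256K], then capped at the requested size.
      static int estimate(long stripeSize, int columns, int maxBufferSize) {
        long raw = stripeSize / (20L * columns);
        int closest = 4 * 1024;
        long bestDiff = Long.MAX_VALUE;
        for (int size = 4 * 1024; size <= 256 * 1024; size *= 2) {
          long diff = Math.abs(size - raw);
          if (diff < bestDiff) {
            bestDiff = diff;
            closest = size;
          }
        }
        return Math.min(closest, maxBufferSize);
      }

      public static void main(String[] args) {
        System.out.println(estimate(512L * 1024 * 1024, 1, 128 * 1024));     // 131072
        System.out.println(estimate(512L * 1024 * 1024, 1000, 128 * 1024));  // 32768
        System.out.println(estimate(512L * 1024 * 1024, 25000, 256 * 1024)); // 4096
      }
    }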

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/orc/impl/TestOutStream.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/orc/impl/TestOutStream.java b/orc/src/test/org/apache/orc/impl/TestOutStream.java
deleted file mode 100644
index e9614d5..0000000
--- a/orc/src/test/org/apache/orc/impl/TestOutStream.java
+++ /dev/null
@@ -1,43 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc.impl;
-
-import org.apache.orc.CompressionCodec;
-import org.junit.Test;
-import org.mockito.Mockito;
-
-import java.nio.ByteBuffer;
-
-import static org.junit.Assert.assertEquals;
-
-public class TestOutStream {
-
-  @Test
-  public void testFlush() throws Exception {
-    OutStream.OutputReceiver receiver =
-        Mockito.mock(OutStream.OutputReceiver.class);
-    CompressionCodec codec = new ZlibCodec();
-    OutStream stream = new OutStream("test", 128*1024, codec, receiver);
-    assertEquals(0L, stream.getBufferSize());
-    stream.write(new byte[]{0, 1, 2});
-    stream.flush();
-    Mockito.verify(receiver).output(Mockito.any(ByteBuffer.class));
-    assertEquals(0L, stream.getBufferSize());
-  }
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/orc/impl/TestRLEv2.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/orc/impl/TestRLEv2.java b/orc/src/test/org/apache/orc/impl/TestRLEv2.java
deleted file mode 100644
index e139619..0000000
--- a/orc/src/test/org/apache/orc/impl/TestRLEv2.java
+++ /dev/null
@@ -1,307 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc.impl;
-
-import static org.junit.Assert.assertEquals;
-
-import java.io.ByteArrayOutputStream;
-import java.io.File;
-import java.io.PrintStream;
-import java.util.Random;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.orc.CompressionKind;
-import org.apache.orc.OrcFile;
-import org.apache.orc.TypeDescription;
-import org.apache.orc.Writer;
-import org.apache.orc.tools.FileDump;
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TestName;
-
-public class TestRLEv2 {
-  Path workDir = new Path(System.getProperty("test.tmp.dir",
-      "target" + File.separator + "test" + File.separator + "tmp"));
-  Path testFilePath;
-  Configuration conf;
-  FileSystem fs;
-
-  @Rule
-  public TestName testCaseName = new TestName();
-
-  @Before
-  public void openFileSystem () throws Exception {
-    conf = new Configuration();
-    fs = FileSystem.getLocal(conf);
-    testFilePath = new Path(workDir, "TestRLEv2." +
-        testCaseName.getMethodName() + ".orc");
-    fs.delete(testFilePath, false);
-  }
-
-  void appendInt(VectorizedRowBatch batch, int i) {
-    ((LongColumnVector) batch.cols[0]).vector[batch.size++] = i;
-  }
-
-  @Test
-  public void testFixedDeltaZero() throws Exception {
-    TypeDescription schema = TypeDescription.createInt();
-    Writer w = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf)
-            .compress(CompressionKind.NONE)
-            .setSchema(schema)
-            .rowIndexStride(0)
-            .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
-            .version(OrcFile.Version.V_0_12)
-    );
-    VectorizedRowBatch batch = schema.createRowBatch(5120);
-    for (int i = 0; i < 5120; ++i) {
-      appendInt(batch, 123);
-    }
-    w.addRowBatch(batch);
-    w.close();
-
-    PrintStream origOut = System.out;
-    ByteArrayOutputStream myOut = new ByteArrayOutputStream();
-    System.setOut(new PrintStream(myOut));
-    FileDump.main(new String[]{testFilePath.toUri().toString()});
-    System.out.flush();
-    String outDump = new String(myOut.toByteArray());
-    // 10 runs of 512 elements. Each run has 2 bytes header, 2 bytes base (base = 123,
-    // zigzag encoded varint) and 1 byte delta (delta = 0). In total, 5 bytes per run.
-    assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 50"));
-    System.setOut(origOut);
-  }
-
-  @Test
-  public void testFixedDeltaOne() throws Exception {
-    TypeDescription schema = TypeDescription.createInt();
-    Writer w = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf)
-            .compress(CompressionKind.NONE)
-            .setSchema(schema)
-            .rowIndexStride(0)
-            .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
-            .version(OrcFile.Version.V_0_12)
-    );
-    VectorizedRowBatch batch = schema.createRowBatch(5120);
-    for (int i = 0; i < 5120; ++i) {
-      appendInt(batch, i % 512);
-    }
-    w.addRowBatch(batch);
-    w.close();
-
-    PrintStream origOut = System.out;
-    ByteArrayOutputStream myOut = new ByteArrayOutputStream();
-    System.setOut(new PrintStream(myOut));
-    FileDump.main(new String[]{testFilePath.toUri().toString()});
-    System.out.flush();
-    String outDump = new String(myOut.toByteArray());
-    // 10 runs of 512 elements. Each run has 2 bytes header, 1 byte base (base = 0)
-    // and 1 byte delta (delta = 1). In total, 4 bytes per run.
-    assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 40"));
-    System.setOut(origOut);
-  }
-
-  @Test
-  public void testFixedDeltaOneDescending() throws Exception {
-    TypeDescription schema = TypeDescription.createInt();
-    Writer w = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf)
-            .compress(CompressionKind.NONE)
-            .setSchema(schema)
-            .rowIndexStride(0)
-            .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
-            .version(OrcFile.Version.V_0_12)
-    );
-    VectorizedRowBatch batch = schema.createRowBatch(5120);
-    for (int i = 0; i < 5120; ++i) {
-      appendInt(batch, 512 - (i % 512));
-    }
-    w.addRowBatch(batch);
-    w.close();
-
-    PrintStream origOut = System.out;
-    ByteArrayOutputStream myOut = new ByteArrayOutputStream();
-    System.setOut(new PrintStream(myOut));
-    FileDump.main(new String[]{testFilePath.toUri().toString()});
-    System.out.flush();
-    String outDump = new String(myOut.toByteArray());
-    // 10 runs of 512 elements. Each run has 2 bytes header, 2 byte base (base = 512, zigzag + varint)
-    // and 1 byte delta (delta = 1). In total, 5 bytes per run.
-    assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 50"));
-    System.setOut(origOut);
-  }
-
-  @Test
-  public void testFixedDeltaLarge() throws Exception {
-    TypeDescription schema = TypeDescription.createInt();
-    Writer w = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf)
-            .compress(CompressionKind.NONE)
-            .setSchema(schema)
-            .rowIndexStride(0)
-            .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
-            .version(OrcFile.Version.V_0_12)
-    );
-    VectorizedRowBatch batch = schema.createRowBatch(5120);
-    for (int i = 0; i < 5120; ++i) {
-      appendInt(batch, i % 512 + ((i % 512) * 100));
-    }
-    w.addRowBatch(batch);
-    w.close();
-
-    PrintStream origOut = System.out;
-    ByteArrayOutputStream myOut = new ByteArrayOutputStream();
-    System.setOut(new PrintStream(myOut));
-    FileDump.main(new String[]{testFilePath.toUri().toString()});
-    System.out.flush();
-    String outDump = new String(myOut.toByteArray());
-    // 10 runs of 512 elements. Each run has 2 bytes header, 1 byte base (base = 0)
-    // and 2 bytes delta (delta = 100, zigzag encoded varint). In total, 5 bytes per run.
-    assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 50"));
-    System.setOut(origOut);
-  }
-
-  @Test
-  public void testFixedDeltaLargeDescending() throws Exception {
-    TypeDescription schema = TypeDescription.createInt();
-    Writer w = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf)
-            .compress(CompressionKind.NONE)
-            .setSchema(schema)
-            .rowIndexStride(0)
-            .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
-            .version(OrcFile.Version.V_0_12)
-    );
-    VectorizedRowBatch batch = schema.createRowBatch(5120);
-    for (int i = 0; i < 5120; ++i) {
-      appendInt(batch, (512 - i % 512) + ((i % 512) * 100));
-    }
-    w.addRowBatch(batch);
-    w.close();
-
-    PrintStream origOut = System.out;
-    ByteArrayOutputStream myOut = new ByteArrayOutputStream();
-    System.setOut(new PrintStream(myOut));
-    FileDump.main(new String[]{testFilePath.toUri().toString()});
-    System.out.flush();
-    String outDump = new String(myOut.toByteArray());
-    // 10 runs of 512 elements. Each run has 2 bytes header, 2 byte base (base = 512, zigzag + varint)
-    // and 2 bytes delta (delta = 100, zigzag encoded varint). In total, 6 bytes per run.
-    assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 60"));
-    System.setOut(origOut);
-  }
-
-  @Test
-  public void testShortRepeat() throws Exception {
-    TypeDescription schema = TypeDescription.createInt();
-    Writer w = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf)
-            .compress(CompressionKind.NONE)
-            .setSchema(schema)
-            .rowIndexStride(0)
-            .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
-            .version(OrcFile.Version.V_0_12)
-    );
-    VectorizedRowBatch batch = schema.createRowBatch(5120);
-    for (int i = 0; i < 5; ++i) {
-      appendInt(batch, 10);
-    }
-    w.addRowBatch(batch);
-    w.close();
-
-    PrintStream origOut = System.out;
-    ByteArrayOutputStream myOut = new ByteArrayOutputStream();
-    System.setOut(new PrintStream(myOut));
-    FileDump.main(new String[]{testFilePath.toUri().toString()});
-    System.out.flush();
-    String outDump = new String(myOut.toByteArray());
-    // 1 byte header + 1 byte value
-    assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 2"));
-    System.setOut(origOut);
-  }
-
-  @Test
-  public void testDeltaUnknownSign() throws Exception {
-    TypeDescription schema = TypeDescription.createInt();
-    Writer w = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf)
-            .compress(CompressionKind.NONE)
-            .setSchema(schema)
-            .rowIndexStride(0)
-            .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
-            .version(OrcFile.Version.V_0_12)
-    );
-    VectorizedRowBatch batch = schema.createRowBatch(5120);
-    appendInt(batch, 0);
-    for (int i = 0; i < 511; ++i) {
-      appendInt(batch, i);
-    }
-    w.addRowBatch(batch);
-    w.close();
-
-    PrintStream origOut = System.out;
-    ByteArrayOutputStream myOut = new ByteArrayOutputStream();
-    System.setOut(new PrintStream(myOut));
-    FileDump.main(new String[]{testFilePath.toUri().toString()});
-    System.out.flush();
-    String outDump = new String(myOut.toByteArray());
-    // monotonicity will be undetermined for this sequence 0,0,1,2,3,...510. Hence DIRECT encoding
-    // will be used. 2 bytes for header and 640 bytes for data (512 values with fixed bit of 10 bits
-    // each, 5120/8 = 640). Total bytes 642
-    assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 642"));
-    System.setOut(origOut);
-  }
-
-  @Test
-  public void testPatchedBase() throws Exception {
-    TypeDescription schema = TypeDescription.createInt();
-    Writer w = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf)
-            .compress(CompressionKind.NONE)
-            .setSchema(schema)
-            .rowIndexStride(0)
-            .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
-            .version(OrcFile.Version.V_0_12)
-    );
-
-    Random rand = new Random(123);
-    VectorizedRowBatch batch = schema.createRowBatch(5120);
-    appendInt(batch, 10000000);
-    for (int i = 0; i < 511; ++i) {
-      appendInt(batch, rand.nextInt(i+1));
-    }
-    w.addRowBatch(batch);
-    w.close();
-
-    PrintStream origOut = System.out;
-    ByteArrayOutputStream myOut = new ByteArrayOutputStream();
-    System.setOut(new PrintStream(myOut));
-    FileDump.main(new String[]{testFilePath.toUri().toString()});
-    System.out.flush();
-    String outDump = new String(myOut.toByteArray());
-    // use PATCHED_BASE encoding
-    assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 583"));
-    System.setOut(origOut);
-  }
-}
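
The DATA stream lengths asserted in the fixed-delta tests above follow directly from the per-run layout described in the comments: a 2-byte header, a zigzag-varint base, and a zigzag-varint delta for each run of 512 values. The following is a minimal sketch of that arithmetic under those assumptions; it is not ORC's RunLengthIntegerWriterV2.

    public class FixedDeltaSizeSketch {
      // Zigzag-encode a signed value, as the comments above describe for base/delta.
      static long zigzag(long v) {
        return (v << 1) ^ (v >> 63);
      }

      // Length in bytes of an unsigned varint (7 payload bits per byte).
      static int varintLen(long v) {
        int len = 1;
        while ((v & ~0x7fL) != 0) {
          v >>>= 7;
          len++;
        }
        return len;
      }

      // Assumed per-run cost: 2-byte header + varint base + varint delta.
      static int runBytes(long base, long delta) {
        return 2 + varintLen(zigzag(base)) + varintLen(zigzag(delta));
      }

      public static void main(String[] args) {
        // (base, delta) for the five fixed-delta tests; each writes 10 runs of 512
        // values. The sign of the delta does not change the varint length here.
        long[][] runs = {{123, 0}, {0, 1}, {512, -1}, {0, 100}, {512, -100}};
        for (long[] r : runs) {
          System.out.println("expected DATA length = " + 10 * runBytes(r[0], r[1]));
        }
        // Prints 50, 40, 50, 50, 60 -- matching the asserted stream lengths.
      }
    }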

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/orc/impl/TestReaderImpl.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/orc/impl/TestReaderImpl.java b/orc/src/test/org/apache/orc/impl/TestReaderImpl.java
deleted file mode 100644
index 23d0dab..0000000
--- a/orc/src/test/org/apache/orc/impl/TestReaderImpl.java
+++ /dev/null
@@ -1,152 +0,0 @@
-/*
- * Copyright 2016 The Apache Software Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc.impl;
-
-import java.io.ByteArrayInputStream;
-import java.io.EOFException;
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.nio.charset.CharacterCodingException;
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.PositionedReadable;
-import org.apache.hadoop.fs.Seekable;
-import org.apache.orc.FileFormatException;
-import org.apache.hadoop.io.Text;
-import org.apache.orc.OrcFile;
-import org.junit.Test;
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.rules.ExpectedException;
-
-public class TestReaderImpl {
-
-  @Rule
-  public ExpectedException thrown = ExpectedException.none();
-
-  private final Path path = new Path("test-file.orc");
-  private FSDataInputStream in;
-  private int psLen;
-  private ByteBuffer buffer;
-
-  @Before
-  public void setup() {
-    in = null;
-  }
-
-  @Test
-  public void testEnsureOrcFooterSmallTextFile() throws IOException {
-    prepareTestCase("1".getBytes());
-    thrown.expect(FileFormatException.class);
-    ReaderImpl.ensureOrcFooter(in, path, psLen, buffer);
-  }
-
-  @Test
-  public void testEnsureOrcFooterLargeTextFile() throws IOException {
-    prepareTestCase("This is Some Text File".getBytes());
-    thrown.expect(FileFormatException.class);
-    ReaderImpl.ensureOrcFooter(in, path, psLen, buffer);
-  }
-
-  @Test
-  public void testEnsureOrcFooter011ORCFile() throws IOException {
-    prepareTestCase(composeContent(OrcFile.MAGIC, "FOOTER"));
-    ReaderImpl.ensureOrcFooter(in, path, psLen, buffer);
-  }
-
-  @Test
-  public void testEnsureOrcFooterCorrectORCFooter() throws IOException {
-    prepareTestCase(composeContent("", OrcFile.MAGIC));
-    ReaderImpl.ensureOrcFooter(in, path, psLen, buffer);
-  }
-
-  private void prepareTestCase(byte[] bytes) {
-    buffer = ByteBuffer.wrap(bytes);
-    psLen = buffer.get(bytes.length - 1) & 0xff;
-    in = new FSDataInputStream(new SeekableByteArrayInputStream(bytes));
-  }
-
-  private byte[] composeContent(String headerStr, String footerStr) throws CharacterCodingException {
-    ByteBuffer header = Text.encode(headerStr);
-    ByteBuffer footer = Text.encode(footerStr);
-    int headerLen = header.remaining();
-    int footerLen = footer.remaining() + 1;
-
-    ByteBuffer buf = ByteBuffer.allocate(headerLen + footerLen);
-
-    buf.put(header);
-    buf.put(footer);
-    buf.put((byte) footerLen);
-    return buf.array();
-  }
-
-  private static final class SeekableByteArrayInputStream extends ByteArrayInputStream
-          implements Seekable, PositionedReadable {
-
-    public SeekableByteArrayInputStream(byte[] buf) {
-      super(buf);
-    }
-
-    @Override
-    public void seek(long pos) throws IOException {
-      this.reset();
-      this.skip(pos);
-    }
-
-    @Override
-    public long getPos() throws IOException {
-      return pos;
-    }
-
-    @Override
-    public boolean seekToNewSource(long targetPos) throws IOException {
-      return false;
-    }
-
-    @Override
-    public int read(long position, byte[] buffer, int offset, int length)
-            throws IOException {
-      long oldPos = getPos();
-      int nread = -1;
-      try {
-        seek(position);
-        nread = read(buffer, offset, length);
-      } finally {
-        seek(oldPos);
-      }
-      return nread;
-    }
-
-    @Override
-    public void readFully(long position, byte[] buffer, int offset, int length)
-            throws IOException {
-      int nread = 0;
-      while (nread < length) {
-        int nbytes = read(position + nread, buffer, offset + nread, length - nread);
-        if (nbytes < 0) {
-          throw new EOFException("End of file reached before reading fully.");
-        }
-        nread += nbytes;
-      }
-    }
-
-    @Override
-    public void readFully(long position, byte[] buffer)
-            throws IOException {
-      readFully(position, buffer, 0, buffer.length);
-    }
-  }
-}


[08/37] hive git commit: HIVE-17118. Move the hive-orc source files to make the package names unique.

Posted by om...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/impl/TestRecordReaderImpl.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/impl/TestRecordReaderImpl.java b/orc/src/test/org/apache/hive/orc/impl/TestRecordReaderImpl.java
new file mode 100644
index 0000000..1849d96
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/impl/TestRecordReaderImpl.java
@@ -0,0 +1,1708 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc.impl;
+
+import static junit.framework.Assert.assertEquals;
+import static org.hamcrest.core.Is.is;
+import static org.junit.Assert.*;
+import static org.mockito.Mockito.any;
+import static org.mockito.Mockito.atLeastOnce;
+import static org.mockito.Mockito.doThrow;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.sql.Timestamp;
+import java.util.ArrayList;
+import java.util.List;
+
+import junit.framework.Assert;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PositionedReadable;
+import org.apache.hadoop.fs.Seekable;
+import org.apache.hadoop.hive.common.io.DiskRangeList;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentImpl;
+import org.apache.hive.orc.ColumnStatistics;
+import org.apache.hive.orc.OrcFile;
+import org.apache.hive.orc.BloomFilterIO;
+import org.apache.hive.orc.DataReader;
+import org.apache.hive.orc.RecordReader;
+import org.apache.hive.orc.TypeDescription;
+import org.apache.hive.orc.Writer;
+import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
+import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.io.DataOutputBuffer;
+import org.apache.hive.orc.Reader;
+import org.apache.hive.orc.OrcProto;
+
+import org.junit.Test;
+import org.mockito.MockSettings;
+import org.mockito.Mockito;
+
+public class TestRecordReaderImpl {
+  /**
+   * Create a predicate leaf. This is used by another test.
+   */
+  public static PredicateLeaf createPredicateLeaf(PredicateLeaf.Operator operator,
+                                                  PredicateLeaf.Type type,
+                                                  String columnName,
+                                                  Object literal,
+                                                  List<Object> literalList) {
+    return new SearchArgumentImpl.PredicateLeafImpl(operator, type, columnName,
+        literal, literalList, null);
+  }
+
+  // can add .verboseLogging() to cause Mockito to log invocations
+  private final MockSettings settings = Mockito.withSettings().verboseLogging();
+
+  static class BufferInStream
+      extends InputStream implements PositionedReadable, Seekable {
+    private final byte[] buffer;
+    private final int length;
+    private int position = 0;
+
+    BufferInStream(byte[] bytes, int length) {
+      this.buffer = bytes;
+      this.length = length;
+    }
+
+    @Override
+    public int read() {
+      if (position < length) {
+        return buffer[position++];
+      }
+      return -1;
+    }
+
+    @Override
+    public int read(byte[] bytes, int offset, int length) {
+      int lengthToRead = Math.min(length, this.length - this.position);
+      if (lengthToRead >= 0) {
+        for(int i=0; i < lengthToRead; ++i) {
+          bytes[offset + i] = buffer[position++];
+        }
+        return lengthToRead;
+      } else {
+        return -1;
+      }
+    }
+
+    @Override
+    public int read(long position, byte[] bytes, int offset, int length) {
+      this.position = (int) position;
+      return read(bytes, offset, length);
+    }
+
+    @Override
+    public void readFully(long position, byte[] bytes, int offset,
+                          int length) throws IOException {
+      this.position = (int) position;
+      while (length > 0) {
+        int result = read(bytes, offset, length);
+        offset += result;
+        length -= result;
+        if (result < 0) {
+          throw new IOException("Read past end of buffer at " + offset);
+        }
+      }
+    }
+
+    @Override
+    public void readFully(long position, byte[] bytes) throws IOException {
+      readFully(position, bytes, 0, bytes.length);
+    }
+
+    @Override
+    public void seek(long position) {
+      this.position = (int) position;
+    }
+
+    @Override
+    public long getPos() {
+      return position;
+    }
+
+    @Override
+    public boolean seekToNewSource(long position) throws IOException {
+      this.position = (int) position;
+      return false;
+    }
+  }
+
+  @Test
+  public void testMaxLengthToReader() throws Exception {
+    Configuration conf = new Configuration();
+    OrcProto.Type rowType = OrcProto.Type.newBuilder()
+        .setKind(OrcProto.Type.Kind.STRUCT).build();
+    OrcProto.Footer footer = OrcProto.Footer.newBuilder()
+        .setHeaderLength(0).setContentLength(0).setNumberOfRows(0)
+        .setRowIndexStride(0).addTypes(rowType).build();
+    OrcProto.PostScript ps = OrcProto.PostScript.newBuilder()
+        .setCompression(OrcProto.CompressionKind.NONE)
+        .setFooterLength(footer.getSerializedSize())
+        .setMagic("ORC").addVersion(0).addVersion(11).build();
+    DataOutputBuffer buffer = new DataOutputBuffer();
+    footer.writeTo(buffer);
+    ps.writeTo(buffer);
+    buffer.write(ps.getSerializedSize());
+    FileSystem fs = mock(FileSystem.class, settings);
+    FSDataInputStream file =
+        new FSDataInputStream(new BufferInStream(buffer.getData(),
+            buffer.getLength()));
+    Path p = new Path("/dir/file.orc");
+    when(fs.open(p)).thenReturn(file);
+    OrcFile.ReaderOptions options = OrcFile.readerOptions(conf);
+    options.filesystem(fs);
+    options.maxLength(buffer.getLength());
+    when(fs.getFileStatus(p))
+        .thenReturn(new FileStatus(10, false, 3, 3000, 0, p));
+    Reader reader = OrcFile.createReader(p, options);
+  }
+
+  @Test
+  public void testCompareToRangeInt() throws Exception {
+    Assert.assertEquals(RecordReaderImpl.Location.BEFORE,
+      RecordReaderImpl.compareToRange(19L, 20L, 40L));
+    Assert.assertEquals(RecordReaderImpl.Location.AFTER,
+      RecordReaderImpl.compareToRange(41L, 20L, 40L));
+    Assert.assertEquals(RecordReaderImpl.Location.MIN,
+        RecordReaderImpl.compareToRange(20L, 20L, 40L));
+    Assert.assertEquals(RecordReaderImpl.Location.MIDDLE,
+        RecordReaderImpl.compareToRange(21L, 20L, 40L));
+    Assert.assertEquals(RecordReaderImpl.Location.MAX,
+      RecordReaderImpl.compareToRange(40L, 20L, 40L));
+    Assert.assertEquals(RecordReaderImpl.Location.BEFORE,
+      RecordReaderImpl.compareToRange(0L, 1L, 1L));
+    Assert.assertEquals(RecordReaderImpl.Location.MIN,
+      RecordReaderImpl.compareToRange(1L, 1L, 1L));
+    Assert.assertEquals(RecordReaderImpl.Location.AFTER,
+      RecordReaderImpl.compareToRange(2L, 1L, 1L));
+  }
+
+  @Test
+  public void testCompareToRangeString() throws Exception {
+    Assert.assertEquals(RecordReaderImpl.Location.BEFORE,
+        RecordReaderImpl.compareToRange("a", "b", "c"));
+    Assert.assertEquals(RecordReaderImpl.Location.AFTER,
+        RecordReaderImpl.compareToRange("d", "b", "c"));
+    Assert.assertEquals(RecordReaderImpl.Location.MIN,
+        RecordReaderImpl.compareToRange("b", "b", "c"));
+    Assert.assertEquals(RecordReaderImpl.Location.MIDDLE,
+        RecordReaderImpl.compareToRange("bb", "b", "c"));
+    Assert.assertEquals(RecordReaderImpl.Location.MAX,
+        RecordReaderImpl.compareToRange("c", "b", "c"));
+    Assert.assertEquals(RecordReaderImpl.Location.BEFORE,
+        RecordReaderImpl.compareToRange("a", "b", "b"));
+    Assert.assertEquals(RecordReaderImpl.Location.MIN,
+        RecordReaderImpl.compareToRange("b", "b", "b"));
+    Assert.assertEquals(RecordReaderImpl.Location.AFTER,
+        RecordReaderImpl.compareToRange("c", "b", "b"));
+  }
+
+  @Test
+  public void testCompareToCharNeedConvert() throws Exception {
+    Assert.assertEquals(RecordReaderImpl.Location.BEFORE,
+      RecordReaderImpl.compareToRange("apple", "hello", "world"));
+    Assert.assertEquals(RecordReaderImpl.Location.AFTER,
+      RecordReaderImpl.compareToRange("zombie", "hello", "world"));
+    Assert.assertEquals(RecordReaderImpl.Location.MIN,
+        RecordReaderImpl.compareToRange("hello", "hello", "world"));
+    Assert.assertEquals(RecordReaderImpl.Location.MIDDLE,
+        RecordReaderImpl.compareToRange("pilot", "hello", "world"));
+    Assert.assertEquals(RecordReaderImpl.Location.MAX,
+      RecordReaderImpl.compareToRange("world", "hello", "world"));
+    Assert.assertEquals(RecordReaderImpl.Location.BEFORE,
+      RecordReaderImpl.compareToRange("apple", "hello", "hello"));
+    Assert.assertEquals(RecordReaderImpl.Location.MIN,
+      RecordReaderImpl.compareToRange("hello", "hello", "hello"));
+    Assert.assertEquals(RecordReaderImpl.Location.AFTER,
+      RecordReaderImpl.compareToRange("zombie", "hello", "hello"));
+  }
+
+  @Test
+  public void testGetMin() throws Exception {
+    assertEquals(10L, RecordReaderImpl.getMin(
+      ColumnStatisticsImpl.deserialize(createIntStats(10L, 100L))));
+    assertEquals(10.0d, RecordReaderImpl.getMin(ColumnStatisticsImpl.deserialize(
+      OrcProto.ColumnStatistics.newBuilder()
+        .setDoubleStatistics(OrcProto.DoubleStatistics.newBuilder()
+          .setMinimum(10.0d).setMaximum(100.0d).build()).build())));
+    assertEquals(null, RecordReaderImpl.getMin(ColumnStatisticsImpl.deserialize(
+      OrcProto.ColumnStatistics.newBuilder()
+        .setStringStatistics(OrcProto.StringStatistics.newBuilder().build())
+        .build())));
+    assertEquals("a", RecordReaderImpl.getMin(ColumnStatisticsImpl.deserialize(
+      OrcProto.ColumnStatistics.newBuilder()
+        .setStringStatistics(OrcProto.StringStatistics.newBuilder()
+          .setMinimum("a").setMaximum("b").build()).build())));
+    assertEquals("hello", RecordReaderImpl.getMin(ColumnStatisticsImpl
+      .deserialize(createStringStats("hello", "world"))));
+    assertEquals(HiveDecimal.create("111.1"), RecordReaderImpl.getMin(ColumnStatisticsImpl
+      .deserialize(createDecimalStats("111.1", "112.1"))));
+  }
+
+  private static OrcProto.ColumnStatistics createIntStats(Long min,
+                                                          Long max) {
+    OrcProto.IntegerStatistics.Builder intStats =
+        OrcProto.IntegerStatistics.newBuilder();
+    if (min != null) {
+      intStats.setMinimum(min);
+    }
+    if (max != null) {
+      intStats.setMaximum(max);
+    }
+    return OrcProto.ColumnStatistics.newBuilder()
+        .setIntStatistics(intStats.build()).build();
+  }
+
+  private static OrcProto.ColumnStatistics createBooleanStats(int n, int trueCount) {
+    OrcProto.BucketStatistics.Builder boolStats = OrcProto.BucketStatistics.newBuilder();
+    boolStats.addCount(trueCount);
+    return OrcProto.ColumnStatistics.newBuilder().setNumberOfValues(n).setBucketStatistics(
+      boolStats.build()).build();
+  }
+
+  private static OrcProto.ColumnStatistics createIntStats(int min, int max) {
+    OrcProto.IntegerStatistics.Builder intStats = OrcProto.IntegerStatistics.newBuilder();
+    intStats.setMinimum(min);
+    intStats.setMaximum(max);
+    return OrcProto.ColumnStatistics.newBuilder().setIntStatistics(intStats.build()).build();
+  }
+
+  private static OrcProto.ColumnStatistics createDoubleStats(double min, double max) {
+    OrcProto.DoubleStatistics.Builder dblStats = OrcProto.DoubleStatistics.newBuilder();
+    dblStats.setMinimum(min);
+    dblStats.setMaximum(max);
+    return OrcProto.ColumnStatistics.newBuilder().setDoubleStatistics(dblStats.build()).build();
+  }
+
+  private static OrcProto.ColumnStatistics createStringStats(String min, String max,
+      boolean hasNull) {
+    OrcProto.StringStatistics.Builder strStats = OrcProto.StringStatistics.newBuilder();
+    strStats.setMinimum(min);
+    strStats.setMaximum(max);
+    return OrcProto.ColumnStatistics.newBuilder().setStringStatistics(strStats.build())
+        .setHasNull(hasNull).build();
+  }
+
+  private static OrcProto.ColumnStatistics createStringStats(String min, String max) {
+    OrcProto.StringStatistics.Builder strStats = OrcProto.StringStatistics.newBuilder();
+    strStats.setMinimum(min);
+    strStats.setMaximum(max);
+    return OrcProto.ColumnStatistics.newBuilder().setStringStatistics(strStats.build()).build();
+  }
+
+  private static OrcProto.ColumnStatistics createDateStats(int min, int max) {
+    OrcProto.DateStatistics.Builder dateStats = OrcProto.DateStatistics.newBuilder();
+    dateStats.setMinimum(min);
+    dateStats.setMaximum(max);
+    return OrcProto.ColumnStatistics.newBuilder().setDateStatistics(dateStats.build()).build();
+  }
+
+  private static OrcProto.ColumnStatistics createTimestampStats(long min, long max) {
+    OrcProto.TimestampStatistics.Builder tsStats = OrcProto.TimestampStatistics.newBuilder();
+    tsStats.setMinimum(min);
+    tsStats.setMaximum(max);
+    return OrcProto.ColumnStatistics.newBuilder().setTimestampStatistics(tsStats.build()).build();
+  }
+
+  private static OrcProto.ColumnStatistics createDecimalStats(String min, String max) {
+    OrcProto.DecimalStatistics.Builder decStats = OrcProto.DecimalStatistics.newBuilder();
+    decStats.setMinimum(min);
+    decStats.setMaximum(max);
+    return OrcProto.ColumnStatistics.newBuilder().setDecimalStatistics(decStats.build()).build();
+  }
+
+  private static OrcProto.ColumnStatistics createDecimalStats(String min, String max,
+      boolean hasNull) {
+    OrcProto.DecimalStatistics.Builder decStats = OrcProto.DecimalStatistics.newBuilder();
+    decStats.setMinimum(min);
+    decStats.setMaximum(max);
+    return OrcProto.ColumnStatistics.newBuilder().setDecimalStatistics(decStats.build())
+        .setHasNull(hasNull).build();
+  }
+
+  @Test
+  public void testGetMax() throws Exception {
+    assertEquals(100L, RecordReaderImpl.getMax(ColumnStatisticsImpl.deserialize(createIntStats(10L, 100L))));
+    assertEquals(100.0d, RecordReaderImpl.getMax(ColumnStatisticsImpl.deserialize(
+        OrcProto.ColumnStatistics.newBuilder()
+            .setDoubleStatistics(OrcProto.DoubleStatistics.newBuilder()
+                .setMinimum(10.0d).setMaximum(100.0d).build()).build())));
+    assertEquals(null, RecordReaderImpl.getMax(ColumnStatisticsImpl.deserialize(
+        OrcProto.ColumnStatistics.newBuilder()
+            .setStringStatistics(OrcProto.StringStatistics.newBuilder().build())
+            .build())));
+    assertEquals("b", RecordReaderImpl.getMax(ColumnStatisticsImpl.deserialize(
+        OrcProto.ColumnStatistics.newBuilder()
+            .setStringStatistics(OrcProto.StringStatistics.newBuilder()
+                .setMinimum("a").setMaximum("b").build()).build())));
+    assertEquals("world", RecordReaderImpl.getMax(ColumnStatisticsImpl
+      .deserialize(createStringStats("hello", "world"))));
+    assertEquals(HiveDecimal.create("112.1"), RecordReaderImpl.getMax(ColumnStatisticsImpl
+      .deserialize(createDecimalStats("111.1", "112.1"))));
+  }
+
+  @Test
+  public void testPredEvalWithBooleanStats() throws Exception {
+    PredicateLeaf pred = createPredicateLeaf(
+        PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.BOOLEAN, "x", true, null);
+    assertEquals(TruthValue.YES_NO,
+        RecordReaderImpl.evaluatePredicateProto(createBooleanStats(10, 10), pred, null));
+    assertEquals(TruthValue.NO,
+        RecordReaderImpl.evaluatePredicateProto(createBooleanStats(10, 0), pred, null));
+
+    pred = createPredicateLeaf(
+        PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.BOOLEAN, "x", true, null);
+    assertEquals(TruthValue.YES_NO,
+        RecordReaderImpl.evaluatePredicateProto(createBooleanStats(10, 10), pred, null));
+    assertEquals(TruthValue.NO,
+        RecordReaderImpl.evaluatePredicateProto(createBooleanStats(10, 0), pred, null));
+
+    pred = createPredicateLeaf(
+        PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.BOOLEAN, "x", false, null);
+    assertEquals(TruthValue.NO,
+      RecordReaderImpl.evaluatePredicateProto(createBooleanStats(10, 10), pred, null));
+    assertEquals(TruthValue.YES_NO,
+      RecordReaderImpl.evaluatePredicateProto(createBooleanStats(10, 0), pred, null));
+  }
+
+  @Test
+  public void testPredEvalWithIntStats() throws Exception {
+    PredicateLeaf pred = createPredicateLeaf(
+        PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.LONG, "x", 15L, null);
+    assertEquals(TruthValue.YES_NO,
+        RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null));
+
+    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
+        PredicateLeaf.Type.FLOAT, "x", 15.0, null);
+    assertEquals(TruthValue.YES_NO,
+        RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null));
+
+    // Stats gets converted to column type. "15" is outside of "10" and "100"
+    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
+        PredicateLeaf.Type.STRING, "x", "15", null);
+    assertEquals(TruthValue.NO,
+        RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null));
+
+    // Integer stats will not be converted to date because of days/seconds/millis ambiguity
+    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
+        PredicateLeaf.Type.DATE, "x", new DateWritable(15).get(), null);
+    assertEquals(TruthValue.YES_NO,
+        RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null));
+
+    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
+        PredicateLeaf.Type.DECIMAL, "x", new HiveDecimalWritable("15"), null);
+    assertEquals(TruthValue.YES_NO,
+        RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null));
+
+    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
+        PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(15), null);
+    assertEquals(TruthValue.YES_NO,
+      RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null));
+  }
+
+  @Test
+  public void testPredEvalWithDoubleStats() throws Exception {
+    PredicateLeaf pred = createPredicateLeaf(
+        PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.LONG, "x", 15L, null);
+    assertEquals(TruthValue.YES_NO,
+        RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null));
+
+    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
+        PredicateLeaf.Type.FLOAT, "x", 15.0, null);
+    assertEquals(TruthValue.YES_NO,
+        RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null));
+
+    // Stats gets converted to column type. "15.0" is outside of "10.0" and "100.0"
+    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
+        PredicateLeaf.Type.STRING, "x", "15", null);
+    assertEquals(TruthValue.NO,
+        RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null));
+
+    // Double is not converted to date type because of days/seconds/millis ambiguity
+    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
+        PredicateLeaf.Type.DATE, "x", new DateWritable(15).get(), null);
+    assertEquals(TruthValue.YES_NO,
+        RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null));
+
+    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
+        PredicateLeaf.Type.DECIMAL, "x", new HiveDecimalWritable("15"), null);
+    assertEquals(TruthValue.YES_NO,
+        RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null));
+
+    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
+        PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(15*1000L), null);
+    assertEquals(TruthValue.YES_NO,
+        RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null));
+
+    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
+        PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(150*1000L), null);
+    assertEquals(TruthValue.NO,
+        RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null));
+  }
+
+  @Test
+  public void testPredEvalWithStringStats() throws Exception {
+    PredicateLeaf pred = createPredicateLeaf(
+        PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.LONG, "x", 100L, null);
+    assertEquals(TruthValue.YES_NO,
+        RecordReaderImpl.evaluatePredicateProto(createStringStats("10", "1000"), pred, null));
+
+    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
+        PredicateLeaf.Type.FLOAT, "x", 100.0, null);
+    assertEquals(TruthValue.YES_NO,
+        RecordReaderImpl.evaluatePredicateProto(createStringStats("10", "1000"), pred, null));
+
+    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
+        PredicateLeaf.Type.STRING, "x", "100", null);
+    assertEquals(TruthValue.YES_NO,
+        RecordReaderImpl.evaluatePredicateProto(createStringStats("10", "1000"), pred, null));
+
+    // IllegalArgumentException is thrown when converting String to Date, hence YES_NO
+    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
+        PredicateLeaf.Type.DATE, "x", new DateWritable(100).get(), null);
+    assertEquals(TruthValue.YES_NO,
+        RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 1000), pred, null));
+
+    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
+        PredicateLeaf.Type.DECIMAL, "x", new HiveDecimalWritable("100"), null);
+    assertEquals(TruthValue.YES_NO,
+        RecordReaderImpl.evaluatePredicateProto(createStringStats("10", "1000"), pred, null));
+
+    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
+        PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(100), null);
+    assertEquals(TruthValue.YES_NO,
+        RecordReaderImpl.evaluatePredicateProto(createStringStats("10", "1000"), pred, null));
+  }
+
+  @Test
+  public void testPredEvalWithDateStats() throws Exception {
+    PredicateLeaf pred = createPredicateLeaf(
+        PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.LONG, "x", 15L, null);
+    // Date to Integer conversion is not possible.
+    assertEquals(TruthValue.YES_NO,
+        RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null));
+
+    // Date to Float conversion is also not possible.
+    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
+        PredicateLeaf.Type.FLOAT, "x", 15.0, null);
+    assertEquals(TruthValue.YES_NO,
+        RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null));
+
+    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
+        PredicateLeaf.Type.STRING, "x", "15", null);
+    assertEquals(TruthValue.NO,
+        RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null));
+
+    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
+        PredicateLeaf.Type.STRING, "x", "1970-01-11", null);
+    assertEquals(TruthValue.YES_NO,
+        RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null));
+
+    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
+        PredicateLeaf.Type.STRING, "x", "15.1", null);
+    assertEquals(TruthValue.NO,
+        RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null));
+
+    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
+        PredicateLeaf.Type.STRING, "x", "__a15__1", null);
+    assertEquals(TruthValue.NO,
+        RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null));
+
+    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
+        PredicateLeaf.Type.STRING, "x", "2000-01-16", null);
+    assertEquals(TruthValue.NO,
+        RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null));
+
+    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
+        PredicateLeaf.Type.STRING, "x", "1970-01-16", null);
+    assertEquals(TruthValue.YES_NO,
+        RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null));
+
+    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
+        PredicateLeaf.Type.DATE, "x", new DateWritable(15).get(), null);
+    assertEquals(TruthValue.YES_NO,
+        RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null));
+
+    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
+        PredicateLeaf.Type.DATE, "x", new DateWritable(150).get(), null);
+    assertEquals(TruthValue.NO,
+        RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null));
+
+    // Date to Decimal conversion is also not possible.
+    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
+        PredicateLeaf.Type.DECIMAL, "x", new HiveDecimalWritable("15"), null);
+    assertEquals(TruthValue.YES_NO,
+        RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null));
+
+    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
+        PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(15), null);
+    assertEquals(TruthValue.NO,
+        RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null));
+
+    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
+        PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(15L * 24L * 60L * 60L * 1000L), null);
+    assertEquals(TruthValue.YES_NO,
+        RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null));
+  }
+
+  @Test
+  public void testPredEvalWithDecimalStats() throws Exception {
+    PredicateLeaf pred = createPredicateLeaf(
+        PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.LONG, "x", 15L, null);
+    assertEquals(TruthValue.YES_NO,
+        RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null));
+
+    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
+        PredicateLeaf.Type.FLOAT, "x", 15.0, null);
+    assertEquals(TruthValue.YES_NO,
+        RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null));
+
+    // "15" out of range of "10.0" and "100.0"
+    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
+        PredicateLeaf.Type.STRING, "x", "15", null);
+    assertEquals(TruthValue.NO,
+        RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null));
+
+    // Decimal to Date conversion is not possible.
+    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
+        PredicateLeaf.Type.DATE, "x", new DateWritable(15).get(), null);
+    assertEquals(TruthValue.YES_NO,
+        RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null));
+
+    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
+        PredicateLeaf.Type.DECIMAL, "x", new HiveDecimalWritable("15"), null);
+    assertEquals(TruthValue.YES_NO,
+        RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null));
+
+    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
+        PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(15 * 1000L), null);
+    assertEquals(TruthValue.YES_NO,
+        RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null));
+
+    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
+        PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(150 * 1000L), null);
+    assertEquals(TruthValue.NO,
+        RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null));
+  }
+
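+  // Timestamp predicate pushdown is disabled until ORC-135 is resolved, so every
+  // combination below is expected to evaluate to YES_NO_NULL (nothing is pruned).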
+  @Test
+  public void testPredEvalWithTimestampStats() throws Exception {
+    PredicateLeaf pred = createPredicateLeaf(
+        PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.LONG, "x", 15L, null);
+    // timestamp PPD is disabled until ORC-135
+    assertEquals(TruthValue.YES_NO_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null));
+
+    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
+        PredicateLeaf.Type.FLOAT, "x", 15.0, null);
+    // timestamp PPD is disabled until ORC-135
+    assertEquals(TruthValue.YES_NO_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null));
+    // timestamp PPD is disabled until ORC-135
+    assertEquals(TruthValue.YES_NO_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10000, 100000), pred, null));
+
+    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
+        PredicateLeaf.Type.STRING, "x", "15", null);
+    // timestamp PPD is disabled until ORC-135
+    assertEquals(TruthValue.YES_NO_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null));
+
+    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
+        PredicateLeaf.Type.STRING, "x", new Timestamp(15).toString(), null);
+    // timestamp PPD is disabled until ORC-135
+    assertEquals(TruthValue.YES_NO_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null));
+
+    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
+        PredicateLeaf.Type.DATE, "x", new DateWritable(15).get(), null);
+    // timestamp PPD is disabled until ORC-135
+    assertEquals(TruthValue.YES_NO_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null));
+    // timestamp PPD is disabled until ORC-135
+    assertEquals(TruthValue.YES_NO_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10 * 24L * 60L * 60L * 1000L,
+          100 * 24L * 60L * 60L * 1000L), pred, null));
+
+    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
+        PredicateLeaf.Type.DECIMAL, "x", new HiveDecimalWritable("15"), null);
+    // timestamp PPD is disabled until ORC-135
+    assertEquals(TruthValue.YES_NO_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null));
+    // timestamp PPD is disabled until ORC-135
+    assertEquals(TruthValue.YES_NO_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10000, 100000), pred, null));
+
+    pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
+        PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(15), null);
+    // timestamp PPD is disabled until ORC-135
+    assertEquals(TruthValue.YES_NO_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null));
+    // timestamp PPD is disabled until ORC-135
+    assertEquals(TruthValue.YES_NO_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10000, 100000), pred, null));
+  }
+
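+  // EQUALS can evaluate to NULL when the column may contain nulls, so its truth
+  // values carry a _NULL component (NO_NULL, YES_NO_NULL, YES_NULL). The
+  // NULL_SAFE_EQUALS test that follows never yields NULL, so the same ranges
+  // produce the plain NO / YES_NO results.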
+  @Test
+  public void testEquals() throws Exception {
+    PredicateLeaf pred = createPredicateLeaf
+        (PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.LONG,
+            "x", 15L, null);
+    assertEquals(TruthValue.NO_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 30L), pred, null));
+    assertEquals(TruthValue.YES_NO_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createIntStats(15L, 30L), pred, null));
+    assertEquals(TruthValue.YES_NO_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 30L), pred, null));
+    assertEquals(TruthValue.YES_NO_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 15L), pred, null));
+    assertEquals(TruthValue.NO_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createIntStats(0L, 10L), pred, null));
+    assertEquals(TruthValue.YES_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createIntStats(15L, 15L), pred, null));
+  }
+
+  @Test
+  public void testNullSafeEquals() throws Exception {
+    PredicateLeaf pred = createPredicateLeaf
+        (PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.LONG,
+            "x", 15L, null);
+    assertEquals(TruthValue.NO,
+        RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 30L), pred, null));
+    assertEquals(TruthValue.YES_NO,
+        RecordReaderImpl.evaluatePredicateProto(createIntStats(15L, 30L), pred, null));
+    assertEquals(TruthValue.YES_NO,
+        RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 30L), pred, null));
+    assertEquals(TruthValue.YES_NO,
+        RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 15L), pred, null));
+    assertEquals(TruthValue.NO,
+        RecordReaderImpl.evaluatePredicateProto(createIntStats(0L, 10L), pred, null));
+    assertEquals(TruthValue.YES_NO,
+        RecordReaderImpl.evaluatePredicateProto(createIntStats(15L, 15L), pred, null));
+  }
+
+  @Test
+  public void testLessThan() throws Exception {
+    PredicateLeaf lessThan = createPredicateLeaf
+        (PredicateLeaf.Operator.LESS_THAN, PredicateLeaf.Type.LONG,
+            "x", 15L, null);
+    assertEquals(TruthValue.NO_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 30L), lessThan, null));
+    assertEquals(TruthValue.NO_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createIntStats(15L, 30L), lessThan, null));
+    assertEquals(TruthValue.YES_NO_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 30L), lessThan, null));
+    assertEquals(TruthValue.YES_NO_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 15L), lessThan, null));
+    assertEquals(TruthValue.YES_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createIntStats(0L, 10L), lessThan, null));
+  }
+
+  @Test
+  public void testLessThanEquals() throws Exception {
+    PredicateLeaf pred = createPredicateLeaf
+        (PredicateLeaf.Operator.LESS_THAN_EQUALS, PredicateLeaf.Type.LONG,
+            "x", 15L, null);
+    assertEquals(TruthValue.NO_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 30L), pred, null));
+    assertEquals(TruthValue.YES_NO_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createIntStats(15L, 30L), pred, null));
+    assertEquals(TruthValue.YES_NO_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 30L), pred, null));
+    assertEquals(TruthValue.YES_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 15L), pred, null));
+    assertEquals(TruthValue.YES_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createIntStats(0L, 10L), pred, null));
+  }
+
+  @Test
+  public void testIn() throws Exception {
+    List<Object> args = new ArrayList<Object>();
+    args.add(10L);
+    args.add(20L);
+    PredicateLeaf pred = createPredicateLeaf
+        (PredicateLeaf.Operator.IN, PredicateLeaf.Type.LONG,
+            "x", null, args);
+    assertEquals(TruthValue.YES_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 20L), pred, null));
+    assertEquals(TruthValue.NO_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createIntStats(30L, 30L), pred, null));
+    assertEquals(TruthValue.YES_NO_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 30L), pred, null));
+    assertEquals(TruthValue.NO_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createIntStats(12L, 18L), pred, null));
+  }
+
+  @Test
+  public void testBetween() throws Exception {
+    List<Object> args = new ArrayList<Object>();
+    args.add(10L);
+    args.add(20L);
+    PredicateLeaf pred = createPredicateLeaf
+        (PredicateLeaf.Operator.BETWEEN, PredicateLeaf.Type.LONG,
+            "x", null, args);
+    assertEquals(TruthValue.NO_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createIntStats(0L, 5L), pred, null));
+    assertEquals(TruthValue.NO_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createIntStats(30L, 40L), pred, null));
+    assertEquals(TruthValue.YES_NO_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createIntStats(5L, 15L), pred, null));
+    assertEquals(TruthValue.YES_NO_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createIntStats(15L, 25L), pred, null));
+    assertEquals(TruthValue.YES_NO_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createIntStats(5L, 25L), pred, null));
+    assertEquals(TruthValue.YES_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createIntStats(10L, 20L), pred, null));
+    assertEquals(TruthValue.YES_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createIntStats(12L, 18L), pred, null));
+  }
+
+  @Test
+  public void testIsNull() throws Exception {
+    PredicateLeaf pred = createPredicateLeaf
+        (PredicateLeaf.Operator.IS_NULL, PredicateLeaf.Type.LONG,
+            "x", null, null);
+    assertEquals(TruthValue.YES_NO,
+        RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 30L), pred, null));
+  }
+
+
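+  // In the *WithNullInStats tests the boolean passed to createStringStats is the
+  // hasNull flag of the statistics. With nulls possible, the EQUALS/LESS_THAN/
+  // IN/BETWEEN results keep a _NULL component, NULL_SAFE_EQUALS stays NULL-free,
+  // and IS_NULL flips from YES_NO to NO once hasNull is false.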
+  @Test
+  public void testEqualsWithNullInStats() throws Exception {
+    PredicateLeaf pred = createPredicateLeaf
+        (PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.STRING,
+            "x", "c", null);
+    assertEquals(TruthValue.NO_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createStringStats("d", "e", true), pred, null)); // before
+    assertEquals(TruthValue.NO_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createStringStats("a", "b", true), pred, null)); // after
+    assertEquals(TruthValue.YES_NO_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "c", true), pred, null)); // max
+    assertEquals(TruthValue.YES_NO_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "d", true), pred, null)); // min
+    assertEquals(TruthValue.YES_NO_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "d", true), pred, null)); // middle
+    assertEquals(TruthValue.YES_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "c", true), pred, null)); // same
+  }
+
+  @Test
+  public void testNullSafeEqualsWithNullInStats() throws Exception {
+    PredicateLeaf pred = createPredicateLeaf
+        (PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.STRING,
+            "x", "c", null);
+    assertEquals(TruthValue.NO,
+        RecordReaderImpl.evaluatePredicateProto(createStringStats("d", "e", true), pred, null)); // before
+    assertEquals(TruthValue.NO,
+        RecordReaderImpl.evaluatePredicateProto(createStringStats("a", "b", true), pred, null)); // after
+    assertEquals(TruthValue.YES_NO,
+        RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "c", true), pred, null)); // max
+    assertEquals(TruthValue.YES_NO,
+        RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "d", true), pred, null)); // min
+    assertEquals(TruthValue.YES_NO,
+        RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "d", true), pred, null)); // middle
+    assertEquals(TruthValue.YES_NO,
+        RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "c", true), pred, null)); // same
+  }
+
+  @Test
+  public void testLessThanWithNullInStats() throws Exception {
+    PredicateLeaf pred = createPredicateLeaf
+        (PredicateLeaf.Operator.LESS_THAN, PredicateLeaf.Type.STRING,
+            "x", "c", null);
+    assertEquals(TruthValue.NO_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createStringStats("d", "e", true), pred, null)); // before
+    assertEquals(TruthValue.YES_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createStringStats("a", "b", true), pred, null)); // after
+    assertEquals(TruthValue.YES_NO_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "c", true), pred, null)); // max
+    assertEquals(TruthValue.NO_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "d", true), pred, null)); // min
+    assertEquals(TruthValue.YES_NO_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "d", true), pred, null)); // middle
+    assertEquals(TruthValue.NO_NULL, // min, same stats
+        RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "c", true), pred, null));
+  }
+
+  @Test
+  public void testLessThanEqualsWithNullInStats() throws Exception {
+    PredicateLeaf pred = createPredicateLeaf
+        (PredicateLeaf.Operator.LESS_THAN_EQUALS, PredicateLeaf.Type.STRING,
+            "x", "c", null);
+    assertEquals(TruthValue.NO_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createStringStats("d", "e", true), pred, null)); // before
+    assertEquals(TruthValue.YES_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createStringStats("a", "b", true), pred, null)); // after
+    assertEquals(TruthValue.YES_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "c", true), pred, null)); // max
+    assertEquals(TruthValue.YES_NO_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "d", true), pred, null)); // min
+    assertEquals(TruthValue.YES_NO_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "d", true), pred, null)); // middle
+    assertEquals(TruthValue.YES_NO_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "c", true), pred, null)); // same
+  }
+
+  @Test
+  public void testInWithNullInStats() throws Exception {
+    List<Object> args = new ArrayList<Object>();
+    args.add("c");
+    args.add("f");
+    PredicateLeaf pred = createPredicateLeaf
+        (PredicateLeaf.Operator.IN, PredicateLeaf.Type.STRING,
+            "x", null, args);
+    assertEquals(TruthValue.NO_NULL, // before & after
+        RecordReaderImpl.evaluatePredicateProto(createStringStats("d", "e", true), pred, null));
+    assertEquals(TruthValue.NO_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createStringStats("a", "b", true), pred, null)); // after
+    assertEquals(TruthValue.YES_NO_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createStringStats("e", "f", true), pred, null)); // max
+    assertEquals(TruthValue.YES_NO_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "d", true), pred, null)); // min
+    assertEquals(TruthValue.YES_NO_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "d", true), pred, null)); // middle
+    assertEquals(TruthValue.YES_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "c", true), pred, null)); // same
+  }
+
+  @Test
+  public void testBetweenWithNullInStats() throws Exception {
+    List<Object> args = new ArrayList<Object>();
+    args.add("c");
+    args.add("f");
+    PredicateLeaf pred = createPredicateLeaf
+        (PredicateLeaf.Operator.BETWEEN, PredicateLeaf.Type.STRING,
+            "x", null, args);
+    assertEquals(TruthValue.YES_NULL, // before & after
+        RecordReaderImpl.evaluatePredicateProto(createStringStats("d", "e", true), pred, null));
+    assertEquals(TruthValue.YES_NULL, // before & max
+        RecordReaderImpl.evaluatePredicateProto(createStringStats("e", "f", true), pred, null));
+    assertEquals(TruthValue.NO_NULL, // before & before
+        RecordReaderImpl.evaluatePredicateProto(createStringStats("h", "g", true), pred, null));
+    assertEquals(TruthValue.YES_NO_NULL, // before & min
+        RecordReaderImpl.evaluatePredicateProto(createStringStats("f", "g", true), pred, null));
+    assertEquals(TruthValue.YES_NO_NULL, // before & middle
+      RecordReaderImpl.evaluatePredicateProto(createStringStats("e", "g", true), pred, null));
+
+    assertEquals(TruthValue.YES_NULL, // min & after
+      RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "e", true), pred, null));
+    assertEquals(TruthValue.YES_NULL, // min & max
+        RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "f", true), pred, null));
+    assertEquals(TruthValue.YES_NO_NULL, // min & middle
+        RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "g", true), pred, null));
+
+    assertEquals(TruthValue.NO_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createStringStats("a", "b", true), pred, null)); // after
+    assertEquals(TruthValue.YES_NO_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createStringStats("a", "c", true), pred, null)); // max
+    assertEquals(TruthValue.YES_NO_NULL,
+        RecordReaderImpl.evaluatePredicateProto(createStringStats("b", "d", true), pred, null)); // middle
+    assertEquals(TruthValue.YES_NULL, // min & after, same stats
+        RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "c", true), pred, null));
+  }
+
+  @Test
+  public void testIsNullWithNullInStats() throws Exception {
+    PredicateLeaf pred = createPredicateLeaf
+        (PredicateLeaf.Operator.IS_NULL, PredicateLeaf.Type.STRING,
+            "x", null, null);
+    assertEquals(TruthValue.YES_NO,
+        RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "d", true), pred, null));
+    assertEquals(TruthValue.NO,
+        RecordReaderImpl.evaluatePredicateProto(createStringStats("c", "d", false), pred, null));
+  }
+
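+  // RecordReaderUtils.overlap treats both endpoints as inclusive: ranges that
+  // merely touch (e.g. [0,10] and [10,11]) count as overlapping, while disjoint
+  // ranges on either side do not.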
+  @Test
+  public void testOverlap() throws Exception {
+    assertTrue(!RecordReaderUtils.overlap(0, 10, -10, -1));
+    assertTrue(RecordReaderUtils.overlap(0, 10, -1, 0));
+    assertTrue(RecordReaderUtils.overlap(0, 10, -1, 1));
+    assertTrue(RecordReaderUtils.overlap(0, 10, 2, 8));
+    assertTrue(RecordReaderUtils.overlap(0, 10, 5, 10));
+    assertTrue(RecordReaderUtils.overlap(0, 10, 10, 11));
+    assertTrue(RecordReaderUtils.overlap(0, 10, 0, 10));
+    assertTrue(RecordReaderUtils.overlap(0, 10, -1, 11));
+    assertTrue(!RecordReaderUtils.overlap(0, 10, 11, 12));
+  }
+
+  private static DiskRangeList diskRanges(Integer... points) {
+    DiskRangeList head = null, tail = null;
+    for(int i = 0; i < points.length; i += 2) {
+      DiskRangeList range = new DiskRangeList(points[i], points[i+1]);
+      if (tail == null) {
+        head = tail = range;
+      } else {
+        tail = tail.insertAfter(range);
+      }
+    }
+    return head;
+  }
+
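+  // getIndexPosition returns the offset within a row-index entry's position list
+  // at which a stream's positions begin. The expected values below encode that a
+  // compressed PRESENT stream occupies the first four positions (three when
+  // uncompressed), and that a column without nulls has no PRESENT stream, so its
+  // DATA positions start at 0.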
+  @Test
+  public void testGetIndexPosition() throws Exception {
+    assertEquals(0, RecordReaderUtils.getIndexPosition
+        (OrcProto.ColumnEncoding.Kind.DIRECT, OrcProto.Type.Kind.INT,
+            OrcProto.Stream.Kind.PRESENT, true, true));
+    assertEquals(4, RecordReaderUtils.getIndexPosition
+        (OrcProto.ColumnEncoding.Kind.DIRECT, OrcProto.Type.Kind.INT,
+            OrcProto.Stream.Kind.DATA, true, true));
+    assertEquals(3, RecordReaderUtils.getIndexPosition
+        (OrcProto.ColumnEncoding.Kind.DIRECT, OrcProto.Type.Kind.INT,
+            OrcProto.Stream.Kind.DATA, false, true));
+    assertEquals(0, RecordReaderUtils.getIndexPosition
+        (OrcProto.ColumnEncoding.Kind.DIRECT, OrcProto.Type.Kind.INT,
+            OrcProto.Stream.Kind.DATA, true, false));
+    assertEquals(4, RecordReaderUtils.getIndexPosition
+        (OrcProto.ColumnEncoding.Kind.DICTIONARY, OrcProto.Type.Kind.STRING,
+            OrcProto.Stream.Kind.DATA, true, true));
+    assertEquals(4, RecordReaderUtils.getIndexPosition
+        (OrcProto.ColumnEncoding.Kind.DIRECT, OrcProto.Type.Kind.BINARY,
+            OrcProto.Stream.Kind.DATA, true, true));
+    assertEquals(3, RecordReaderUtils.getIndexPosition
+        (OrcProto.ColumnEncoding.Kind.DIRECT, OrcProto.Type.Kind.BINARY,
+            OrcProto.Stream.Kind.DATA, false, true));
+    assertEquals(6, RecordReaderUtils.getIndexPosition
+        (OrcProto.ColumnEncoding.Kind.DIRECT, OrcProto.Type.Kind.BINARY,
+            OrcProto.Stream.Kind.LENGTH, true, true));
+    assertEquals(4, RecordReaderUtils.getIndexPosition
+        (OrcProto.ColumnEncoding.Kind.DIRECT, OrcProto.Type.Kind.BINARY,
+            OrcProto.Stream.Kind.LENGTH, false, true));
+    assertEquals(4, RecordReaderUtils.getIndexPosition
+        (OrcProto.ColumnEncoding.Kind.DIRECT, OrcProto.Type.Kind.DECIMAL,
+            OrcProto.Stream.Kind.DATA, true, true));
+    assertEquals(3, RecordReaderUtils.getIndexPosition
+        (OrcProto.ColumnEncoding.Kind.DIRECT, OrcProto.Type.Kind.DECIMAL,
+            OrcProto.Stream.Kind.DATA, false, true));
+    assertEquals(6, RecordReaderUtils.getIndexPosition
+        (OrcProto.ColumnEncoding.Kind.DIRECT, OrcProto.Type.Kind.DECIMAL,
+            OrcProto.Stream.Kind.SECONDARY, true, true));
+    assertEquals(4, RecordReaderUtils.getIndexPosition
+        (OrcProto.ColumnEncoding.Kind.DIRECT, OrcProto.Type.Kind.DECIMAL,
+            OrcProto.Stream.Kind.SECONDARY, false, true));
+    assertEquals(4, RecordReaderUtils.getIndexPosition
+        (OrcProto.ColumnEncoding.Kind.DIRECT, OrcProto.Type.Kind.TIMESTAMP,
+            OrcProto.Stream.Kind.DATA, true, true));
+    assertEquals(3, RecordReaderUtils.getIndexPosition
+        (OrcProto.ColumnEncoding.Kind.DIRECT, OrcProto.Type.Kind.TIMESTAMP,
+            OrcProto.Stream.Kind.DATA, false, true));
+    assertEquals(7, RecordReaderUtils.getIndexPosition
+        (OrcProto.ColumnEncoding.Kind.DIRECT, OrcProto.Type.Kind.TIMESTAMP,
+            OrcProto.Stream.Kind.SECONDARY, true, true));
+    assertEquals(5, RecordReaderUtils.getIndexPosition
+        (OrcProto.ColumnEncoding.Kind.DIRECT, OrcProto.Type.Kind.TIMESTAMP,
+            OrcProto.Stream.Kind.SECONDARY, false, true));
+  }
+
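+  // testPartialPlan builds a small stripe layout (stream lengths, a row index,
+  // encodings, and a struct<x:int,y:int> schema) and checks the byte ranges that
+  // planReadPartialDataStreams decides to read for the selected columns and row
+  // groups. Since the index records only start positions, each selected row
+  // group's range is padded by WORST_UNCOMPRESSED_SLOP to be safe.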
+  @Test
+  public void testPartialPlan() throws Exception {
+    DiskRangeList result;
+
+    // set the streams
+    List<OrcProto.Stream> streams = new ArrayList<OrcProto.Stream>();
+    streams.add(OrcProto.Stream.newBuilder()
+        .setKind(OrcProto.Stream.Kind.PRESENT)
+        .setColumn(1).setLength(1000).build());
+    streams.add(OrcProto.Stream.newBuilder()
+        .setKind(OrcProto.Stream.Kind.DATA)
+        .setColumn(1).setLength(99000).build());
+    streams.add(OrcProto.Stream.newBuilder()
+        .setKind(OrcProto.Stream.Kind.PRESENT)
+        .setColumn(2).setLength(2000).build());
+    streams.add(OrcProto.Stream.newBuilder()
+        .setKind(OrcProto.Stream.Kind.DATA)
+        .setColumn(2).setLength(98000).build());
+
+    boolean[] columns = new boolean[]{true, true, false};
+    boolean[] rowGroups = new boolean[]{true, true, false, false, true, false};
+
+    // set the index
+    OrcProto.RowIndex[] indexes = new OrcProto.RowIndex[columns.length];
+    indexes[1] = OrcProto.RowIndex.newBuilder()
+        .addEntry(OrcProto.RowIndexEntry.newBuilder()
+            .addPositions(0).addPositions(-1).addPositions(-1)
+            .addPositions(0)
+            .build())
+        .addEntry(OrcProto.RowIndexEntry.newBuilder()
+            .addPositions(100).addPositions(-1).addPositions(-1)
+            .addPositions(10000)
+            .build())
+        .addEntry(OrcProto.RowIndexEntry.newBuilder()
+            .addPositions(200).addPositions(-1).addPositions(-1)
+            .addPositions(20000)
+            .build())
+        .addEntry(OrcProto.RowIndexEntry.newBuilder()
+            .addPositions(300).addPositions(-1).addPositions(-1)
+            .addPositions(30000)
+            .build())
+        .addEntry(OrcProto.RowIndexEntry.newBuilder()
+            .addPositions(400).addPositions(-1).addPositions(-1)
+            .addPositions(40000)
+            .build())
+        .addEntry(OrcProto.RowIndexEntry.newBuilder()
+            .addPositions(500).addPositions(-1).addPositions(-1)
+            .addPositions(50000)
+            .build())
+        .build();
+
+    // set encodings
+    List<OrcProto.ColumnEncoding> encodings =
+        new ArrayList<OrcProto.ColumnEncoding>();
+    encodings.add(OrcProto.ColumnEncoding.newBuilder()
+                    .setKind(OrcProto.ColumnEncoding.Kind.DIRECT).build());
+    encodings.add(OrcProto.ColumnEncoding.newBuilder()
+        .setKind(OrcProto.ColumnEncoding.Kind.DIRECT).build());
+    encodings.add(OrcProto.ColumnEncoding.newBuilder()
+        .setKind(OrcProto.ColumnEncoding.Kind.DIRECT).build());
+
+    // set types struct{x: int, y: int}
+    List<OrcProto.Type> types = new ArrayList<OrcProto.Type>();
+    types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.STRUCT)
+                .addSubtypes(1).addSubtypes(2).addFieldNames("x")
+                .addFieldNames("y").build());
+    types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.INT).build());
+    types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.INT).build());
+
+    // filter by rows and groups
+    result = RecordReaderImpl.planReadPartialDataStreams(streams, indexes,
+        columns, rowGroups, false, encodings, types, 32768, false);
+    assertThat(result, is(diskRanges(0, 1000, 100, 1000, 400, 1000,
+        1000, 11000 + RecordReaderUtils.WORST_UNCOMPRESSED_SLOP,
+        11000, 21000 + RecordReaderUtils.WORST_UNCOMPRESSED_SLOP,
+        41000, 51000 + RecordReaderUtils.WORST_UNCOMPRESSED_SLOP)));
+    result = RecordReaderImpl.planReadPartialDataStreams(streams, indexes,
+        columns, rowGroups, false, encodings, types, 32768, true);
+    assertThat(result, is(diskRanges(0, 21000 + RecordReaderUtils.WORST_UNCOMPRESSED_SLOP,
+        41000, 51000 + RecordReaderUtils.WORST_UNCOMPRESSED_SLOP)));
+
+    // if we read no rows, don't read any bytes
+    rowGroups = new boolean[]{false, false, false, false, false, false};
+    result = RecordReaderImpl.planReadPartialDataStreams(streams, indexes,
+        columns, rowGroups, false, encodings, types, 32768, false);
+    assertNull(result);
+
+    // all rows, but only columns 0 and 2.
+    rowGroups = null;
+    columns = new boolean[]{true, false, true};
+    result = RecordReaderImpl.planReadPartialDataStreams(streams, indexes,
+        columns, null, false, encodings, types, 32768, false);
+    assertThat(result, is(diskRanges(100000, 102000, 102000, 200000)));
+    result = RecordReaderImpl.planReadPartialDataStreams(streams, indexes,
+        columns, null, false, encodings, types, 32768, true);
+    assertThat(result, is(diskRanges(100000, 200000)));
+
+    rowGroups = new boolean[]{false, true, false, false, false, false};
+    indexes[2] = indexes[1];
+    indexes[1] = null;
+    result = RecordReaderImpl.planReadPartialDataStreams(streams, indexes,
+        columns, rowGroups, false, encodings, types, 32768, false);
+    assertThat(result, is(diskRanges(100100, 102000,
+        112000, 122000 + RecordReaderUtils.WORST_UNCOMPRESSED_SLOP)));
+    result = RecordReaderImpl.planReadPartialDataStreams(streams, indexes,
+        columns, rowGroups, false, encodings, types, 32768, true);
+    assertThat(result, is(diskRanges(100100, 102000,
+        112000, 122000 + RecordReaderUtils.WORST_UNCOMPRESSED_SLOP)));
+
+    rowGroups = new boolean[]{false, false, false, false, false, true};
+    indexes[1] = indexes[2];
+    columns = new boolean[]{true, true, true};
+    result = RecordReaderImpl.planReadPartialDataStreams(streams, indexes,
+        columns, rowGroups, false, encodings, types, 32768, false);
+    assertThat(result, is(diskRanges(500, 1000, 51000, 100000, 100500, 102000,
+        152000, 200000)));
+    result = RecordReaderImpl.planReadPartialDataStreams(streams, indexes,
+        columns, rowGroups, false, encodings, types, 32768, true);
+    assertThat(result, is(diskRanges(500, 1000, 51000, 100000, 100500, 102000,
+        152000, 200000)));
+  }
+
+
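+  // Same layout as testPartialPlan, but planned with compression enabled: the
+  // expected slop becomes 2 * 32771, i.e. two 32768-byte compression buffers
+  // plus their 3-byte chunk headers, instead of WORST_UNCOMPRESSED_SLOP.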
+  @Test
+  public void testPartialPlanCompressed() throws Exception {
+    DiskRangeList result;
+
+    // set the streams
+    List<OrcProto.Stream> streams = new ArrayList<OrcProto.Stream>();
+    streams.add(OrcProto.Stream.newBuilder()
+        .setKind(OrcProto.Stream.Kind.PRESENT)
+        .setColumn(1).setLength(1000).build());
+    streams.add(OrcProto.Stream.newBuilder()
+        .setKind(OrcProto.Stream.Kind.DATA)
+        .setColumn(1).setLength(99000).build());
+    streams.add(OrcProto.Stream.newBuilder()
+        .setKind(OrcProto.Stream.Kind.PRESENT)
+        .setColumn(2).setLength(2000).build());
+    streams.add(OrcProto.Stream.newBuilder()
+        .setKind(OrcProto.Stream.Kind.DATA)
+        .setColumn(2).setLength(98000).build());
+
+    boolean[] columns = new boolean[]{true, true, false};
+    boolean[] rowGroups = new boolean[]{true, true, false, false, true, false};
+
+    // set the index
+    OrcProto.RowIndex[] indexes = new OrcProto.RowIndex[columns.length];
+    indexes[1] = OrcProto.RowIndex.newBuilder()
+        .addEntry(OrcProto.RowIndexEntry.newBuilder()
+            .addPositions(0).addPositions(-1).addPositions(-1).addPositions(-1)
+            .addPositions(0)
+            .build())
+        .addEntry(OrcProto.RowIndexEntry.newBuilder()
+            .addPositions(100).addPositions(-1).addPositions(-1).addPositions(-1)
+            .addPositions(10000)
+            .build())
+        .addEntry(OrcProto.RowIndexEntry.newBuilder()
+            .addPositions(200).addPositions(-1).addPositions(-1).addPositions(-1)
+            .addPositions(20000)
+            .build())
+        .addEntry(OrcProto.RowIndexEntry.newBuilder()
+            .addPositions(300).addPositions(-1).addPositions(-1).addPositions(-1)
+            .addPositions(30000)
+            .build())
+        .addEntry(OrcProto.RowIndexEntry.newBuilder()
+            .addPositions(400).addPositions(-1).addPositions(-1).addPositions(-1)
+            .addPositions(40000)
+            .build())
+        .addEntry(OrcProto.RowIndexEntry.newBuilder()
+            .addPositions(500).addPositions(-1).addPositions(-1).addPositions(-1)
+            .addPositions(50000)
+            .build())
+        .build();
+
+    // set encodings
+    List<OrcProto.ColumnEncoding> encodings =
+        new ArrayList<OrcProto.ColumnEncoding>();
+    encodings.add(OrcProto.ColumnEncoding.newBuilder()
+        .setKind(OrcProto.ColumnEncoding.Kind.DIRECT).build());
+    encodings.add(OrcProto.ColumnEncoding.newBuilder()
+        .setKind(OrcProto.ColumnEncoding.Kind.DIRECT).build());
+    encodings.add(OrcProto.ColumnEncoding.newBuilder()
+        .setKind(OrcProto.ColumnEncoding.Kind.DIRECT).build());
+
+    // set types struct{x: int, y: int}
+    List<OrcProto.Type> types = new ArrayList<OrcProto.Type>();
+    types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.STRUCT)
+        .addSubtypes(1).addSubtypes(2).addFieldNames("x")
+        .addFieldNames("y").build());
+    types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.INT).build());
+    types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.INT).build());
+
+    // filter by rows and groups
+    result = RecordReaderImpl.planReadPartialDataStreams(streams, indexes,
+        columns, rowGroups, true, encodings, types, 32768, false);
+    assertThat(result, is(diskRanges(0, 1000, 100, 1000,
+        400, 1000, 1000, 11000+(2*32771),
+        11000, 21000+(2*32771), 41000, 100000)));
+
+    rowGroups = new boolean[]{false, false, false, false, false, true};
+    result = RecordReaderImpl.planReadPartialDataStreams(streams, indexes,
+        columns, rowGroups, true, encodings, types, 32768, false);
+    assertThat(result, is(diskRanges(500, 1000, 51000, 100000)));
+  }
+
+  @Test
+  public void testPartialPlanString() throws Exception {
+    DiskRangeList result;
+
+    // set the streams
+    List<OrcProto.Stream> streams = new ArrayList<OrcProto.Stream>();
+    streams.add(OrcProto.Stream.newBuilder()
+        .setKind(OrcProto.Stream.Kind.PRESENT)
+        .setColumn(1).setLength(1000).build());
+    streams.add(OrcProto.Stream.newBuilder()
+        .setKind(OrcProto.Stream.Kind.DATA)
+        .setColumn(1).setLength(94000).build());
+    streams.add(OrcProto.Stream.newBuilder()
+        .setKind(OrcProto.Stream.Kind.LENGTH)
+        .setColumn(1).setLength(2000).build());
+    streams.add(OrcProto.Stream.newBuilder()
+        .setKind(OrcProto.Stream.Kind.DICTIONARY_DATA)
+        .setColumn(1).setLength(3000).build());
+    streams.add(OrcProto.Stream.newBuilder()
+        .setKind(OrcProto.Stream.Kind.PRESENT)
+        .setColumn(2).setLength(2000).build());
+    streams.add(OrcProto.Stream.newBuilder()
+        .setKind(OrcProto.Stream.Kind.DATA)
+        .setColumn(2).setLength(98000).build());
+
+    boolean[] columns = new boolean[]{true, true, false};
+    boolean[] rowGroups = new boolean[]{false, true, false, false, true, true};
+
+    // set the index
+    OrcProto.RowIndex[] indexes = new OrcProto.RowIndex[columns.length];
+    indexes[1] = OrcProto.RowIndex.newBuilder()
+        .addEntry(OrcProto.RowIndexEntry.newBuilder()
+            .addPositions(0).addPositions(-1).addPositions(-1)
+            .addPositions(0)
+            .build())
+        .addEntry(OrcProto.RowIndexEntry.newBuilder()
+            .addPositions(100).addPositions(-1).addPositions(-1)
+            .addPositions(10000)
+            .build())
+        .addEntry(OrcProto.RowIndexEntry.newBuilder()
+            .addPositions(200).addPositions(-1).addPositions(-1)
+            .addPositions(20000)
+            .build())
+        .addEntry(OrcProto.RowIndexEntry.newBuilder()
+            .addPositions(300).addPositions(-1).addPositions(-1)
+            .addPositions(30000)
+            .build())
+        .addEntry(OrcProto.RowIndexEntry.newBuilder()
+            .addPositions(400).addPositions(-1).addPositions(-1)
+            .addPositions(40000)
+            .build())
+        .addEntry(OrcProto.RowIndexEntry.newBuilder()
+            .addPositions(500).addPositions(-1).addPositions(-1)
+            .addPositions(50000)
+            .build())
+        .build();
+
+    // set encodings
+    List<OrcProto.ColumnEncoding> encodings =
+        new ArrayList<OrcProto.ColumnEncoding>();
+    encodings.add(OrcProto.ColumnEncoding.newBuilder()
+        .setKind(OrcProto.ColumnEncoding.Kind.DIRECT).build());
+    encodings.add(OrcProto.ColumnEncoding.newBuilder()
+        .setKind(OrcProto.ColumnEncoding.Kind.DICTIONARY).build());
+    encodings.add(OrcProto.ColumnEncoding.newBuilder()
+        .setKind(OrcProto.ColumnEncoding.Kind.DIRECT).build());
+
+    // set types struct{x: string, y: int}
+    List<OrcProto.Type> types = new ArrayList<OrcProto.Type>();
+    types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.STRUCT)
+        .addSubtypes(1).addSubtypes(2).addFieldNames("x")
+        .addFieldNames("y").build());
+    types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.STRING).build());
+    types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.INT).build());
+
+    // filter by rows and groups
+    result = RecordReaderImpl.planReadPartialDataStreams(streams, indexes,
+        columns, rowGroups, false, encodings, types, 32768, false);
+    assertThat(result, is(diskRanges(100, 1000, 400, 1000, 500, 1000,
+        11000, 21000 + RecordReaderUtils.WORST_UNCOMPRESSED_SLOP,
+        41000, 51000 + RecordReaderUtils.WORST_UNCOMPRESSED_SLOP,
+        51000, 95000, 95000, 97000, 97000, 100000)));
+  }
+
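+  // The bloom filter tests below all follow the same pattern: the predicate value
+  // lies inside the min/max range, so statistics alone cannot prune, but while the
+  // value is missing from the bloom filter the evaluation is NO / NO_NULL; once the
+  // value is added, the filter can no longer rule it out and the result becomes
+  // YES_NO / YES_NO_NULL (timestamps excepted, since their pushdown is disabled).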
+  @Test
+  public void testIntNullSafeEqualsBloomFilter() throws Exception {
+    PredicateLeaf pred = createPredicateLeaf(
+        PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.LONG, "x", 15L, null);
+    BloomFilterIO bf = new BloomFilterIO(10000);
+    for (int i = 20; i < 1000; i++) {
+      bf.addLong(i);
+    }
+    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(createIntStats(10, 100));
+    assertEquals(TruthValue.NO, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
+
+    bf.addLong(15);
+    assertEquals(TruthValue.YES_NO, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
+  }
+
+  @Test
+  public void testIntEqualsBloomFilter() throws Exception {
+    PredicateLeaf pred = createPredicateLeaf(
+        PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.LONG, "x", 15L, null);
+    BloomFilterIO bf = new BloomFilterIO(10000);
+    for (int i = 20; i < 1000; i++) {
+      bf.addLong(i);
+    }
+    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(createIntStats(10, 100));
+    assertEquals(TruthValue.NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
+
+    bf.addLong(15);
+    assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
+  }
+
+  @Test
+  public void testIntInBloomFilter() throws Exception {
+    List<Object> args = new ArrayList<Object>();
+    args.add(15L);
+    args.add(19L);
+    PredicateLeaf pred = createPredicateLeaf
+        (PredicateLeaf.Operator.IN, PredicateLeaf.Type.LONG,
+            "x", null, args);
+    BloomFilterIO bf = new BloomFilterIO(10000);
+    for (int i = 20; i < 1000; i++) {
+      bf.addLong(i);
+    }
+    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(createIntStats(10, 100));
+    assertEquals(TruthValue.NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
+
+    bf.addLong(19);
+    assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
+
+    bf.addLong(15);
+    assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
+  }
+
+  @Test
+  public void testDoubleNullSafeEqualsBloomFilter() throws Exception {
+    PredicateLeaf pred = createPredicateLeaf(
+        PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.FLOAT, "x", 15.0, null);
+    BloomFilterIO bf = new BloomFilterIO(10000);
+    for (int i = 20; i < 1000; i++) {
+      bf.addDouble(i);
+    }
+    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(createDoubleStats(10.0, 100.0));
+    assertEquals(TruthValue.NO, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
+
+    bf.addDouble(15.0);
+    assertEquals(TruthValue.YES_NO, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
+  }
+
+  @Test
+  public void testDoubleEqualsBloomFilter() throws Exception {
+    PredicateLeaf pred = createPredicateLeaf(
+        PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.FLOAT, "x", 15.0, null);
+    BloomFilterIO bf = new BloomFilterIO(10000);
+    for (int i = 20; i < 1000; i++) {
+      bf.addDouble(i);
+    }
+    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(createDoubleStats(10.0, 100.0));
+    assertEquals(TruthValue.NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
+
+    bf.addDouble(15.0);
+    assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
+  }
+
+  @Test
+  public void testDoubleInBloomFilter() throws Exception {
+    List<Object> args = new ArrayList<Object>();
+    args.add(15.0);
+    args.add(19.0);
+    PredicateLeaf pred = createPredicateLeaf
+        (PredicateLeaf.Operator.IN, PredicateLeaf.Type.FLOAT,
+            "x", null, args);
+    BloomFilterIO bf = new BloomFilterIO(10000);
+    for (int i = 20; i < 1000; i++) {
+      bf.addDouble(i);
+    }
+    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(createDoubleStats(10.0, 100.0));
+    assertEquals(TruthValue.NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
+
+    bf.addDouble(19.0);
+    assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
+
+    bf.addDouble(15.0);
+    assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
+  }
+
+  @Test
+  public void testStringNullSafeEqualsBloomFilter() throws Exception {
+    PredicateLeaf pred = createPredicateLeaf(
+        PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.STRING, "x", "str_15", null);
+    BloomFilterIO bf = new BloomFilterIO(10000);
+    for (int i = 20; i < 1000; i++) {
+      bf.addString("str_" + i);
+    }
+    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(createStringStats("str_10", "str_200"));
+    assertEquals(TruthValue.NO, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
+
+    bf.addString("str_15");
+    assertEquals(TruthValue.YES_NO, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
+  }
+
+  @Test
+  public void testStringEqualsBloomFilter() throws Exception {
+    PredicateLeaf pred = createPredicateLeaf(
+        PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.STRING, "x", "str_15", null);
+    BloomFilterIO bf = new BloomFilterIO(10000);
+    for (int i = 20; i < 1000; i++) {
+      bf.addString("str_" + i);
+    }
+    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(createStringStats("str_10", "str_200"));
+    assertEquals(TruthValue.NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
+
+    bf.addString("str_15");
+    assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
+  }
+
+  @Test
+  public void testStringInBloomFilter() throws Exception {
+    List<Object> args = new ArrayList<Object>();
+    args.add("str_15");
+    args.add("str_19");
+    PredicateLeaf pred = createPredicateLeaf
+        (PredicateLeaf.Operator.IN, PredicateLeaf.Type.STRING,
+            "x", null, args);
+    BloomFilterIO bf = new BloomFilterIO(10000);
+    for (int i = 20; i < 1000; i++) {
+      bf.addString("str_" + i);
+    }
+    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(createStringStats("str_10", "str_200"));
+    assertEquals(TruthValue.NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
+
+    bf.addString("str_19");
+    assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
+
+    bf.addString("str_15");
+    assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
+  }
+
+  @Test
+  public void testDateWritableNullSafeEqualsBloomFilter() throws Exception {
+    PredicateLeaf pred = createPredicateLeaf(
+        PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.DATE, "x",
+        new DateWritable(15).get(), null);
+    BloomFilterIO bf = new BloomFilterIO(10000);
+    for (int i = 20; i < 1000; i++) {
+      bf.addLong((new DateWritable(i)).getDays());
+    }
+    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(createDateStats(10, 100));
+    assertEquals(TruthValue.NO, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
+
+    bf.addLong((new DateWritable(15)).getDays());
+    assertEquals(TruthValue.YES_NO, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
+  }
+
+  @Test
+  public void testDateWritableEqualsBloomFilter() throws Exception {
+    PredicateLeaf pred = createPredicateLeaf(
+        PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.DATE, "x",
+        new DateWritable(15).get(), null);
+    BloomFilterIO bf = new BloomFilterIO(10000);
+    for (int i = 20; i < 1000; i++) {
+      bf.addLong((new DateWritable(i)).getDays());
+    }
+    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(createDateStats(10, 100));
+    assertEquals(TruthValue.NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
+
+    bf.addLong((new DateWritable(15)).getDays());
+    assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
+  }
+
+  @Test
+  public void testDateWritableInBloomFilter() throws Exception {
+    List<Object> args = new ArrayList<Object>();
+    args.add(new DateWritable(15).get());
+    args.add(new DateWritable(19).get());
+    PredicateLeaf pred = createPredicateLeaf
+        (PredicateLeaf.Operator.IN, PredicateLeaf.Type.DATE,
+            "x", null, args);
+    BloomFilterIO bf = new BloomFilterIO(10000);
+    for (int i = 20; i < 1000; i++) {
+      bf.addLong((new DateWritable(i)).getDays());
+    }
+    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(createDateStats(10, 100));
+    assertEquals(TruthValue.NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
+
+    bf.addLong((new DateWritable(19)).getDays());
+    assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
+
+    bf.addLong((new DateWritable(15)).getDays());
+    assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
+  }
+
+  @Test
+  public void testTimestampNullSafeEqualsBloomFilter() throws Exception {
+    PredicateLeaf pred = createPredicateLeaf(
+        PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.TIMESTAMP, "x",
+        new Timestamp(15),
+        null);
+    BloomFilterIO bf = new BloomFilterIO(10000);
+    for (int i = 20; i < 1000; i++) {
+      bf.addLong((new Timestamp(i)).getTime());
+    }
+    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(createTimestampStats(10, 100));
+    // timestamp PPD is disabled until ORC-135
+    assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
+
+    bf.addLong((new Timestamp(15)).getTime());
+    // timestamp PPD is disabled until ORC-135
+    assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
+  }
+
+  @Test
+  public void testTimestampEqualsBloomFilter() throws Exception {
+    PredicateLeaf pred = createPredicateLeaf(
+        PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(15), null);
+    BloomFilterIO bf = new BloomFilterIO(10000);
+    for (int i = 20; i < 1000; i++) {
+      bf.addLong((new Timestamp(i)).getTime());
+    }
+    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(createTimestampStats(10, 100));
+    // timestamp PPD is disabled until ORC-135
+    assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
+
+    bf.addLong((new Timestamp(15)).getTime());
+    // timestamp PPD is disabled until ORC-135
+    assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
+  }
+
+  @Test
+  public void testTimestampInBloomFilter() throws Exception {
+    List<Object> args = new ArrayList<Object>();
+    args.add(new Timestamp(15));
+    args.add(new Timestamp(19));
+    PredicateLeaf pred = createPredicateLeaf
+        (PredicateLeaf.Operator.IN, PredicateLeaf.Type.TIMESTAMP,
+            "x", null, args);
+    BloomFilterIO bf = new BloomFilterIO(10000);
+    for (int i = 20; i < 1000; i++) {
+      bf.addLong((new Timestamp(i)).getTime());
+    }
+    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(createTimestampStats(10, 100));
+    // timestamp PPD is disabled until ORC-135
+    assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
+
+    bf.addLong((new Timestamp(19)).getTime());
+    // timestamp PPD is disabled until ORC-135
+    assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
+
+    bf.addLong((new Timestamp(15)).getTime());
+    // timestamp PPD is disabled until ORC-135
+    assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
+  }
+
+  @Test
+  public void testDecimalNullSafeEqualsBloomFilter() throws Exception {
+    PredicateLeaf pred = createPredicateLeaf(
+        PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.DECIMAL, "x",
+        new HiveDecimalWritable("15"),
+        null);
+    BloomFilterIO bf = new BloomFilterIO(10000);
+    for (int i = 20; i < 1000; i++) {
+      bf.addString(HiveDecimal.create(i).toString());
+    }
+    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(createDecimalStats("10", "200"));
+    assertEquals(TruthValue.NO, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
+
+    bf.addString(HiveDecimal.create(15).toString());
+    assertEquals(TruthValue.YES_NO, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
+  }
+
+  @Test
+  public void testDecimalEqualsBloomFilter() throws Exception {
+    PredicateLeaf pred = createPredicateLeaf(
+        PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.DECIMAL, "x",
+        new HiveDecimalWritable("15"),
+        null);
+    BloomFilterIO bf = new BloomFilterIO(10000);
+    for (int i = 20; i < 1000; i++) {
+      bf.addString(HiveDecimal.create(i).toString());
+    }
+    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(createDecimalStats("10", "200"));
+    assertEquals(TruthValue.NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
+
+    bf.addString(HiveDecimal.create(15).toString());
+    assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
+  }
+
+  @Test
+  public void testDecimalInBloomFilter() throws Exception {
+    List<Object> args = new ArrayList<Object>();
+    args.add(new HiveDecimalWritable("15"));
+    args.add(new HiveDecimalWritable("19"));
+    PredicateLeaf pred = createPredicateLeaf
+        (PredicateLeaf.Operator.IN, PredicateLeaf.Type.DECIMAL,
+            "x", null, args);
+    BloomFilterIO bf = new BloomFilterIO(10000);
+    for (int i = 20; i < 1000; i++) {
+      bf.addString(HiveDecimal.create(i).toString());
+    }
+    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(createDecimalStats("10", "200"));
+    assertEquals(TruthValue.NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
+
+    bf.addString(HiveDecimal.create(19).toString());
+    assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
+
+    bf.addString(HiveDecimal.create(15).toString());
+    assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
+  }
+
+  @Test
+  public void testNullsInBloomFilter() throws Exception {
+    List<Object> args = new ArrayList<Object>();
+    args.add(new HiveDecimalWritable("15"));
+    args.add(null);
+    args.add(new HiveDecimalWritable("19"));
+    PredicateLeaf pred = createPredicateLeaf
+        (PredicateLeaf.Operator.IN, PredicateLeaf.Type.DECIMAL,
+            "x", null, args);
+    BloomFilterIO bf = new BloomFilterIO(10000);
+    for (int i = 20; i < 1000; i++) {
+      bf.addString(HiveDecimal.create(i).toString());
+    }
+    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(createDecimalStats("10", "200", false));
+    // hasNull is false and neither IN value is in the bloom filter, so the result is NO
+    assertEquals(TruthValue.NO, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
+
+    cs = ColumnStatisticsImpl.deserialize(createDecimalStats("10", "200", true));
+    // hasNull is true, so bloom filter should return YES_NO_NULL
+    assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
+
+    bf.addString(HiveDecimal.create(19).toString());
+    assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
+
+    bf.addString(HiveDecimal.create(15).toString());
+    assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
+  }
+
+  @Test
+  public void testClose() throws Exception {
+    DataReader mockedDataReader = mock(DataReader.class);
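+    // closing the RecordReader built below must propagate close() to this DataReader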
+    closeMockedRecordReader(mockedDataReader);
+
+    verify(mockedDataReader, atLeastOnce()).close();
+  }
+
+  @Test
+  public void testCloseWithException() throws Exception {
+    DataReader mockedDataReader = mock(DataReader.class);
+    doThrow(IOException.class).when(mockedDataReader).close();
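+    // the close() failure must surface as an IOException rather than being swallowed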
+
+    try {
+      closeMockedRecordReader(mockedDataReader);
+      fail("Exception should have been thrown when Record Reader was closed");
+    } catch (IOException expected) {
+
+    }
+
+    verify(mockedDataReader, atLeastOnce()).close();
+  }
+
+  Path workDir = new Path(System.getProperty("test.tmp.dir",
+      "target" + File.separator + "test" + File.separator + "tmp"));
+
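+  // Writes an empty ORC file, then opens and closes a RecordReader over it
+  // that performs its I/O through the supplied (mocked) DataReader.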
+  private void closeMockedRecordReader(DataReader mockedDataReader) throws IOException {
+    Configuration conf = new Configuration();
+    Path path = new Path(workDir, "empty.orc");
+    FileSystem.get(conf).delete(path, true);
+    Writer writer = OrcFile.createWriter(path, OrcFile.writerOptions(conf)
+        .setSchema(TypeDescription.createLong()));
+    writer.close();
+    Reader reader = OrcFile.createReader(path, OrcFile.readerOptions(conf));
+
+    RecordReader recordReader = reader.rows(new Reader.Options()
+        .dataReader(mockedDataReader));
+
+    recordReader.close();
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/impl/TestRunLengthByteReader.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/impl/TestRunLengthByteReader.java b/orc/src/test/org/apache/hive/orc/impl/TestRunLengthByteReader.java
new file mode 100644
index 0000000..ac3f93e
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/impl/TestRunLengthByteReader.java
@@ -0,0 +1,143 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc.impl;
+
+import static junit.framework.Assert.assertEquals;
+
+import java.nio.ByteBuffer;
+
+import org.apache.hive.orc.CompressionCodec;
+import org.junit.Test;
+
+public class TestRunLengthByteReader {
+
+  @Test
+  public void testUncompressedSeek() throws Exception {
+    TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
+    RunLengthByteWriter out = new RunLengthByteWriter(new OutStream("test", 100,
+        null, collect));
+    TestInStream.PositionCollector[] positions =
+        new TestInStream.PositionCollector[2048];
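+    // record the output position before each value so we can seek back to it below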
+    for(int i=0; i < 2048; ++i) {
+      positions[i] = new TestInStream.PositionCollector();
+      out.getPosition(positions[i]);
+      if (i < 1024) {
+        out.write((byte) (i/4));
+      } else {
+        out.write((byte) i);
+      }
+    }
+    out.flush();
+    ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
+    collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
+    inBuf.flip();
+    RunLengthByteReader in = new RunLengthByteReader(InStream.create("test",
+        new ByteBuffer[]{inBuf}, new long[]{0}, inBuf.remaining(), null, 100));
+    for(int i=0; i < 2048; ++i) {
+      int x = in.next() & 0xff;
+      if (i < 1024) {
+        assertEquals((i/4) & 0xff, x);
+      } else {
+        assertEquals(i & 0xff, x);
+      }
+    }
+    for(int i=2047; i >= 0; --i) {
+      in.seek(positions[i]);
+      int x = in.next() & 0xff;
+      if (i < 1024) {
+        assertEquals((i/4) & 0xff, x);
+      } else {
+        assertEquals(i & 0xff, x);
+      }
+    }
+  }
+
+  @Test
+  public void testCompressedSeek() throws Exception {
+    CompressionCodec codec = new SnappyCodec();
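+    // same write/seek pattern as the uncompressed test, but through a Snappy-compressed stream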
+    TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
+    RunLengthByteWriter out = new RunLengthByteWriter(new OutStream("test", 500,
+        codec, collect));
+    TestInStream.PositionCollector[] positions =
+        new TestInStream.PositionCollector[2048];
+    for(int i=0; i < 2048; ++i) {
+      positions[i] = new TestInStream.PositionCollector();
+      out.getPosition(positions[i]);
+      if (i < 1024) {
+        out.write((byte) (i/4));
+      } else {
+        out.write((byte) i);
+      }
+    }
+    out.flush();
+    ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
+    collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
+    inBuf.flip();
+    RunLengthByteReader in = new RunLengthByteReader(InStream.create("test",
+        new ByteBuffer[]{inBuf}, new long[]{0}, inBuf.remaining(), codec, 500));
+    for(int i=0; i < 2048; ++i) {
+      int x = in.next() & 0xff;
+      if (i < 1024) {
+        assertEquals((i/4) & 0xff, x);
+      } else {
+        assertEquals(i & 0xff, x);
+      }
+    }
+    for(int i=2047; i >= 0; --i) {
+      in.seek(positions[i]);
+      int x = in.next() & 0xff;
+      if (i < 1024) {
+        assertEquals((i/4) & 0xff, x);
+      } else {
+        assertEquals(i & 0xff, x);
+      }
+    }
+  }
+
+  @Test
+  public void testSkips() throws Exception {
+    TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
+    RunLengthByteWriter out = new RunLengthByteWriter(new OutStream("test", 100,
+        null, collect));
+    for(int i=0; i < 2048; ++i) {
+      if (i < 1024) {
+        out.write((byte) (i/16));
+      } else {
+        out.write((byte) i);
+      }
+    }
+    out.flush();
+    ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
+    collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
+    inBuf.flip();
+    RunLengthByteReader in = new RunLengthByteReader(InStream.create("test",
+        new ByteBuffer[]{inBuf}, new long[]{0}, inBuf.remaining(), null, 100));
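+    // read one value out of every ten, skipping the nine values in between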
+    for(int i=0; i < 2048; i += 10) {
+      int x = in.next() & 0xff;
+      if (i < 1024) {
+        assertEquals((i/16) & 0xff, x);
+      } else {
+        assertEquals(i & 0xff, x);
+      }
+      if (i < 2038) {
+        in.skip(9);
+      }
+      in.skip(0);
+    }
+  }
+}


[12/37] hive git commit: HIVE-17118. Move the hive-orc source files to make the package names unique.

Posted by om...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/TestNewIntegerEncoding.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/TestNewIntegerEncoding.java b/orc/src/test/org/apache/hive/orc/TestNewIntegerEncoding.java
new file mode 100644
index 0000000..7f598c7
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/TestNewIntegerEncoding.java
@@ -0,0 +1,1373 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc;
+
+import static junit.framework.Assert.assertEquals;
+
+import java.io.File;
+import java.sql.Timestamp;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestName;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+
+import com.google.common.collect.Lists;
+import com.google.common.primitives.Longs;
+
+@RunWith(value = Parameterized.class)
+public class TestNewIntegerEncoding {
+
+  private OrcFile.EncodingStrategy encodingStrategy;
+
+  public TestNewIntegerEncoding(OrcFile.EncodingStrategy es) {
+    this.encodingStrategy = es;
+  }
+
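+  // every test in this class runs once per encoding strategy listed below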
+  @Parameters
+  public static Collection<Object[]> data() {
+    Object[][] data = new Object[][] { { OrcFile.EncodingStrategy.COMPRESSION },
+        { OrcFile.EncodingStrategy.SPEED } };
+    return Arrays.asList(data);
+  }
+
+  public static class TSRow {
+    Timestamp ts;
+
+    public TSRow(Timestamp ts) {
+      this.ts = ts;
+    }
+  }
+
+  public static TypeDescription getRowSchema() {
+    return TypeDescription.createStruct()
+        .addField("int1", TypeDescription.createInt())
+        .addField("long1", TypeDescription.createLong());
+  }
+
+  public static void appendRow(VectorizedRowBatch batch,
+                               int int1, long long1) {
+    int row = batch.size++;
+    ((LongColumnVector) batch.cols[0]).vector[row] = int1;
+    ((LongColumnVector) batch.cols[1]).vector[row] = long1;
+  }
+
+  public static void appendLong(VectorizedRowBatch batch,
+                                long long1) {
+    int row = batch.size++;
+    ((LongColumnVector) batch.cols[0]).vector[row] = long1;
+  }
+
+  Path workDir = new Path(System.getProperty("test.tmp.dir", "target"
+      + File.separator + "test" + File.separator + "tmp"));
+
+  Configuration conf;
+  FileSystem fs;
+  Path testFilePath;
+
+  @Rule
+  public TestName testCaseName = new TestName();
+
+  @Before
+  public void openFileSystem() throws Exception {
+    conf = new Configuration();
+    fs = FileSystem.getLocal(conf);
+    testFilePath = new Path(workDir, "TestOrcFile."
+        + testCaseName.getMethodName() + ".orc");
+    fs.delete(testFilePath, false);
+  }
+
+  @Test
+  public void testBasicRow() throws Exception {
+    TypeDescription schema = getRowSchema();
+    Writer writer = OrcFile.createWriter(testFilePath,
+                                         OrcFile.writerOptions(conf)
+                                         .setSchema(schema)
+                                         .stripeSize(100000)
+                                         .compress(CompressionKind.NONE)
+                                         .bufferSize(10000)
+                                         .encodingStrategy(encodingStrategy));
+    VectorizedRowBatch batch = schema.createRowBatch();
+    appendRow(batch, 111, 1111L);
+    appendRow(batch, 111, 1111L);
+    appendRow(batch, 111, 1111L);
+    writer.addRowBatch(batch);
+    writer.close();
+
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+    RecordReader rows = reader.rows();
+    batch = reader.getSchema().createRowBatch();
+    while (rows.nextBatch(batch)) {
+      for(int r=0; r < batch.size; ++r) {
+        assertEquals(111, ((LongColumnVector) batch.cols[0]).vector[r]);
+        assertEquals(1111, ((LongColumnVector) batch.cols[1]).vector[r]);
+      }
+    }
+  }
+
+  @Test
+  public void testBasicOld() throws Exception {
+    TypeDescription schema = TypeDescription.createLong();
+    long[] inp = new long[] { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 4, 5, 6,
+        7, 8, 9, 10, 1, 1, 1, 1, 1, 1, 10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 1, 1, 1,
+        2, 5, 1, 3, 7, 1, 9, 2, 6, 3, 7, 1, 9, 2, 6, 3, 7, 1, 9, 2, 6, 3, 7, 1,
+        9, 2, 6, 3, 7, 1, 9, 2, 6, 2000, 2, 1, 1, 1, 1, 1, 3, 7, 1, 9, 2, 6, 1,
+        1, 1, 1, 1 };
+    List<Long> input = Lists.newArrayList(Longs.asList(inp));
+    Writer writer = OrcFile.createWriter(testFilePath,
+                                         OrcFile.writerOptions(conf)
+                                         .setSchema(schema)
+                                         .compress(CompressionKind.NONE)
+                                         .version(OrcFile.Version.V_0_11)
+                                         .bufferSize(10000)
+                                         .encodingStrategy(encodingStrategy));
+    VectorizedRowBatch batch = schema.createRowBatch();
+    for(Long l : input) {
+      appendLong(batch, l);
+    }
+    writer.addRowBatch(batch);
+    writer.close();
+
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+    RecordReader rows = reader.rows();
+    int idx = 0;
+    batch = reader.getSchema().createRowBatch();
+    while (rows.nextBatch(batch)) {
+      for(int r=0; r < batch.size; ++r) {
+        assertEquals(input.get(idx++).longValue(),
+            ((LongColumnVector) batch.cols[0]).vector[r]);
+      }
+    }
+  }
+
+  @Test
+  public void testBasicNew() throws Exception {
+    TypeDescription schema = TypeDescription.createLong();
+
+    long[] inp = new long[] { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 4, 5, 6,
+        7, 8, 9, 10, 1, 1, 1, 1, 1, 1, 10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 1, 1, 1,
+        2, 5, 1, 3, 7, 1, 9, 2, 6, 3, 7, 1, 9, 2, 6, 3, 7, 1, 9, 2, 6, 3, 7, 1,
+        9, 2, 6, 3, 7, 1, 9, 2, 6, 2000, 2, 1, 1, 1, 1, 1, 3, 7, 1, 9, 2, 6, 1,
+        1, 1, 1, 1 };
+    List<Long> input = Lists.newArrayList(Longs.asList(inp));
+
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .setSchema(schema)
+            .stripeSize(100000)
+            .compress(CompressionKind.NONE)
+            .bufferSize(10000)
+            .encodingStrategy(encodingStrategy));
+    VectorizedRowBatch batch = schema.createRowBatch();
+    for(Long l : input) {
+      appendLong(batch, l);
+    }
+    writer.addRowBatch(batch);
+    writer.close();
+
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+    RecordReader rows = reader.rows();
+    int idx = 0;
+    batch = reader.getSchema().createRowBatch();
+    while (rows.nextBatch(batch)) {
+      for(int r=0; r < batch.size; ++r) {
+        assertEquals(input.get(idx++).longValue(),
+            ((LongColumnVector) batch.cols[0]).vector[r]);
+      }
+    }
+  }
+
+  @Test
+  public void testBasicDelta1() throws Exception {
+    TypeDescription schema = TypeDescription.createLong();
+
+    long[] inp = new long[] { -500, -400, -350, -325, -310 };
+    List<Long> input = Lists.newArrayList(Longs.asList(inp));
+
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .setSchema(schema)
+            .stripeSize(100000)
+            .compress(CompressionKind.NONE)
+            .bufferSize(10000)
+            .encodingStrategy(encodingStrategy));
+    VectorizedRowBatch batch = schema.createRowBatch();
+    for(Long l : input) {
+      appendLong(batch, l);
+    }
+    writer.addRowBatch(batch);
+    writer.close();
+
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+    RecordReader rows = reader.rows();
+    batch = reader.getSchema().createRowBatch();
+    int idx = 0;
+    while (rows.nextBatch(batch)) {
+      for(int r=0; r < batch.size; ++r) {
+        assertEquals(input.get(idx++).longValue(),
+            ((LongColumnVector) batch.cols[0]).vector[r]);
+      }
+    }
+  }
+
+  @Test
+  public void testBasicDelta2() throws Exception {
+    TypeDescription schema = TypeDescription.createLong();
+
+    long[] inp = new long[] { -500, -600, -650, -675, -710 };
+    List<Long> input = Lists.newArrayList(Longs.asList(inp));
+
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .setSchema(schema)
+            .stripeSize(100000)
+            .compress(CompressionKind.NONE)
+            .bufferSize(10000)
+            .encodingStrategy(encodingStrategy));
+    VectorizedRowBatch batch = schema.createRowBatch();
+    for(Long l : input) {
+      appendLong(batch, l);
+    }
+    writer.addRowBatch(batch);
+    writer.close();
+
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+    RecordReader rows = reader.rows();
+    batch = reader.getSchema().createRowBatch();
+    int idx = 0;
+    while (rows.nextBatch(batch)) {
+      for(int r=0; r < batch.size; ++r) {
+        assertEquals(input.get(idx++).longValue(),
+            ((LongColumnVector) batch.cols[0]).vector[r]);
+      }
+    }
+  }
+
+  @Test
+  public void testBasicDelta3() throws Exception {
+    TypeDescription schema = TypeDescription.createLong();
+
+    long[] inp = new long[] { 500, 400, 350, 325, 310 };
+    List<Long> input = Lists.newArrayList(Longs.asList(inp));
+
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .setSchema(schema)
+            .stripeSize(100000)
+            .compress(CompressionKind.NONE)
+            .bufferSize(10000)
+            .encodingStrategy(encodingStrategy));
+    VectorizedRowBatch batch = schema.createRowBatch();
+    for(Long l : input) {
+      appendLong(batch, l);
+    }
+    writer.addRowBatch(batch);
+    writer.close();
+
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+    RecordReader rows = reader.rows();
+    batch = reader.getSchema().createRowBatch();
+    int idx = 0;
+    while (rows.nextBatch(batch)) {
+      for(int r=0; r < batch.size; ++r) {
+        assertEquals(input.get(idx++).longValue(),
+            ((LongColumnVector) batch.cols[0]).vector[r]);
+      }
+    }
+  }
+
+  @Test
+  public void testBasicDelta4() throws Exception {
+    TypeDescription schema = TypeDescription.createLong();
+
+    long[] inp = new long[] { 500, 600, 650, 675, 710 };
+    List<Long> input = Lists.newArrayList(Longs.asList(inp));
+
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .setSchema(schema)
+            .stripeSize(100000)
+            .compress(CompressionKind.NONE)
+            .bufferSize(10000)
+            .encodingStrategy(encodingStrategy));
+    VectorizedRowBatch batch = schema.createRowBatch();
+    for(Long l : input) {
+      appendLong(batch, l);
+    }
+    writer.addRowBatch(batch);
+    writer.close();
+
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+    RecordReader rows = reader.rows();
+    batch = reader.getSchema().createRowBatch();
+    int idx = 0;
+    while (rows.nextBatch(batch)) {
+      for(int r=0; r < batch.size; ++r) {
+        assertEquals(input.get(idx++).longValue(),
+            ((LongColumnVector) batch.cols[0]).vector[r]);
+      }
+    }
+  }
+
+  @Test
+  public void testDeltaOverflow() throws Exception {
+    TypeDescription schema = TypeDescription.createLong();
+
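+    // the delta between the last two values overflows a signed 64-bit long;
+    // the values must still round trip exactly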
+    long[] inp = new long[]{4513343538618202719L, 4513343538618202711L,
+        2911390882471569739L,
+        -9181829309989854913L};
+    List<Long> input = Lists.newArrayList(Longs.asList(inp));
+
+    Writer writer = OrcFile.createWriter(
+        testFilePath,
+        OrcFile.writerOptions(conf).setSchema(schema).stripeSize(100000)
+            .compress(CompressionKind.NONE).bufferSize(10000));
+    VectorizedRowBatch batch = schema.createRowBatch();
+    for (Long l : input) {
+      appendLong(batch, l);
+    }
+    writer.addRowBatch(batch);
+    writer.close();
+
+    Reader reader = OrcFile
+        .createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs));
+    RecordReader rows = reader.rows();
+    batch = reader.getSchema().createRowBatch();
+    int idx = 0;
+    while (rows.nextBatch(batch)) {
+      for(int r=0; r < batch.size; ++r) {
+        assertEquals(input.get(idx++).longValue(),
+            ((LongColumnVector) batch.cols[0]).vector[r]);
+      }
+    }
+  }
+
+  @Test
+  public void testDeltaOverflow2() throws Exception {
+    TypeDescription schema = TypeDescription.createLong();
+
+    long[] inp = new long[]{Long.MAX_VALUE, 4513343538618202711L,
+        2911390882471569739L,
+        Long.MIN_VALUE};
+    List<Long> input = Lists.newArrayList(Longs.asList(inp));
+
+    Writer writer = OrcFile.createWriter(
+        testFilePath,
+        OrcFile.writerOptions(conf).setSchema(schema).stripeSize(100000)
+            .compress(CompressionKind.NONE).bufferSize(10000));
+    VectorizedRowBatch batch = schema.createRowBatch();
+    for (Long l : input) {
+      appendLong(batch, l);
+    }
+    writer.addRowBatch(batch);
+    writer.close();
+
+    Reader reader = OrcFile
+        .createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs));
+    RecordReader rows = reader.rows();
+    batch = reader.getSchema().createRowBatch();
+    int idx = 0;
+    while (rows.nextBatch(batch)) {
+      for(int r=0; r < batch.size; ++r) {
+        assertEquals(input.get(idx++).longValue(),
+            ((LongColumnVector) batch.cols[0]).vector[r]);
+      }
+    }
+  }
+
+  @Test
+  public void testDeltaOverflow3() throws Exception {
+    TypeDescription schema = TypeDescription.createLong();
+
+    long[] inp = new long[]{-4513343538618202711L, -2911390882471569739L, -2,
+        Long.MAX_VALUE};
+    List<Long> input = Lists.newArrayList(Longs.asList(inp));
+
+    Writer writer = OrcFile.createWriter(
+        testFilePath,
+        OrcFile.writerOptions(conf).setSchema(schema).stripeSize(100000)
+            .compress(CompressionKind.NONE).bufferSize(10000));
+    VectorizedRowBatch batch = schema.createRowBatch();
+    for (Long l : input) {
+      appendLong(batch, l);
+    }
+    writer.addRowBatch(batch);
+    writer.close();
+
+    Reader reader = OrcFile
+        .createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs));
+    RecordReader rows = reader.rows();
+    batch = reader.getSchema().createRowBatch();
+    int idx = 0;
+    while (rows.nextBatch(batch)) {
+      for(int r=0; r < batch.size; ++r) {
+        assertEquals(input.get(idx++).longValue(),
+            ((LongColumnVector) batch.cols[0]).vector[r]);
+      }
+    }
+  }
+
+  @Test
+  public void testIntegerMin() throws Exception {
+    TypeDescription schema = TypeDescription.createLong();
+
+    List<Long> input = Lists.newArrayList();
+    input.add((long) Integer.MIN_VALUE);
+
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .setSchema(schema)
+            .stripeSize(100000)
+            .bufferSize(10000)
+            .encodingStrategy(encodingStrategy));
+    VectorizedRowBatch batch = schema.createRowBatch();
+    for(Long l : input) {
+      appendLong(batch, l);
+    }
+    writer.addRowBatch(batch);
+    writer.close();
+
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+    RecordReader rows = reader.rows();
+    batch = reader.getSchema().createRowBatch();
+    int idx = 0;
+    while (rows.nextBatch(batch)) {
+      for(int r=0; r < batch.size; ++r) {
+        assertEquals(input.get(idx++).longValue(),
+            ((LongColumnVector) batch.cols[0]).vector[r]);
+      }
+    }
+  }
+
+  @Test
+  public void testIntegerMax() throws Exception {
+    TypeDescription schema = TypeDescription.createLong();
+
+    List<Long> input = Lists.newArrayList();
+    input.add((long) Integer.MAX_VALUE);
+
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .setSchema(schema)
+            .stripeSize(100000)
+            .compress(CompressionKind.NONE)
+            .bufferSize(10000)
+            .encodingStrategy(encodingStrategy));
+    VectorizedRowBatch batch = schema.createRowBatch();
+    for(Long l : input) {
+      appendLong(batch, l);
+    }
+    writer.addRowBatch(batch);
+    writer.close();
+
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+    RecordReader rows = reader.rows();
+    batch = reader.getSchema().createRowBatch();
+    int idx = 0;
+    while (rows.nextBatch(batch)) {
+      for(int r=0; r < batch.size; ++r) {
+        assertEquals(input.get(idx++).longValue(),
+            ((LongColumnVector) batch.cols[0]).vector[r]);
+      }
+    }
+  }
+
+  @Test
+  public void testLongMin() throws Exception {
+    TypeDescription schema = TypeDescription.createLong();
+
+    List<Long> input = Lists.newArrayList();
+    input.add(Long.MIN_VALUE);
+
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .setSchema(schema)
+            .stripeSize(100000)
+            .compress(CompressionKind.NONE)
+            .bufferSize(10000)
+            .encodingStrategy(encodingStrategy));
+    VectorizedRowBatch batch = schema.createRowBatch();
+    for(Long l : input) {
+      appendLong(batch, l);
+    }
+    writer.addRowBatch(batch);
+    writer.close();
+
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+    RecordReader rows = reader.rows();
+    batch = reader.getSchema().createRowBatch();
+    int idx = 0;
+    while (rows.nextBatch(batch)) {
+      for(int r=0; r < batch.size; ++r) {
+        assertEquals(input.get(idx++).longValue(),
+            ((LongColumnVector) batch.cols[0]).vector[r]);
+      }
+    }
+  }
+
+  @Test
+  public void testLongMax() throws Exception {
+    TypeDescription schema = TypeDescription.createLong();
+
+    List<Long> input = Lists.newArrayList();
+    input.add(Long.MAX_VALUE);
+
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .setSchema(schema)
+            .stripeSize(100000)
+            .compress(CompressionKind.NONE)
+            .bufferSize(10000)
+            .encodingStrategy(encodingStrategy));
+    VectorizedRowBatch batch = schema.createRowBatch();
+    for(Long l : input) {
+      appendLong(batch, l);
+    }
+    writer.addRowBatch(batch);
+    writer.close();
+
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+    RecordReader rows = reader.rows();
+    batch = reader.getSchema().createRowBatch();
+    int idx = 0;
+    while (rows.nextBatch(batch)) {
+      for(int r=0; r < batch.size; ++r) {
+        assertEquals(input.get(idx++).longValue(),
+            ((LongColumnVector) batch.cols[0]).vector[r]);
+      }
+    }
+  }
+
+  @Test
+  public void testRandomInt() throws Exception {
+    TypeDescription schema = TypeDescription.createLong();
+
+    List<Long> input = Lists.newArrayList();
+    Random rand = new Random();
+    for(int i = 0; i < 100000; i++) {
+      input.add((long) rand.nextInt());
+    }
+
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .setSchema(schema)
+            .stripeSize(100000)
+            .compress(CompressionKind.NONE)
+            .bufferSize(10000)
+            .encodingStrategy(encodingStrategy));
+    VectorizedRowBatch batch = schema.createRowBatch(100000);
+    for(Long l : input) {
+      appendLong(batch, l);
+    }
+    writer.addRowBatch(batch);
+    writer.close();
+
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+    RecordReader rows = reader.rows();
+    batch = reader.getSchema().createRowBatch();
+    int idx = 0;
+    while (rows.nextBatch(batch)) {
+      for(int r=0; r < batch.size; ++r) {
+        assertEquals(input.get(idx++).longValue(),
+            ((LongColumnVector) batch.cols[0]).vector[r]);
+      }
+    }
+  }
+
+  @Test
+  public void testRandomLong() throws Exception {
+    TypeDescription schema = TypeDescription.createLong();
+
+    List<Long> input = Lists.newArrayList();
+    Random rand = new Random();
+    for(int i = 0; i < 100000; i++) {
+      input.add(rand.nextLong());
+    }
+
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .setSchema(schema)
+            .stripeSize(100000)
+            .compress(CompressionKind.NONE)
+            .bufferSize(10000)
+            .encodingStrategy(encodingStrategy));
+    VectorizedRowBatch batch = schema.createRowBatch(100000);
+    for(Long l : input) {
+      appendLong(batch, l);
+    }
+    writer.addRowBatch(batch);
+    writer.close();
+
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+    RecordReader rows = reader.rows();
+    batch = reader.getSchema().createRowBatch();
+    int idx = 0;
+    while (rows.nextBatch(batch)) {
+      for(int r=0; r < batch.size; ++r) {
+        assertEquals(input.get(idx++).longValue(),
+            ((LongColumnVector) batch.cols[0]).vector[r]);
+      }
+    }
+  }
+
+  @Test
+  public void testPatchedBaseNegativeMin() throws Exception {
+    TypeDescription schema = TypeDescription.createLong();
+
+    long[] inp = new long[] { 20, 2, 3, 2, 1, 3, 17, 71, 35, 2, 1, 139, 2, 2,
+        3, 1783, 475, 2, 1, 1, 3, 1, 3, 2, 32, 1, 2, 3, 1, 8, 30, 1, 3, 414, 1,
+        1, 135, 3, 3, 1, 414, 2, 1, 2, 2, 594, 2, 5, 6, 4, 11, 1, 2, 2, 1, 1,
+        52, 4, 1, 2, 7, 1, 17, 334, 1, 2, 1, 2, 2, 6, 1, 266, 1, 2, 217, 2, 6,
+        2, 13, 2, 2, 1, 2, 3, 5, 1, 2, 1, 7244, 11813, 1, 33, 2, -13, 1, 2, 3,
+        13, 1, 92, 3, 13, 5, 14, 9, 141, 12, 6, 15, 25, 1, 1, 1, 46, 2, 1, 1,
+        141, 3, 1, 1, 1, 1, 2, 1, 4, 34, 5, 78, 8, 1, 2, 2, 1, 9, 10, 2, 1, 4,
+        13, 1, 5, 4, 4, 19, 5, 1, 1, 1, 68, 33, 399, 1, 1885, 25, 5, 2, 4, 1,
+        1, 2, 16, 1, 2966, 3, 1, 1, 25501, 1, 1, 1, 66, 1, 3, 8, 131, 14, 5, 1,
+        2, 2, 1, 1, 8, 1, 1, 2, 1, 5, 9, 2, 3, 112, 13, 2, 2, 1, 5, 10, 3, 1,
+        1, 13, 2, 3, 4, 1, 3, 1, 1, 2, 1, 1, 2, 4, 2, 207, 1, 1, 2, 4, 3, 3, 2,
+        2, 16 };
+    List<Long> input = Lists.newArrayList(Longs.asList(inp));
+
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .setSchema(schema)
+            .stripeSize(100000)
+            .compress(CompressionKind.NONE)
+            .bufferSize(10000)
+            .encodingStrategy(encodingStrategy));
+    VectorizedRowBatch batch = schema.createRowBatch();
+    for(Long l : input) {
+      appendLong(batch, l);
+    }
+    writer.addRowBatch(batch);
+    writer.close();
+
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+    RecordReader rows = reader.rows();
+    batch = reader.getSchema().createRowBatch();
+    int idx = 0;
+    while (rows.nextBatch(batch)) {
+      for(int r=0; r < batch.size; ++r) {
+        assertEquals(input.get(idx++).longValue(),
+            ((LongColumnVector) batch.cols[0]).vector[r]);
+      }
+    }
+  }
+
+  @Test
+  public void testPatchedBaseNegativeMin2() throws Exception {
+    TypeDescription schema = TypeDescription.createLong();
+
+    long[] inp = new long[] { 20, 2, 3, 2, 1, 3, 17, 71, 35, 2, 1, 139, 2, 2,
+        3, 1783, 475, 2, 1, 1, 3, 1, 3, 2, 32, 1, 2, 3, 1, 8, 30, 1, 3, 414, 1,
+        1, 135, 3, 3, 1, 414, 2, 1, 2, 2, 594, 2, 5, 6, 4, 11, 1, 2, 2, 1, 1,
+        52, 4, 1, 2, 7, 1, 17, 334, 1, 2, 1, 2, 2, 6, 1, 266, 1, 2, 217, 2, 6,
+        2, 13, 2, 2, 1, 2, 3, 5, 1, 2, 1, 7244, 11813, 1, 33, 2, -1, 1, 2, 3,
+        13, 1, 92, 3, 13, 5, 14, 9, 141, 12, 6, 15, 25, 1, 1, 1, 46, 2, 1, 1,
+        141, 3, 1, 1, 1, 1, 2, 1, 4, 34, 5, 78, 8, 1, 2, 2, 1, 9, 10, 2, 1, 4,
+        13, 1, 5, 4, 4, 19, 5, 1, 1, 1, 68, 33, 399, 1, 1885, 25, 5, 2, 4, 1,
+        1, 2, 16, 1, 2966, 3, 1, 1, 25501, 1, 1, 1, 66, 1, 3, 8, 131, 14, 5, 1,
+        2, 2, 1, 1, 8, 1, 1, 2, 1, 5, 9, 2, 3, 112, 13, 2, 2, 1, 5, 10, 3, 1,
+        1, 13, 2, 3, 4, 1, 3, 1, 1, 2, 1, 1, 2, 4, 2, 207, 1, 1, 2, 4, 3, 3, 2,
+        2, 16 };
+    List<Long> input = Lists.newArrayList(Longs.asList(inp));
+
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .setSchema(schema)
+            .stripeSize(100000)
+            .compress(CompressionKind.NONE)
+            .bufferSize(10000)
+            .encodingStrategy(encodingStrategy));
+    VectorizedRowBatch batch = schema.createRowBatch();
+    for(Long l : input) {
+      appendLong(batch, l);
+    }
+    writer.addRowBatch(batch);
+    writer.close();
+
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+    RecordReader rows = reader.rows();
+    batch = reader.getSchema().createRowBatch();
+    int idx = 0;
+    while (rows.nextBatch(batch)) {
+      for(int r=0; r < batch.size; ++r) {
+        assertEquals(input.get(idx++).longValue(),
+            ((LongColumnVector) batch.cols[0]).vector[r]);
+      }
+    }
+  }
+
+  @Test
+  public void testPatchedBaseNegativeMin3() throws Exception {
+    TypeDescription schema = TypeDescription.createLong();
+
+    long[] inp = new long[] { 20, 2, 3, 2, 1, 3, 17, 71, 35, 2, 1, 139, 2, 2,
+        3, 1783, 475, 2, 1, 1, 3, 1, 3, 2, 32, 1, 2, 3, 1, 8, 30, 1, 3, 414, 1,
+        1, 135, 3, 3, 1, 414, 2, 1, 2, 2, 594, 2, 5, 6, 4, 11, 1, 2, 2, 1, 1,
+        52, 4, 1, 2, 7, 1, 17, 334, 1, 2, 1, 2, 2, 6, 1, 266, 1, 2, 217, 2, 6,
+        2, 13, 2, 2, 1, 2, 3, 5, 1, 2, 1, 7244, 11813, 1, 33, 2, 0, 1, 2, 3,
+        13, 1, 92, 3, 13, 5, 14, 9, 141, 12, 6, 15, 25, 1, 1, 1, 46, 2, 1, 1,
+        141, 3, 1, 1, 1, 1, 2, 1, 4, 34, 5, 78, 8, 1, 2, 2, 1, 9, 10, 2, 1, 4,
+        13, 1, 5, 4, 4, 19, 5, 1, 1, 1, 68, 33, 399, 1, 1885, 25, 5, 2, 4, 1,
+        1, 2, 16, 1, 2966, 3, 1, 1, 25501, 1, 1, 1, 66, 1, 3, 8, 131, 14, 5, 1,
+        2, 2, 1, 1, 8, 1, 1, 2, 1, 5, 9, 2, 3, 112, 13, 2, 2, 1, 5, 10, 3, 1,
+        1, 13, 2, 3, 4, 1, 3, 1, 1, 2, 1, 1, 2, 4, 2, 207, 1, 1, 2, 4, 3, 3, 2,
+        2, 16 };
+    List<Long> input = Lists.newArrayList(Longs.asList(inp));
+
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .setSchema(schema)
+            .stripeSize(100000)
+            .compress(CompressionKind.NONE)
+            .bufferSize(10000)
+            .encodingStrategy(encodingStrategy));
+    VectorizedRowBatch batch = schema.createRowBatch();
+    for(Long l : input) {
+      appendLong(batch, l);
+    }
+    writer.addRowBatch(batch);
+    writer.close();
+
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+    RecordReader rows = reader.rows();
+    batch = reader.getSchema().createRowBatch();
+    int idx = 0;
+    while (rows.nextBatch(batch)) {
+      for(int r=0; r < batch.size; ++r) {
+        assertEquals(input.get(idx++).longValue(),
+            ((LongColumnVector) batch.cols[0]).vector[r]);
+      }
+    }
+  }
+
+  @Test
+  public void testPatchedBaseNegativeMin4() throws Exception {
+    TypeDescription schema = TypeDescription.createLong();
+
+    long[] inp = new long[] { 13, 13, 11, 8, 13, 10, 10, 11, 11, 14, 11, 7, 13,
+        12, 12, 11, 15, 12, 12, 9, 8, 10, 13, 11, 8, 6, 5, 6, 11, 7, 15, 10, 7,
+        6, 8, 7, 9, 9, 11, 33, 11, 3, 7, 4, 6, 10, 14, 12, 5, 14, 7, 6 };
+    List<Long> input = Lists.newArrayList(Longs.asList(inp));
+
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .setSchema(schema)
+            .stripeSize(100000)
+            .compress(CompressionKind.NONE)
+            .bufferSize(10000)
+            .encodingStrategy(encodingStrategy));
+    VectorizedRowBatch batch = schema.createRowBatch();
+    for(Long l : input) {
+      appendLong(batch, l);
+    }
+    writer.addRowBatch(batch);
+    writer.close();
+
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+    RecordReader rows = reader.rows();
+    batch = reader.getSchema().createRowBatch();
+    int idx = 0;
+    while (rows.nextBatch(batch)) {
+      for(int r=0; r < batch.size; ++r) {
+        assertEquals(input.get(idx++).longValue(),
+            ((LongColumnVector) batch.cols[0]).vector[r]);
+      }
+    }
+  }
+
+  @Test
+  public void testPatchedBaseAt0() throws Exception {
+    TypeDescription schema = TypeDescription.createLong();
+
+    List<Long> input = Lists.newArrayList();
+    Random rand = new Random();
+    for(int i = 0; i < 5120; i++) {
+      input.add((long) rand.nextInt(100));
+    }
+    input.set(0, 20000L);
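+    // a single large outlier among small random values, placed at index 0 to
+    // exercise the patched-base encoding at the start of the run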
+
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .setSchema(schema)
+            .stripeSize(100000)
+            .compress(CompressionKind.NONE)
+            .bufferSize(10000)
+            .encodingStrategy(encodingStrategy));
+    VectorizedRowBatch batch = schema.createRowBatch(5120);
+    for(Long l : input) {
+      appendLong(batch, l);
+    }
+    writer.addRowBatch(batch);
+    writer.close();
+
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+    RecordReader rows = reader.rows();
+    batch = reader.getSchema().createRowBatch();
+    int idx = 0;
+    while (rows.nextBatch(batch)) {
+      for(int r=0; r < batch.size; ++r) {
+        assertEquals(input.get(idx++).longValue(),
+            ((LongColumnVector) batch.cols[0]).vector[r]);
+      }
+    }
+  }
+
+  @Test
+  public void testPatchedBaseAt1() throws Exception {
+    TypeDescription schema = TypeDescription.createLong();
+
+    List<Long> input = Lists.newArrayList();
+    Random rand = new Random();
+    for(int i = 0; i < 5120; i++) {
+      input.add((long) rand.nextInt(100));
+    }
+    input.set(1, 20000L);
+
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .setSchema(schema)
+            .stripeSize(100000)
+            .compress(CompressionKind.NONE)
+            .bufferSize(10000)
+            .encodingStrategy(encodingStrategy));
+    VectorizedRowBatch batch = schema.createRowBatch(5120);
+    for(Long l : input) {
+      appendLong(batch, l);
+    }
+    writer.addRowBatch(batch);
+    writer.close();
+
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+    RecordReader rows = reader.rows();
+    int idx = 0;
+    while (rows.nextBatch(batch)) {
+      for(int r=0; r < batch.size; ++r) {
+        assertEquals(input.get(idx++).longValue(),
+            ((LongColumnVector) batch.cols[0]).vector[r]);
+      }
+    }
+  }
+
+  @Test
+  public void testPatchedBaseAt255() throws Exception {
+    TypeDescription schema = TypeDescription.createLong();
+
+    List<Long> input = Lists.newArrayList();
+    Random rand = new Random();
+    for(int i = 0; i < 5120; i++) {
+      input.add((long) rand.nextInt(100));
+    }
+    input.set(255, 20000L);
+
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .setSchema(schema)
+            .stripeSize(100000)
+            .bufferSize(10000)
+            .encodingStrategy(encodingStrategy));
+    VectorizedRowBatch batch = schema.createRowBatch(5120);
+    for(Long l : input) {
+      appendLong(batch, l);
+    }
+    writer.addRowBatch(batch);
+    writer.close();
+
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+    RecordReader rows = reader.rows();
+    batch = reader.getSchema().createRowBatch();
+    int idx = 0;
+    while (rows.nextBatch(batch)) {
+      for(int r=0; r < batch.size; ++r) {
+        assertEquals(input.get(idx++).longValue(),
+            ((LongColumnVector) batch.cols[0]).vector[r]);
+      }
+    }
+  }
+
+  @Test
+  public void testPatchedBaseAt256() throws Exception {
+    TypeDescription schema = TypeDescription.createLong();
+
+    List<Long> input = Lists.newArrayList();
+    Random rand = new Random();
+    for(int i = 0; i < 5120; i++) {
+      input.add((long) rand.nextInt(100));
+    }
+    input.set(256, 20000L);
+
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .setSchema(schema)
+            .stripeSize(100000)
+            .bufferSize(10000)
+            .encodingStrategy(encodingStrategy));
+    VectorizedRowBatch batch = schema.createRowBatch(5120);
+    for(Long l : input) {
+      appendLong(batch, l);
+    }
+    writer.addRowBatch(batch);
+    writer.close();
+
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+    RecordReader rows = reader.rows();
+    batch = reader.getSchema().createRowBatch();
+    int idx = 0;
+    while (rows.nextBatch(batch)) {
+      for(int r=0; r < batch.size; ++r) {
+        assertEquals(input.get(idx++).longValue(),
+            ((LongColumnVector) batch.cols[0]).vector[r]);
+      }
+    }
+  }
+
+  @Test
+  public void testPatchedBase510() throws Exception {
+    TypeDescription schema = TypeDescription.createLong();
+
+    List<Long> input = Lists.newArrayList();
+    Random rand = new Random();
+    for(int i = 0; i < 5120; i++) {
+      input.add((long) rand.nextInt(100));
+    }
+    input.set(510, 20000L);
+
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .setSchema(schema)
+            .stripeSize(100000)
+            .bufferSize(10000)
+            .encodingStrategy(encodingStrategy));
+    VectorizedRowBatch batch = schema.createRowBatch(5120);
+    for(Long l : input) {
+      appendLong(batch, l);
+    }
+    writer.addRowBatch(batch);
+    writer.close();
+
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+    RecordReader rows = reader.rows();
+    batch = reader.getSchema().createRowBatch();
+    int idx = 0;
+    while (rows.nextBatch(batch)) {
+      for(int r=0; r < batch.size; ++r) {
+        assertEquals(input.get(idx++).longValue(),
+            ((LongColumnVector) batch.cols[0]).vector[r]);
+      }
+    }
+  }
+
+  @Test
+  public void testPatchedBase511() throws Exception {
+    TypeDescription schema = TypeDescription.createLong();
+
+    List<Long> input = Lists.newArrayList();
+    Random rand = new Random();
+    for(int i = 0; i < 5120; i++) {
+      input.add((long) rand.nextInt(100));
+    }
+    input.set(511, 20000L);
+
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .setSchema(schema)
+            .stripeSize(100000)
+            .bufferSize(10000)
+            .encodingStrategy(encodingStrategy));
+    VectorizedRowBatch batch = schema.createRowBatch(5120);
+    for(Long l : input) {
+      appendLong(batch, l);
+    }
+    writer.addRowBatch(batch);
+    writer.close();
+
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+    RecordReader rows = reader.rows();
+    batch = reader.getSchema().createRowBatch();
+    int idx = 0;
+    while (rows.nextBatch(batch)) {
+      for(int r=0; r < batch.size; ++r) {
+        assertEquals(input.get(idx++).longValue(),
+            ((LongColumnVector) batch.cols[0]).vector[r]);
+      }
+    }
+  }
+
+  @Test
+  public void testPatchedBaseMax1() throws Exception {
+    TypeDescription schema = TypeDescription.createLong();
+
+    List<Long> input = Lists.newArrayList();
+    Random rand = new Random();
+    for (int i = 0; i < 5120; i++) {
+      input.add((long) rand.nextInt(60));
+    }
+    input.set(511, Long.MAX_VALUE);
+
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .setSchema(schema)
+            .stripeSize(100000)
+            .bufferSize(10000)
+            .encodingStrategy(encodingStrategy));
+    VectorizedRowBatch batch = schema.createRowBatch(5120);
+    for (Long l : input) {
+      appendLong(batch, l);
+    }
+    writer.addRowBatch(batch);
+    writer.close();
+
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+    RecordReader rows = reader.rows();
+    batch = reader.getSchema().createRowBatch();
+    int idx = 0;
+    while (rows.nextBatch(batch)) {
+      for(int r=0; r < batch.size; ++r) {
+        assertEquals(input.get(idx++).longValue(),
+            ((LongColumnVector) batch.cols[0]).vector[r]);
+      }
+    }
+  }
+
+  @Test
+  public void testPatchedBaseMax2() throws Exception {
+    TypeDescription schema = TypeDescription.createLong();
+
+    List<Long> input = Lists.newArrayList();
+    Random rand = new Random();
+    for (int i = 0; i < 5120; i++) {
+      input.add((long) rand.nextInt(60));
+    }
+    input.set(128, Long.MAX_VALUE);
+    input.set(256, Long.MAX_VALUE);
+    input.set(511, Long.MAX_VALUE);
+
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .setSchema(schema)
+            .stripeSize(100000)
+            .bufferSize(10000)
+            .encodingStrategy(encodingStrategy));
+    VectorizedRowBatch batch = schema.createRowBatch(5120);
+    for (Long l : input) {
+      appendLong(batch, l);
+    }
+    writer.addRowBatch(batch);
+    writer.close();
+
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+    RecordReader rows = reader.rows();
+    batch = reader.getSchema().createRowBatch();
+    int idx = 0;
+    while (rows.nextBatch(batch)) {
+      for(int r=0; r < batch.size; ++r) {
+        assertEquals(input.get(idx++).longValue(),
+            ((LongColumnVector) batch.cols[0]).vector[r]);
+      }
+    }
+  }
+
+  @Test
+  public void testPatchedBaseMax3() throws Exception {
+    TypeDescription schema = TypeDescription.createLong();
+
+    List<Long> input = Lists.newArrayList();
+    input.add(371946367L);
+    input.add(11963367L);
+    input.add(68639400007L);
+    input.add(100233367L);
+    input.add(6367L);
+    input.add(10026367L);
+    input.add(3670000L);
+    input.add(3602367L);
+    input.add(4719226367L);
+    input.add(7196367L);
+    input.add(444442L);
+    input.add(210267L);
+    input.add(21033L);
+    input.add(160267L);
+    input.add(400267L);
+    input.add(23634347L);
+    input.add(16027L);
+    input.add(46026367L);
+    input.add(Long.MAX_VALUE);
+    input.add(33333L);
+
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .setSchema(schema)
+            .stripeSize(100000)
+            .bufferSize(10000)
+            .encodingStrategy(encodingStrategy));
+    VectorizedRowBatch batch = schema.createRowBatch();
+    for (Long l : input) {
+      appendLong(batch, l);
+    }
+    writer.addRowBatch(batch);
+    writer.close();
+
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+    RecordReader rows = reader.rows();
+    int idx = 0;
+    while (rows.nextBatch(batch)) {
+      for(int r=0; r < batch.size; ++r) {
+        assertEquals(input.get(idx++).longValue(),
+            ((LongColumnVector) batch.cols[0]).vector[r]);
+      }
+    }
+  }
+
+  @Test
+  public void testPatchedBaseMax4() throws Exception {
+    TypeDescription schema = TypeDescription.createLong();
+
+    List<Long> input = Lists.newArrayList();
+    for (int i = 0; i < 25; i++) {
+      input.add(371292224226367L);
+      input.add(119622332222267L);
+      input.add(686329400222007L);
+      input.add(100233333222367L);
+      input.add(636272333322222L);
+      input.add(10202633223267L);
+      input.add(36700222022230L);
+      input.add(36023226224227L);
+      input.add(47192226364427L);
+      input.add(71963622222447L);
+      input.add(22244444222222L);
+      input.add(21220263327442L);
+      input.add(21032233332232L);
+      input.add(16026322232227L);
+      input.add(40022262272212L);
+      input.add(23634342227222L);
+      input.add(16022222222227L);
+      input.add(46026362222227L);
+      input.add(46026362222227L);
+      input.add(33322222222323L);
+    }
+    input.add(Long.MAX_VALUE);
+
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .setSchema(schema)
+            .stripeSize(100000)
+            .bufferSize(10000)
+            .encodingStrategy(encodingStrategy));
+    VectorizedRowBatch batch = schema.createRowBatch();
+    for (Long l : input) {
+      appendLong(batch, l);
+    }
+    writer.addRowBatch(batch);
+    writer.close();
+
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+    RecordReader rows = reader.rows();
+    batch = reader.getSchema().createRowBatch();
+    int idx = 0;
+    while (rows.nextBatch(batch)) {
+      for(int r=0; r < batch.size; ++r) {
+        assertEquals(input.get(idx++).longValue(),
+            ((LongColumnVector) batch.cols[0]).vector[r]);
+      }
+    }
+  }
+
+  @Test
+  public void testPatchedBaseTimestamp() throws Exception {
+    TypeDescription schema = TypeDescription.createStruct()
+        .addField("ts", TypeDescription.createTimestamp());
+
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .setSchema(schema)
+            .stripeSize(100000)
+            .bufferSize(10000)
+            .encodingStrategy(encodingStrategy));
+    VectorizedRowBatch batch = schema.createRowBatch();
+
+    List<Timestamp> tslist = Lists.newArrayList();
+    tslist.add(Timestamp.valueOf("2099-01-01 00:00:00"));
+    tslist.add(Timestamp.valueOf("2003-01-01 00:00:00"));
+    tslist.add(Timestamp.valueOf("1999-01-01 00:00:00"));
+    tslist.add(Timestamp.valueOf("1995-01-01 00:00:00"));
+    tslist.add(Timestamp.valueOf("2002-01-01 00:00:00"));
+    tslist.add(Timestamp.valueOf("2010-03-02 00:00:00"));
+    tslist.add(Timestamp.valueOf("2005-01-01 00:00:00"));
+    tslist.add(Timestamp.valueOf("2006-01-01 00:00:00"));
+    tslist.add(Timestamp.valueOf("2003-01-01 00:00:00"));
+    tslist.add(Timestamp.valueOf("1996-08-02 00:00:00"));
+    tslist.add(Timestamp.valueOf("1998-11-02 00:00:00"));
+    tslist.add(Timestamp.valueOf("2008-10-02 00:00:00"));
+    tslist.add(Timestamp.valueOf("1993-08-02 00:00:00"));
+    tslist.add(Timestamp.valueOf("2008-01-02 00:00:00"));
+    tslist.add(Timestamp.valueOf("2007-01-01 00:00:00"));
+    tslist.add(Timestamp.valueOf("2004-01-01 00:00:00"));
+    tslist.add(Timestamp.valueOf("2008-10-02 00:00:00"));
+    tslist.add(Timestamp.valueOf("2003-01-01 00:00:00"));
+    tslist.add(Timestamp.valueOf("2004-01-01 00:00:00"));
+    tslist.add(Timestamp.valueOf("2008-01-01 00:00:00"));
+    tslist.add(Timestamp.valueOf("2005-01-01 00:00:00"));
+    tslist.add(Timestamp.valueOf("1994-01-01 00:00:00"));
+    tslist.add(Timestamp.valueOf("2006-01-01 00:00:00"));
+    tslist.add(Timestamp.valueOf("2004-01-01 00:00:00"));
+    tslist.add(Timestamp.valueOf("2001-01-01 00:00:00"));
+    tslist.add(Timestamp.valueOf("2000-01-01 00:00:00"));
+    tslist.add(Timestamp.valueOf("2000-01-01 00:00:00"));
+    tslist.add(Timestamp.valueOf("2002-01-01 00:00:00"));
+    tslist.add(Timestamp.valueOf("2006-01-01 00:00:00"));
+    tslist.add(Timestamp.valueOf("2011-01-01 00:00:00"));
+    tslist.add(Timestamp.valueOf("2002-01-01 00:00:00"));
+    tslist.add(Timestamp.valueOf("2005-01-01 00:00:00"));
+    tslist.add(Timestamp.valueOf("1974-01-01 00:00:00"));
+    int idx = 0;
+    for (Timestamp ts : tslist) {
+      ((TimestampColumnVector) batch.cols[0]).set(idx++, ts);
+    }
+    batch.size = tslist.size();
+    writer.addRowBatch(batch);
+    writer.close();
+
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+    RecordReader rows = reader.rows();
+    batch = reader.getSchema().createRowBatch();
+    idx = 0;
+    while (rows.nextBatch(batch)) {
+      for(int r=0; r < batch.size; ++r) {
+        assertEquals(tslist.get(idx++),
+            ((TimestampColumnVector) batch.cols[0]).asScratchTimestamp(r));
+      }
+    }
+  }
+
+  @Test
+  public void testDirectLargeNegatives() throws Exception {
+    TypeDescription schema = TypeDescription.createLong();
+
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .setSchema(schema)
+            .stripeSize(100000)
+            .bufferSize(10000)
+            .encodingStrategy(encodingStrategy));
+    VectorizedRowBatch batch = schema.createRowBatch();
+
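+    // large negative values exercise the direct encoding path with full-width 64-bit values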
+    appendLong(batch, -7486502418706614742L);
+    appendLong(batch, 0L);
+    appendLong(batch, 1L);
+    appendLong(batch, 1L);
+    appendLong(batch, -5535739865598783616L);
+    writer.addRowBatch(batch);
+    writer.close();
+
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+    RecordReader rows = reader.rows();
+    batch = reader.getSchema().createRowBatch();
+    assertEquals(true, rows.nextBatch(batch));
+    assertEquals(5, batch.size);
+    assertEquals(-7486502418706614742L,
+        ((LongColumnVector) batch.cols[0]).vector[0]);
+    assertEquals(0L,
+        ((LongColumnVector) batch.cols[0]).vector[1]);
+    assertEquals(1L,
+        ((LongColumnVector) batch.cols[0]).vector[2]);
+    assertEquals(1L,
+        ((LongColumnVector) batch.cols[0]).vector[3]);
+    assertEquals(-5535739865598783616L,
+        ((LongColumnVector) batch.cols[0]).vector[4]);
+    assertEquals(false, rows.nextBatch(batch));
+  }
+
+  @Test
+  public void testSeek() throws Exception {
+    TypeDescription schema = TypeDescription.createLong();
+
+    List<Long> input = Lists.newArrayList();
+    Random rand = new Random();
+    for(int i = 0; i < 100000; i++) {
+      input.add((long) rand.nextInt());
+    }
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .setSchema(schema)
+            .compress(CompressionKind.NONE)
+            .stripeSize(100000)
+            .bufferSize(10000)
+            .version(OrcFile.Version.V_0_11)
+            .encodingStrategy(encodingStrategy));
+    VectorizedRowBatch batch = schema.createRowBatch(100000);
+    for(Long l : input) {
+      appendLong(batch, l);
+    }
+    writer.addRowBatch(batch);
+    writer.close();
+
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+    RecordReader rows = reader.rows();
+    batch = reader.getSchema().createRowBatch();
+    int idx = 55555;
+    rows.seekToRow(idx);
+    while (rows.nextBatch(batch)) {
+      for(int r=0; r < batch.size; ++r) {
+        assertEquals(input.get(idx++).longValue(),
+            ((LongColumnVector) batch.cols[0]).vector[r]);
+      }
+    }
+  }
+}
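
The tests above append values through an appendLong helper that is not shown in
this hunk. A minimal sketch of what such a helper presumably looks like, assuming
a batch whose only column is a LongColumnVector:

    static void appendLong(VectorizedRowBatch batch, long value) {
      // grow the batch by one row and write the value into the single long column
      LongColumnVector col = (LongColumnVector) batch.cols[0];
      col.vector[batch.size++] = value;
    }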

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/TestOrcNullOptimization.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/TestOrcNullOptimization.java b/orc/src/test/org/apache/hive/orc/TestOrcNullOptimization.java
new file mode 100644
index 0000000..bf9b902
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/TestOrcNullOptimization.java
@@ -0,0 +1,415 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc;
+
+import static junit.framework.Assert.assertEquals;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.List;
+import java.util.Random;
+
+import junit.framework.Assert;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+import org.apache.hive.orc.impl.RecordReaderImpl;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestName;
+
+import com.google.common.collect.Lists;
+
+public class TestOrcNullOptimization {
+
+  TypeDescription createMyStruct() {
+    return TypeDescription.createStruct()
+        .addField("a", TypeDescription.createInt())
+        .addField("b", TypeDescription.createString())
+        .addField("c", TypeDescription.createBoolean())
+        .addField("d", TypeDescription.createList(
+            TypeDescription.createStruct()
+                .addField("z", TypeDescription.createInt())));
+  }
+
+  void addRow(Writer writer, VectorizedRowBatch batch,
+              Integer a, String b, Boolean c,
+              Integer... d) throws IOException {
+    if (batch.size == batch.getMaxSize()) {
+      writer.addRowBatch(batch);
+      batch.reset();
+    }
+    int row = batch.size++;
+    LongColumnVector aColumn = (LongColumnVector) batch.cols[0];
+    BytesColumnVector bColumn = (BytesColumnVector) batch.cols[1];
+    LongColumnVector cColumn = (LongColumnVector) batch.cols[2];
+    ListColumnVector dColumn = (ListColumnVector) batch.cols[3];
+    StructColumnVector dStruct = (StructColumnVector) dColumn.child;
+    LongColumnVector dInt = (LongColumnVector) dStruct.fields[0];
+    if (a == null) {
+      aColumn.noNulls = false;
+      aColumn.isNull[row] = true;
+    } else {
+      aColumn.vector[row] = a;
+    }
+    if (b == null) {
+      bColumn.noNulls = false;
+      bColumn.isNull[row] = true;
+    } else {
+      bColumn.setVal(row, b.getBytes());
+    }
+    if (c == null) {
+      cColumn.noNulls = false;
+      cColumn.isNull[row] = true;
+    } else {
+      cColumn.vector[row] = c ? 1 : 0;
+    }
+    if (d == null) {
+      dColumn.noNulls = false;
+      dColumn.isNull[row] = true;
+    } else {
+      dColumn.offsets[row] = dColumn.childCount;
+      dColumn.lengths[row] = d.length;
+      dColumn.childCount += d.length;
+      for(int e=0; e < d.length; ++e) {
+        dInt.vector[(int) dColumn.offsets[row] + e] = d[e];
+      }
+    }
+  }
+
+  Path workDir = new Path(System.getProperty("test.tmp.dir",
+      "target" + File.separator + "test" + File.separator + "tmp"));
+
+  Configuration conf;
+  FileSystem fs;
+  Path testFilePath;
+
+  @Rule
+  public TestName testCaseName = new TestName();
+
+  @Before
+  public void openFileSystem() throws Exception {
+    conf = new Configuration();
+    fs = FileSystem.getLocal(conf);
+    testFilePath = new Path(workDir, "TestOrcNullOptimization." +
+        testCaseName.getMethodName() + ".orc");
+    fs.delete(testFilePath, false);
+  }
+
+  @Test
+  public void testMultiStripeWithNull() throws Exception {
+    TypeDescription schema = createMyStruct();
+    Writer writer = OrcFile.createWriter(testFilePath,
+                                         OrcFile.writerOptions(conf)
+                                         .setSchema(schema)
+                                         .stripeSize(100000)
+                                         .compress(CompressionKind.NONE)
+                                         .bufferSize(10000));
+    Random rand = new Random(100);
+    VectorizedRowBatch batch = schema.createRowBatch();
+    addRow(writer, batch, null, null, true, 100);
+    for (int i = 2; i < 20000; i++) {
+      addRow(writer, batch, rand.nextInt(1), "a", true, 100);
+    }
+    addRow(writer, batch, null, null, true, 100);
+    writer.addRowBatch(batch);
+    writer.close();
+
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+    // check the stats
+    ColumnStatistics[] stats = reader.getStatistics();
+    assertEquals(20000, reader.getNumberOfRows());
+    assertEquals(20000, stats[0].getNumberOfValues());
+
+    assertEquals(0, ((IntegerColumnStatistics) stats[1]).getMaximum());
+    assertEquals(0, ((IntegerColumnStatistics) stats[1]).getMinimum());
+    assertEquals(true, ((IntegerColumnStatistics) stats[1]).isSumDefined());
+    assertEquals(0, ((IntegerColumnStatistics) stats[1]).getSum());
+    assertEquals("count: 19998 hasNull: true min: 0 max: 0 sum: 0",
+        stats[1].toString());
+
+    assertEquals("a", ((StringColumnStatistics) stats[2]).getMaximum());
+    assertEquals("a", ((StringColumnStatistics) stats[2]).getMinimum());
+    assertEquals(19998, stats[2].getNumberOfValues());
+    assertEquals("count: 19998 hasNull: true min: a max: a sum: 19998",
+        stats[2].toString());
+
+    // check the inspectors
+    assertEquals("struct<a:int,b:string,c:boolean,d:array<struct<z:int>>>",
+        reader.getSchema().toString());
+
+    RecordReader rows = reader.rows();
+
+    List<Boolean> expected = Lists.newArrayList();
+    for (StripeInformation sinfo : reader.getStripes()) {
+      expected.add(false);
+    }
+    // only the first and last stripe will have PRESENT stream
+    expected.set(0, true);
+    expected.set(expected.size() - 1, true);
+
+    List<Boolean> got = Lists.newArrayList();
+    // check if the stripe footer contains a PRESENT stream
+    for (StripeInformation sinfo : reader.getStripes()) {
+      OrcProto.StripeFooter sf =
+        ((RecordReaderImpl) rows).readStripeFooter(sinfo);
+      got.add(sf.toString().indexOf(OrcProto.Stream.Kind.PRESENT.toString())
+              != -1);
+    }
+    assertEquals(expected, got);
+
+    batch = reader.getSchema().createRowBatch();
+    LongColumnVector aColumn = (LongColumnVector) batch.cols[0];
+    BytesColumnVector bColumn = (BytesColumnVector) batch.cols[1];
+    LongColumnVector cColumn = (LongColumnVector) batch.cols[2];
+    ListColumnVector dColumn = (ListColumnVector) batch.cols[3];
+    LongColumnVector dElements =
+        (LongColumnVector)(((StructColumnVector) dColumn.child).fields[0]);
+    assertEquals(true, rows.nextBatch(batch));
+    assertEquals(1024, batch.size);
+
+    // row 1
+    assertEquals(true, aColumn.isNull[0]);
+    assertEquals(true, bColumn.isNull[0]);
+    assertEquals(1, cColumn.vector[0]);
+    assertEquals(0, dColumn.offsets[0]);
+    assertEquals(1, dColumn.lengths[0]);
+    assertEquals(100, dElements.vector[0]);
+
+    rows.seekToRow(19998);
+    rows.nextBatch(batch);
+    assertEquals(2, batch.size);
+
+    // last-1 row
+    assertEquals(0, aColumn.vector[0]);
+    assertEquals("a", bColumn.toString(0));
+    assertEquals(1, cColumn.vector[0]);
+    assertEquals(0, dColumn.offsets[0]);
+    assertEquals(1, dColumn.lengths[0]);
+    assertEquals(100, dElements.vector[0]);
+
+    // last row
+    assertEquals(true, aColumn.isNull[1]);
+    assertEquals(true, bColumn.isNull[1]);
+    assertEquals(1, cColumn.vector[1]);
+    assertEquals(1, dColumn.offsets[1]);
+    assertEquals(1, dColumn.lengths[1]);
+    assertEquals(100, dElements.vector[1]);
+
+    assertEquals(false, rows.nextBatch(batch));
+    rows.close();
+  }
+
+  @Test
+  public void testMultiStripeWithoutNull() throws Exception {
+    TypeDescription schema = createMyStruct();
+    Writer writer = OrcFile.createWriter(testFilePath,
+                                         OrcFile.writerOptions(conf)
+                                         .setSchema(schema)
+                                         .stripeSize(100000)
+                                         .compress(CompressionKind.NONE)
+                                         .bufferSize(10000));
+    Random rand = new Random(100);
+    VectorizedRowBatch batch = schema.createRowBatch();
+    for (int i = 1; i < 20000; i++) {
+      addRow(writer, batch, rand.nextInt(1), "a", true, 100);
+    }
+    addRow(writer, batch, 0, "b", true, 100);
+    writer.addRowBatch(batch);
+    writer.close();
+
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+    // check the stats
+    ColumnStatistics[] stats = reader.getStatistics();
+    assertEquals(20000, reader.getNumberOfRows());
+    assertEquals(20000, stats[0].getNumberOfValues());
+
+    assertEquals(0, ((IntegerColumnStatistics) stats[1]).getMaximum());
+    assertEquals(0, ((IntegerColumnStatistics) stats[1]).getMinimum());
+    assertEquals(true, ((IntegerColumnStatistics) stats[1]).isSumDefined());
+    assertEquals(0, ((IntegerColumnStatistics) stats[1]).getSum());
+    assertEquals("count: 20000 hasNull: false min: 0 max: 0 sum: 0",
+        stats[1].toString());
+
+    assertEquals("b", ((StringColumnStatistics) stats[2]).getMaximum());
+    assertEquals("a", ((StringColumnStatistics) stats[2]).getMinimum());
+    assertEquals(20000, stats[2].getNumberOfValues());
+    assertEquals("count: 20000 hasNull: false min: a max: b sum: 20000",
+        stats[2].toString());
+
+    // check the inspectors
+    Assert.assertEquals("struct<a:int,b:string,c:boolean,d:array<struct<z:int>>>",
+        reader.getSchema().toString());
+
+    RecordReader rows = reader.rows();
+
+    // none of the stripes will have PRESENT stream
+    List<Boolean> expected = Lists.newArrayList();
+    for (StripeInformation sinfo : reader.getStripes()) {
+      expected.add(false);
+    }
+
+    List<Boolean> got = Lists.newArrayList();
+    // check if the stripe footer contains a PRESENT stream
+    for (StripeInformation sinfo : reader.getStripes()) {
+      OrcProto.StripeFooter sf =
+        ((RecordReaderImpl) rows).readStripeFooter(sinfo);
+      got.add(sf.toString().indexOf(OrcProto.Stream.Kind.PRESENT.toString())
+              != -1);
+    }
+    assertEquals(expected, got);
+
+    rows.seekToRow(19998);
+
+    batch = reader.getSchema().createRowBatch();
+    LongColumnVector aColumn = (LongColumnVector) batch.cols[0];
+    BytesColumnVector bColumn = (BytesColumnVector) batch.cols[1];
+    LongColumnVector cColumn = (LongColumnVector) batch.cols[2];
+    ListColumnVector dColumn = (ListColumnVector) batch.cols[3];
+    LongColumnVector dElements =
+        (LongColumnVector)(((StructColumnVector) dColumn.child).fields[0]);
+
+    assertEquals(true, rows.nextBatch(batch));
+    assertEquals(2, batch.size);
+
+    // last-1 row
+    assertEquals(0, aColumn.vector[0]);
+    assertEquals("a", bColumn.toString(0));
+    assertEquals(1, cColumn.vector[0]);
+    assertEquals(0, dColumn.offsets[0]);
+    assertEquals(1, dColumn.lengths[0]);
+    assertEquals(100, dElements.vector[0]);
+
+    // last row
+    assertEquals(0, aColumn.vector[1]);
+    assertEquals("b", bColumn.toString(1));
+    assertEquals(1, cColumn.vector[1]);
+    assertEquals(1, dColumn.offsets[1]);
+    assertEquals(1, dColumn.lengths[1]);
+    assertEquals(100, dElements.vector[1]);
+    rows.close();
+  }
+
+  @Test
+  public void testColumnsWithNullAndCompression() throws Exception {
+    TypeDescription schema = createMyStruct();
+    Writer writer = OrcFile.createWriter(testFilePath,
+                                         OrcFile.writerOptions(conf)
+                                         .setSchema(schema)
+                                         .stripeSize(100000)
+                                         .bufferSize(10000));
+    VectorizedRowBatch batch = schema.createRowBatch();
+    addRow(writer, batch, 3, "a", true, 100);
+    addRow(writer, batch, null, "b", true, 100);
+    addRow(writer, batch, 3, null, false, 100);
+    addRow(writer, batch, 3, "d", true, 100);
+    addRow(writer, batch, 2, "e", true, 100);
+    addRow(writer, batch, 2, "f", true, 100);
+    addRow(writer, batch, 2, "g", true, 100);
+    addRow(writer, batch, 2, "h", true, 100);
+    writer.addRowBatch(batch);
+    writer.close();
+
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+    // check the stats
+    ColumnStatistics[] stats = reader.getStatistics();
+    assertEquals(8, reader.getNumberOfRows());
+    assertEquals(8, stats[0].getNumberOfValues());
+
+    assertEquals(3, ((IntegerColumnStatistics) stats[1]).getMaximum());
+    assertEquals(2, ((IntegerColumnStatistics) stats[1]).getMinimum());
+    assertEquals(true, ((IntegerColumnStatistics) stats[1]).isSumDefined());
+    assertEquals(17, ((IntegerColumnStatistics) stats[1]).getSum());
+    assertEquals("count: 7 hasNull: true min: 2 max: 3 sum: 17",
+        stats[1].toString());
+
+    assertEquals("h", ((StringColumnStatistics) stats[2]).getMaximum());
+    assertEquals("a", ((StringColumnStatistics) stats[2]).getMinimum());
+    assertEquals(7, stats[2].getNumberOfValues());
+    assertEquals("count: 7 hasNull: true min: a max: h sum: 7",
+        stats[2].toString());
+
+    // check the inspectors
+    batch = reader.getSchema().createRowBatch();
+    LongColumnVector aColumn = (LongColumnVector) batch.cols[0];
+    BytesColumnVector bColumn = (BytesColumnVector) batch.cols[1];
+    LongColumnVector cColumn = (LongColumnVector) batch.cols[2];
+    ListColumnVector dColumn = (ListColumnVector) batch.cols[3];
+    LongColumnVector dElements =
+        (LongColumnVector)(((StructColumnVector) dColumn.child).fields[0]);
+    Assert.assertEquals("struct<a:int,b:string,c:boolean,d:array<struct<z:int>>>",
+        reader.getSchema().toString());
+
+    RecordReader rows = reader.rows();
+    // only the last stripe will have a PRESENT stream
+    List<Boolean> expected = Lists.newArrayList();
+    for (StripeInformation sinfo : reader.getStripes()) {
+      expected.add(false);
+    }
+    expected.set(expected.size() - 1, true);
+
+    List<Boolean> got = Lists.newArrayList();
+    // check if the stripe footer contains a PRESENT stream
+    for (StripeInformation sinfo : reader.getStripes()) {
+      OrcProto.StripeFooter sf =
+        ((RecordReaderImpl) rows).readStripeFooter(sinfo);
+      got.add(sf.toString().indexOf(OrcProto.Stream.Kind.PRESENT.toString())
+              != -1);
+    }
+    assertEquals(expected, got);
+
+    assertEquals(true, rows.nextBatch(batch));
+    assertEquals(8, batch.size);
+
+    // row 1
+    assertEquals(3, aColumn.vector[0]);
+    assertEquals("a", bColumn.toString(0));
+    assertEquals(1, cColumn.vector[0]);
+    assertEquals(0, dColumn.offsets[0]);
+    assertEquals(1, dColumn.lengths[0]);
+    assertEquals(100, dElements.vector[0]);
+
+    // row 2
+    assertEquals(true, aColumn.isNull[1]);
+    assertEquals("b", bColumn.toString(1));
+    assertEquals(1, cColumn.vector[1]);
+    assertEquals(1, dColumn.offsets[1]);
+    assertEquals(1, dColumn.lengths[1]);
+    assertEquals(100, dElements.vector[1]);
+
+    // row 3
+    assertEquals(3, aColumn.vector[2]);
+    assertEquals(true, bColumn.isNull[2]);
+    assertEquals(0, cColumn.vector[2]);
+    assertEquals(2, dColumn.offsets[2]);
+    assertEquals(1, dColumn.lengths[2]);
+    assertEquals(100, dElements.vector[2]);
+
+    rows.close();
+  }
+}
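
The addRow helper above illustrates the ListColumnVector protocol: each row stores
an offset into the shared child vector plus a length, and childCount reserves the
slots. A minimal sketch of that pattern in isolation, assuming column 0 of the
batch is an array<int>:

    ListColumnVector list = (ListColumnVector) batch.cols[0];
    LongColumnVector elements = (LongColumnVector) list.child;
    int row = batch.size++;
    list.offsets[row] = list.childCount;   // this row's elements start here
    list.lengths[row] = 3;                 // this row has three elements
    list.childCount += 3;                  // reserve the slots in the child vector
    for (int e = 0; e < 3; ++e) {
      elements.vector[(int) list.offsets[row] + e] = 10 * (e + 1);  // 10, 20, 30
    }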

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/TestOrcTimezone1.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/TestOrcTimezone1.java b/orc/src/test/org/apache/hive/orc/TestOrcTimezone1.java
new file mode 100644
index 0000000..6c65c74
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/TestOrcTimezone1.java
@@ -0,0 +1,189 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc;
+
+import static junit.framework.Assert.assertEquals;
+import static junit.framework.Assert.assertNotNull;
+
+import java.io.File;
+import java.sql.Timestamp;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
+import java.util.TimeZone;
+
+import junit.framework.Assert;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestName;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import com.google.common.collect.Lists;
+
+/**
+ *
+ */
+@RunWith(Parameterized.class)
+public class TestOrcTimezone1 {
+  Path workDir = new Path(System.getProperty("test.tmp.dir",
+      "target" + File.separator + "test" + File.separator + "tmp"));
+  Configuration conf;
+  FileSystem fs;
+  Path testFilePath;
+  String writerTimeZone;
+  String readerTimeZone;
+  static TimeZone defaultTimeZone = TimeZone.getDefault();
+
+  public TestOrcTimezone1(String writerTZ, String readerTZ) {
+    this.writerTimeZone = writerTZ;
+    this.readerTimeZone = readerTZ;
+  }
+
+  @Parameterized.Parameters
+  public static Collection<Object[]> data() {
+    List<Object[]> result = Arrays.asList(new Object[][]{
+        /* Extreme timezones */
+        {"GMT-12:00", "GMT+14:00"},
+        /* No difference in DST */
+        {"America/Los_Angeles", "America/Los_Angeles"}, /* same timezone both with DST */
+        {"Europe/Berlin", "Europe/Berlin"}, /* same as above but europe */
+        {"America/Phoenix", "Asia/Kolkata"} /* Writer no DST, Reader no DST */,
+        {"Europe/Berlin", "America/Los_Angeles"} /* Writer DST, Reader DST */,
+        {"Europe/Berlin", "America/Chicago"} /* Writer DST, Reader DST */,
+        /* With DST difference */
+        {"Europe/Berlin", "UTC"},
+        {"UTC", "Europe/Berlin"} /* Writer no DST, Reader DST */,
+        {"America/Los_Angeles", "Asia/Kolkata"} /* Writer DST, Reader no DST */,
+        {"Europe/Berlin", "Asia/Kolkata"} /* Writer DST, Reader no DST */,
+        /* Timezone offsets for the reader have changed historically */
+        {"Asia/Saigon", "Pacific/Enderbury"},
+        {"UTC", "Asia/Jerusalem"},
+
+        // NOTE:
+        // "1995-01-01 03:00:00.688888888" is not a valid time in the Pacific/Enderbury time zone.
+        // At 1995-01-01 00:00:00 the GMT offset moved from -11:00 to +13:00, which makes all
+        // wall-clock values on 1995-01-01 invalid. Try this with joda-time:
+        // new MutableDateTime("1995-01-01", DateTimeZone.forTimeZone(readerTimeZone));
+    });
+    return result;
+  }
+
+  @Rule
+  public TestName testCaseName = new TestName();
+
+  @Before
+  public void openFileSystem() throws Exception {
+    conf = new Configuration();
+    fs = FileSystem.getLocal(conf);
+    testFilePath = new Path(workDir, "TestOrcFile." +
+        testCaseName.getMethodName() + ".orc");
+    fs.delete(testFilePath, false);
+  }
+
+  @After
+  public void restoreTimeZone() {
+    TimeZone.setDefault(defaultTimeZone);
+  }
+
+  @Test
+  public void testTimestampWriter() throws Exception {
+    TypeDescription schema = TypeDescription.createTimestamp();
+
+    TimeZone.setDefault(TimeZone.getTimeZone(writerTimeZone));
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf).setSchema(schema).stripeSize(100000)
+            .bufferSize(10000));
+    assertEquals(writerTimeZone, TimeZone.getDefault().getID());
+    List<String> ts = Lists.newArrayList();
+    ts.add("2003-01-01 01:00:00.000000222");
+    ts.add("1996-08-02 09:00:00.723100809");
+    ts.add("1999-01-01 02:00:00.999999999");
+    ts.add("1995-01-02 03:00:00.688888888");
+    ts.add("2002-01-01 04:00:00.1");
+    ts.add("2010-03-02 05:00:00.000009001");
+    ts.add("2005-01-01 06:00:00.000002229");
+    ts.add("2006-01-01 07:00:00.900203003");
+    ts.add("2003-01-01 08:00:00.800000007");
+    ts.add("1998-11-02 10:00:00.857340643");
+    ts.add("2008-10-02 11:00:00.0");
+    ts.add("2037-01-01 00:00:00.000999");
+    ts.add("2014-03-28 00:00:00.0");
+    VectorizedRowBatch batch = schema.createRowBatch();
+    TimestampColumnVector times = (TimestampColumnVector) batch.cols[0];
+    for (String t : ts) {
+      times.set(batch.size++, Timestamp.valueOf(t));
+    }
+    writer.addRowBatch(batch);
+    writer.close();
+
+    TimeZone.setDefault(TimeZone.getTimeZone(readerTimeZone));
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+    assertEquals(readerTimeZone, TimeZone.getDefault().getID());
+    RecordReader rows = reader.rows();
+    batch = reader.getSchema().createRowBatch();
+    times = (TimestampColumnVector) batch.cols[0];
+    int idx = 0;
+    while (rows.nextBatch(batch)) {
+      for(int r=0; r < batch.size; ++r) {
+        assertEquals(ts.get(idx++), times.asScratchTimestamp(r).toString());
+      }
+    }
+    rows.close();
+  }
+
+  @Test
+  public void testReadTimestampFormat_0_11() throws Exception {
+    TimeZone.setDefault(TimeZone.getTimeZone(readerTimeZone));
+    Path oldFilePath = new Path(getClass().getClassLoader().
+        getSystemResource("orc-file-11-format.orc").getPath());
+    Reader reader = OrcFile.createReader(oldFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+    TypeDescription schema = reader.getSchema();
+    int col = schema.getFieldNames().indexOf("ts");
+    VectorizedRowBatch batch = schema.createRowBatch(10);
+    TimestampColumnVector ts = (TimestampColumnVector) batch.cols[col];
+
+    boolean[] include = new boolean[schema.getMaximumId() + 1];
+    include[schema.getChildren().get(col).getId()] = true;
+    RecordReader rows = reader.rows
+        (new Reader.Options().include(include));
+    assertEquals(true, rows.nextBatch(batch));
+    assertEquals(Timestamp.valueOf("2000-03-12 15:00:00"),
+        ts.asScratchTimestamp(0));
+
+    // check the contents of the last row (row 7499)
+    rows.seekToRow(7499);
+    assertEquals(true, rows.nextBatch(batch));
+    assertEquals(1, batch.size);
+    assertEquals(Timestamp.valueOf("2000-03-12 15:00:01"),
+        ts.asScratchTimestamp(0));
+
+    // no more batches should remain before closing
+    Assert.assertEquals(false, rows.nextBatch(batch));
+    rows.close();
+  }
+}
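
The NOTE in the parameter list above flags wall-clock times that never existed
because the zone's offset jumped over them. A quick way to check such a gap with
java.time (Java 8) instead of the joda-time call the comment suggests; whether
this particular instant falls in a gap depends on the tzdata shipped with the JDK:

    import java.time.LocalDateTime;
    import java.time.ZoneId;

    ZoneId zone = ZoneId.of("Pacific/Enderbury");
    LocalDateTime local = LocalDateTime.parse("1995-01-01T03:00:00.688888888");
    // an empty list of valid offsets means the wall-clock time falls in a gap
    // and never existed in that zone
    System.out.println(zone.getRules().getValidOffsets(local).isEmpty());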

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/TestOrcTimezone2.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/TestOrcTimezone2.java b/orc/src/test/org/apache/hive/orc/TestOrcTimezone2.java
new file mode 100644
index 0000000..67d3f16
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/TestOrcTimezone2.java
@@ -0,0 +1,143 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc;
+
+import static junit.framework.Assert.assertEquals;
+
+import java.io.File;
+import java.sql.Timestamp;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
+import java.util.Random;
+import java.util.TimeZone;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestName;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import com.google.common.collect.Lists;
+
+/**
+ *
+ */
+@RunWith(Parameterized.class)
+public class TestOrcTimezone2 {
+  Path workDir = new Path(System.getProperty("test.tmp.dir",
+      "target" + File.separator + "test" + File.separator + "tmp"));
+  Configuration conf;
+  FileSystem fs;
+  Path testFilePath;
+  String writerTimeZone;
+  String readerTimeZone;
+  static TimeZone defaultTimeZone = TimeZone.getDefault();
+
+  public TestOrcTimezone2(String writerTZ, String readerTZ) {
+    this.writerTimeZone = writerTZ;
+    this.readerTimeZone = readerTZ;
+  }
+
+  @Parameterized.Parameters
+  public static Collection<Object[]> data() {
+    String[] allTimeZones = TimeZone.getAvailableIDs();
+    Random rand = new Random(123);
+    int len = allTimeZones.length;
+    int n = 500;
+    Object[][] data = new Object[n][];
+    for (int i = 0; i < n; i++) {
+      int wIdx = rand.nextInt(len);
+      int rIdx = rand.nextInt(len);
+      data[i] = new Object[2];
+      data[i][0] = allTimeZones[wIdx];
+      data[i][1] = allTimeZones[rIdx];
+    }
+    return Arrays.asList(data);
+  }
+
+  @Rule
+  public TestName testCaseName = new TestName();
+
+  @Before
+  public void openFileSystem() throws Exception {
+    conf = new Configuration();
+    fs = FileSystem.getLocal(conf);
+    testFilePath = new Path(workDir, "TestOrcFile." +
+        testCaseName.getMethodName() + ".orc");
+    fs.delete(testFilePath, false);
+  }
+
+  @After
+  public void restoreTimeZone() {
+    TimeZone.setDefault(defaultTimeZone);
+  }
+
+  @Test
+  public void testTimestampWriter() throws Exception {
+    TypeDescription schema = TypeDescription.createTimestamp();
+
+    TimeZone.setDefault(TimeZone.getTimeZone(writerTimeZone));
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf).setSchema(schema)
+            .stripeSize(100000).bufferSize(10000));
+    assertEquals(writerTimeZone, TimeZone.getDefault().getID());
+    List<String> ts = Lists.newArrayList();
+    ts.add("2003-01-01 01:00:00.000000222");
+    ts.add("1999-01-01 02:00:00.999999999");
+    ts.add("1995-01-02 03:00:00.688888888");
+    ts.add("2002-01-01 04:00:00.1");
+    ts.add("2010-03-02 05:00:00.000009001");
+    ts.add("2005-01-01 06:00:00.000002229");
+    ts.add("2006-01-01 07:00:00.900203003");
+    ts.add("2003-01-01 08:00:00.800000007");
+    ts.add("1996-08-02 09:00:00.723100809");
+    ts.add("1998-11-02 10:00:00.857340643");
+    ts.add("2008-10-02 11:00:00.0");
+    ts.add("2037-01-01 00:00:00.000999");
+    VectorizedRowBatch batch = schema.createRowBatch();
+    TimestampColumnVector tsc = (TimestampColumnVector) batch.cols[0];
+    for (String t : ts) {
+      tsc.set(batch.size++, Timestamp.valueOf(t));
+    }
+    writer.addRowBatch(batch);
+    writer.close();
+
+    TimeZone.setDefault(TimeZone.getTimeZone(readerTimeZone));
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+    assertEquals(readerTimeZone, TimeZone.getDefault().getID());
+    RecordReader rows = reader.rows();
+    int idx = 0;
+    batch = reader.getSchema().createRowBatch();
+    tsc = (TimestampColumnVector) batch.cols[0];
+    while (rows.nextBatch(batch)) {
+      for (int r=0; r < batch.size; ++r) {
+        assertEquals(ts.get(idx++), tsc.asScratchTimestamp(r).toString());
+      }
+    }
+    rows.close();
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/TestOrcTimezone3.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/TestOrcTimezone3.java b/orc/src/test/org/apache/hive/orc/TestOrcTimezone3.java
new file mode 100644
index 0000000..25d9720
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/TestOrcTimezone3.java
@@ -0,0 +1,124 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc;
+
+import static junit.framework.Assert.assertEquals;
+
+import java.io.File;
+import java.sql.Timestamp;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
+import java.util.TimeZone;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestName;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import com.google.common.collect.Lists;
+
+/**
+ *
+ */
+@RunWith(Parameterized.class)
+public class TestOrcTimezone3 {
+  Path workDir = new Path(System.getProperty("test.tmp.dir",
+      "target" + File.separator + "test" + File.separator + "tmp"));
+  Configuration conf;
+  FileSystem fs;
+  Path testFilePath;
+  String writerTimeZone;
+  String readerTimeZone;
+  static TimeZone defaultTimeZone = TimeZone.getDefault();
+
+  public TestOrcTimezone3(String writerTZ, String readerTZ) {
+    this.writerTimeZone = writerTZ;
+    this.readerTimeZone = readerTZ;
+  }
+
+  @Parameterized.Parameters
+  public static Collection<Object[]> data() {
+    List<Object[]> result = Arrays.asList(new Object[][]{
+        {"America/Chicago", "America/Los_Angeles"},
+    });
+    return result;
+  }
+
+  @Rule
+  public TestName testCaseName = new TestName();
+
+  @Before
+  public void openFileSystem() throws Exception {
+    conf = new Configuration();
+    fs = FileSystem.getLocal(conf);
+    testFilePath = new Path(workDir, "TestOrcTimezone3." +
+        testCaseName.getMethodName() + ".orc");
+    fs.delete(testFilePath, false);
+  }
+
+  @After
+  public void restoreTimeZone() {
+    TimeZone.setDefault(defaultTimeZone);
+  }
+
+  @Test
+  public void testTimestampWriter() throws Exception {
+    TypeDescription schema = TypeDescription.createTimestamp();
+
+    TimeZone.setDefault(TimeZone.getTimeZone(writerTimeZone));
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf).setSchema(schema).stripeSize(100000)
+            .bufferSize(10000));
+    assertEquals(writerTimeZone, TimeZone.getDefault().getID());
+    List<String> ts = Lists.newArrayList();
+    ts.add("1969-12-31 16:00:14.007");
+    ts.add("1969-12-31 16:00:06.021");
+    ts.add("1969-12-31 16:00:03.963");
+    VectorizedRowBatch batch = schema.createRowBatch();
+    TimestampColumnVector times = (TimestampColumnVector) batch.cols[0];
+    for (String t : ts) {
+      times.set(batch.size++, Timestamp.valueOf(t));
+    }
+    writer.addRowBatch(batch);
+    writer.close();
+
+    TimeZone.setDefault(TimeZone.getTimeZone(readerTimeZone));
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+    assertEquals(readerTimeZone, TimeZone.getDefault().getID());
+    RecordReader rows = reader.rows();
+    batch = reader.getSchema().createRowBatch();
+    times = (TimestampColumnVector) batch.cols[0];
+    int idx = 0;
+    while (rows.nextBatch(batch)) {
+      for(int r=0; r < batch.size; ++r) {
+        assertEquals(ts.get(idx++), times.asScratchTimestamp(r).toString());
+      }
+    }
+    rows.close();
+  }
+}


[30/37] hive git commit: HIVE-17118. Move the hive-orc source files to make the package names unique.

Posted by om...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/impl/PhysicalFsWriter.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/impl/PhysicalFsWriter.java b/orc/src/java/org/apache/hive/orc/impl/PhysicalFsWriter.java
new file mode 100644
index 0000000..47c33bb
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/impl/PhysicalFsWriter.java
@@ -0,0 +1,529 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc.impl;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.EnumSet;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
+
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hive.orc.CompressionCodec;
+import org.apache.hive.orc.CompressionCodec.Modifier;
+import org.apache.hive.orc.CompressionKind;
+import org.apache.hive.orc.OrcFile;
+import org.apache.hive.orc.OrcFile.CompressionStrategy;
+import org.apache.hive.orc.OrcProto;
+import org.apache.hive.orc.OrcProto.BloomFilterIndex;
+import org.apache.hive.orc.OrcProto.Footer;
+import org.apache.hive.orc.OrcProto.Metadata;
+import org.apache.hive.orc.OrcProto.PostScript;
+import org.apache.hive.orc.OrcProto.Stream.Kind;
+import org.apache.hive.orc.OrcProto.StripeFooter;
+import org.apache.hive.orc.OrcProto.StripeInformation;
+import org.apache.hive.orc.OrcProto.RowIndex.Builder;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.protobuf.CodedOutputStream;
+
+public class PhysicalFsWriter implements PhysicalWriter {
+  private static final Logger LOG = LoggerFactory.getLogger(PhysicalFsWriter.class);
+
+  private static final int HDFS_BUFFER_SIZE = 256 * 1024;
+
+  private FSDataOutputStream rawWriter = null;
+  // the compressed metadata information outStream
+  private OutStream writer = null;
+  // a protobuf outStream around streamFactory
+  private CodedOutputStream protobufWriter = null;
+
+  private final FileSystem fs;
+  private final Path path;
+  private final long blockSize;
+  private final int bufferSize;
+  private final CompressionCodec codec;
+  private final double paddingTolerance;
+  private final long defaultStripeSize;
+  private final CompressionKind compress;
+  private final boolean addBlockPadding;
+  private final CompressionStrategy compressionStrategy;
+
+  // the streams that make up the current stripe
+  private final Map<StreamName, BufferedStream> streams =
+    new TreeMap<StreamName, BufferedStream>();
+
+  private long adjustedStripeSize;
+  private long headerLength;
+  private long stripeStart;
+  private int metadataLength;
+  private int footerLength;
+
+  public PhysicalFsWriter(FileSystem fs, Path path, int numColumns, OrcFile.WriterOptions opts) {
+    this.fs = fs;
+    this.path = path;
+    this.defaultStripeSize = this.adjustedStripeSize = opts.getStripeSize();
+    this.addBlockPadding = opts.getBlockPadding();
+    if (opts.isEnforceBufferSize()) {
+      this.bufferSize = opts.getBufferSize();
+    } else {
+      this.bufferSize = getEstimatedBufferSize(defaultStripeSize, numColumns, opts.getBufferSize());
+    }
+    this.compress = opts.getCompress();
+    this.compressionStrategy = opts.getCompressionStrategy();
+    codec = createCodec(compress);
+    this.paddingTolerance = opts.getPaddingTolerance();
+    this.blockSize = opts.getBlockSize();
+    LOG.info("ORC writer created for path: {} with stripeSize: {} blockSize: {}" +
+        " compression: {} bufferSize: {}", path, defaultStripeSize, blockSize,
+        compress, bufferSize);
+  }
+
+  @Override
+  public void initialize() throws IOException {
+    if (rawWriter != null) return;
+    rawWriter = fs.create(path, false, HDFS_BUFFER_SIZE,
+                          fs.getDefaultReplication(path), blockSize);
+    rawWriter.writeBytes(OrcFile.MAGIC);
+    headerLength = rawWriter.getPos();
+    writer = new OutStream("metadata", bufferSize, codec,
+                           new DirectStream(rawWriter));
+    protobufWriter = CodedOutputStream.newInstance(writer);
+  }
+
+  private void padStripe(long indexSize, long dataSize, int footerSize) throws IOException {
+    this.stripeStart = rawWriter.getPos();
+    final long currentStripeSize = indexSize + dataSize + footerSize;
+    final long available = blockSize - (stripeStart % blockSize);
+    final long overflow = currentStripeSize - adjustedStripeSize;
+    final float availRatio = (float) available / (float) defaultStripeSize;
+
+    if (availRatio > 0.0f && availRatio < 1.0f
+        && availRatio > paddingTolerance) {
+      // adjust default stripe size to fit into remaining space, also adjust
+      // the next stripe for correction based on the current stripe size
+      // and user specified padding tolerance. Since stripe size can overflow
+      // the default stripe size we should apply this correction to avoid
+      // writing portion of last stripe to next hdfs block.
+      double correction = overflow > 0 ? (double) overflow
+          / (double) adjustedStripeSize : 0.0;
+
+      // correction should not be greater than user specified padding
+      // tolerance
+      correction = correction > paddingTolerance ? paddingTolerance
+          : correction;
+
+      // adjust next stripe size based on current stripe estimate correction
+      adjustedStripeSize = (long) ((1.0f - correction) * (availRatio * defaultStripeSize));
+    } else if (availRatio >= 1.0) {
+      adjustedStripeSize = defaultStripeSize;
+    }
+
+    if (availRatio < paddingTolerance && addBlockPadding) {
+      long padding = blockSize - (stripeStart % blockSize);
+      byte[] pad = new byte[(int) Math.min(HDFS_BUFFER_SIZE, padding)];
+      LOG.info(String.format("Padding ORC by %d bytes (<=  %.2f * %d)",
+          padding, availRatio, defaultStripeSize));
+      stripeStart += padding;
+      while (padding > 0) {
+        int writeLen = (int) Math.min(padding, pad.length);
+        rawWriter.write(pad, 0, writeLen);
+        padding -= writeLen;
+      }
+      adjustedStripeSize = defaultStripeSize;
+    } else if (currentStripeSize < blockSize
+        && (stripeStart % blockSize) + currentStripeSize > blockSize) {
+      // even if you don't pad, reset the default stripe size when crossing a
+      // block boundary
+      adjustedStripeSize = defaultStripeSize;
+    }
+  }
+
+  /**
+   * An output receiver that writes the ByteBuffers to the output stream
+   * as they are received.
+   */
+  private class DirectStream implements OutStream.OutputReceiver {
+    private final FSDataOutputStream output;
+
+    DirectStream(FSDataOutputStream output) {
+      this.output = output;
+    }
+
+    @Override
+    public void output(ByteBuffer buffer) throws IOException {
+      output.write(buffer.array(), buffer.arrayOffset() + buffer.position(), buffer.remaining());
+    }
+  }
+
+  @Override
+  public long getPhysicalStripeSize() {
+    return adjustedStripeSize;
+  }
+
+  @Override
+  public boolean isCompressed() {
+    return codec != null;
+  }
+
+
+  public static CompressionCodec createCodec(CompressionKind kind) {
+    switch (kind) {
+      case NONE:
+        return null;
+      case ZLIB:
+        return new ZlibCodec();
+      case SNAPPY:
+        return new SnappyCodec();
+      case LZO:
+        try {
+          ClassLoader loader = Thread.currentThread().getContextClassLoader();
+          if (loader == null) {
+            loader = WriterImpl.class.getClassLoader();
+          }
+          @SuppressWarnings("unchecked")
+          Class<? extends CompressionCodec> lzo =
+              (Class<? extends CompressionCodec>)
+              loader.loadClass("org.apache.hadoop.hive.ql.io.orc.LzoCodec");
+          return lzo.newInstance();
+        } catch (ClassNotFoundException e) {
+          throw new IllegalArgumentException("LZO is not available.", e);
+        } catch (InstantiationException e) {
+          throw new IllegalArgumentException("Problem initializing LZO", e);
+        } catch (IllegalAccessException e) {
+          throw new IllegalArgumentException("Insufficient access to LZO", e);
+        }
+      default:
+        throw new IllegalArgumentException("Unknown compression codec: " +
+            kind);
+    }
+  }
+
+  private void writeStripeFooter(StripeFooter footer, long dataSize, long indexSize,
+      StripeInformation.Builder dirEntry) throws IOException {
+    footer.writeTo(protobufWriter);
+    protobufWriter.flush();
+    writer.flush();
+    dirEntry.setOffset(stripeStart);
+    dirEntry.setFooterLength(rawWriter.getPos() - stripeStart - dataSize - indexSize);
+  }
+
+  @VisibleForTesting
+  public static int getEstimatedBufferSize(long stripeSize, int numColumns,
+                                           int bs) {
+    // The worst case is that there are 2 big streams per column and
+    // we want to guarantee that each stream gets ~10 buffers.
+    // This keeps buffers small enough that we don't get really small stripe
+    // sizes.
+    int estBufferSize = (int) (stripeSize / (20 * numColumns));
+    estBufferSize = getClosestBufferSize(estBufferSize);
+    return estBufferSize > bs ? bs : estBufferSize;
+  }
+
+  private static int getClosestBufferSize(int estBufferSize) {
+    final int kb4 = 4 * 1024;
+    final int kb8 = 8 * 1024;
+    final int kb16 = 16 * 1024;
+    final int kb32 = 32 * 1024;
+    final int kb64 = 64 * 1024;
+    final int kb128 = 128 * 1024;
+    final int kb256 = 256 * 1024;
+    if (estBufferSize <= kb4) {
+      return kb4;
+    } else if (estBufferSize > kb4 && estBufferSize <= kb8) {
+      return kb8;
+    } else if (estBufferSize > kb8 && estBufferSize <= kb16) {
+      return kb16;
+    } else if (estBufferSize > kb16 && estBufferSize <= kb32) {
+      return kb32;
+    } else if (estBufferSize > kb32 && estBufferSize <= kb64) {
+      return kb64;
+    } else if (estBufferSize > kb64 && estBufferSize <= kb128) {
+      return kb128;
+    } else {
+      return kb256;
+    }
+  }
+
+  @Override
+  public void writeFileMetadata(Metadata.Builder builder) throws IOException {
+    long startPosn = rawWriter.getPos();
+    Metadata metadata = builder.build();
+    metadata.writeTo(protobufWriter);
+    protobufWriter.flush();
+    writer.flush();
+    this.metadataLength = (int) (rawWriter.getPos() - startPosn);
+  }
+
+  @Override
+  public void writeFileFooter(Footer.Builder builder) throws IOException {
+    long bodyLength = rawWriter.getPos() - metadataLength;
+    builder.setContentLength(bodyLength);
+    builder.setHeaderLength(headerLength);
+    long startPosn = rawWriter.getPos();
+    Footer footer = builder.build();
+    footer.writeTo(protobufWriter);
+    protobufWriter.flush();
+    writer.flush();
+    this.footerLength = (int) (rawWriter.getPos() - startPosn);
+  }
+
+  @Override
+  public void writePostScript(PostScript.Builder builder) throws IOException {
+    builder.setCompression(writeCompressionKind(compress));
+    builder.setFooterLength(footerLength);
+    builder.setMetadataLength(metadataLength);
+    if (compress != CompressionKind.NONE) {
+      builder.setCompressionBlockSize(bufferSize);
+    }
+    PostScript ps = builder.build();
+    // need to write this uncompressed
+    long startPosn = rawWriter.getPos();
+    ps.writeTo(rawWriter);
+    long length = rawWriter.getPos() - startPosn;
+    if (length > 255) {
+      throw new IllegalArgumentException("PostScript too large at " + length);
+    }
+    rawWriter.writeByte((int)length);
+  }
+
+  @Override
+  public void close() throws IOException {
+    rawWriter.close();
+  }
+
+  private OrcProto.CompressionKind writeCompressionKind(CompressionKind kind) {
+    switch (kind) {
+      case NONE: return OrcProto.CompressionKind.NONE;
+      case ZLIB: return OrcProto.CompressionKind.ZLIB;
+      case SNAPPY: return OrcProto.CompressionKind.SNAPPY;
+      case LZO: return OrcProto.CompressionKind.LZO;
+      default:
+        throw new IllegalArgumentException("Unknown compression " + kind);
+    }
+  }
+
+  @Override
+  public void flush() throws IOException {
+    rawWriter.hflush();
+    // TODO: reset?
+  }
+
+  @Override
+  public long getRawWriterPosition() throws IOException {
+    return rawWriter.getPos();
+  }
+
+  @Override
+  public void appendRawStripe(byte[] stripe, int offset, int length,
+      StripeInformation.Builder dirEntry) throws IOException {
+    long start = rawWriter.getPos();
+    long availBlockSpace = blockSize - (start % blockSize);
+
+    // see if stripe can fit in the current hdfs block, else pad the remaining
+    // space in the block
+    if (length < blockSize && length > availBlockSpace &&
+        addBlockPadding) {
+      byte[] pad = new byte[(int) Math.min(HDFS_BUFFER_SIZE, availBlockSpace)];
+      LOG.info(String.format("Padding ORC by %d bytes while merging..",
+          availBlockSpace));
+      start += availBlockSpace;
+      while (availBlockSpace > 0) {
+        int writeLen = (int) Math.min(availBlockSpace, pad.length);
+        rawWriter.write(pad, 0, writeLen);
+        availBlockSpace -= writeLen;
+      }
+    }
+
+    rawWriter.write(stripe);
+    dirEntry.setOffset(start);
+  }
+
+
+  /**
+   * This class is used to hold the contents of streams as they are buffered.
+   * The TreeWriters write to the outStream and the codec compresses the
+   * data as buffers fill up and stores them in the output list. When the
+   * stripe is being written, the whole stream is written to the file.
+   */
+  private class BufferedStream implements OutStream.OutputReceiver {
+    private final OutStream outStream;
+    private final List<ByteBuffer> output = new ArrayList<ByteBuffer>();
+
+    BufferedStream(String name, int bufferSize,
+                   CompressionCodec codec) throws IOException {
+      outStream = new OutStream(name, bufferSize, codec, this);
+    }
+
+    /**
+     * Receive a buffer from the compression codec.
+     * @param buffer the buffer to save
+     */
+    @Override
+    public void output(ByteBuffer buffer) {
+      output.add(buffer);
+    }
+
+    /**
+     * @return the number of bytes in buffers that are allocated to this stream.
+     */
+    public long getBufferSize() {
+      long result = 0;
+      for (ByteBuffer buf: output) {
+        result += buf.capacity();
+      }
+      return outStream.getBufferSize() + result;
+    }
+
+    /**
+     * Write any saved buffers to the OutputStream if needed, and clears all the buffers.
+     */
+    public void spillToDiskAndClear() throws IOException {
+      if (!outStream.isSuppressed()) {
+        for (ByteBuffer buffer: output) {
+          rawWriter.write(buffer.array(), buffer.arrayOffset() + buffer.position(),
+            buffer.remaining());
+        }
+      }
+      outStream.clear();
+      output.clear();
+    }
+
+    /**
+     * @return The number of bytes that will be written to the output. Assumes the stream writing
+     *         into this receiver has already been flushed.
+     */
+    public long getOutputSize() {
+      long result = 0;
+      for (ByteBuffer buffer: output) {
+        result += buffer.remaining();
+      }
+      return result;
+    }
+
+    @Override
+    public String toString() {
+      return outStream.toString();
+    }
+  }
+
+  @Override
+  public OutStream getOrCreatePhysicalStream(StreamName name) throws IOException {
+    BufferedStream result = streams.get(name);
+    if (result == null) {
+      EnumSet<Modifier> modifiers = createCompressionModifiers(name.getKind());
+      result = new BufferedStream(name.toString(), bufferSize,
+          codec == null ? null : codec.modify(modifiers));
+      streams.put(name, result);
+    }
+    return result.outStream;
+  }
+
+  private EnumSet<Modifier> createCompressionModifiers(Kind kind) {
+    switch (kind) {
+      case BLOOM_FILTER:
+      case DATA:
+      case DICTIONARY_DATA:
+        return EnumSet.of(Modifier.TEXT,
+            compressionStrategy == CompressionStrategy.SPEED ? Modifier.FAST : Modifier.DEFAULT);
+      case LENGTH:
+      case DICTIONARY_COUNT:
+      case PRESENT:
+      case ROW_INDEX:
+      case SECONDARY:
+        // easily compressed using the fastest modes
+        return EnumSet.of(CompressionCodec.Modifier.FASTEST, CompressionCodec.Modifier.BINARY);
+      default:
+        LOG.warn("Missing ORC compression modifiers for " + kind);
+        return null;
+    }
+  }
+
+  @Override
+  public void finalizeStripe(StripeFooter.Builder footerBuilder,
+      StripeInformation.Builder dirEntry) throws IOException {
+    long indexSize = 0;
+    long dataSize = 0;
+    for (Map.Entry<StreamName, BufferedStream> pair: streams.entrySet()) {
+      BufferedStream receiver = pair.getValue();
+      OutStream outStream = receiver.outStream;
+      if (!outStream.isSuppressed()) {
+        outStream.flush();
+        long streamSize = receiver.getOutputSize();
+        StreamName name = pair.getKey();
+        footerBuilder.addStreams(OrcProto.Stream.newBuilder().setColumn(name.getColumn())
+            .setKind(name.getKind()).setLength(streamSize));
+        if (StreamName.Area.INDEX == name.getArea()) {
+          indexSize += streamSize;
+        } else {
+          dataSize += streamSize;
+        }
+      }
+    }
+    dirEntry.setIndexLength(indexSize).setDataLength(dataSize);
+
+    OrcProto.StripeFooter footer = footerBuilder.build();
+    // Do we need to pad the file so the stripe doesn't straddle a block boundary?
+    padStripe(indexSize, dataSize, footer.getSerializedSize());
+
+    // write out the data streams
+    for (Map.Entry<StreamName, BufferedStream> pair : streams.entrySet()) {
+      pair.getValue().spillToDiskAndClear();
+    }
+    // Write out the footer.
+    writeStripeFooter(footer, dataSize, indexSize, dirEntry);
+  }
+
+  @Override
+  public long estimateMemory() {
+    long result = 0;
+    for (BufferedStream stream: streams.values()) {
+      result += stream.getBufferSize();
+    }
+    return result;
+  }
+
+  @Override
+  public void writeIndexStream(StreamName name, Builder rowIndex) throws IOException {
+    OutStream stream = getOrCreatePhysicalStream(name);
+    rowIndex.build().writeTo(stream);
+    stream.flush();
+  }
+
+  @Override
+  public void writeBloomFilterStream(
+      StreamName name, BloomFilterIndex.Builder bloomFilterIndex) throws IOException {
+    OutStream stream = getOrCreatePhysicalStream(name);
+    bloomFilterIndex.build().writeTo(stream);
+    stream.flush();
+  }
+
+  @VisibleForTesting
+  public OutputStream getStream() throws IOException {
+    initialize();
+    return rawWriter;
+  }
+}
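
getEstimatedBufferSize above budgets roughly 20 buffers per column (two big
streams at ~10 buffers each) and rounds up to the next power-of-two bucket
between 4 KB and 256 KB, capped by the configured buffer size. A worked example,
assuming a 64 MB stripe, 50 columns and a 256 KB buffer limit:

    // 64 MB / (20 * 50) = 67,108 bytes, which falls into the 128 KB bucket and is
    // below the 256 KB cap, so 131072-byte compression buffers are used.
    int estimate = PhysicalFsWriter.getEstimatedBufferSize(
        64L * 1024 * 1024,  // requested stripe size
        50,                 // number of columns in the schema
        256 * 1024);        // configured buffer size acts as an upper bound
    assert estimate == 128 * 1024;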

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/impl/PhysicalWriter.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/impl/PhysicalWriter.java b/orc/src/java/org/apache/hive/orc/impl/PhysicalWriter.java
new file mode 100644
index 0000000..dc0089a
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/impl/PhysicalWriter.java
@@ -0,0 +1,122 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc.impl;
+
+import java.io.IOException;
+
+import org.apache.hive.orc.OrcProto.BloomFilterIndex;
+import org.apache.hive.orc.OrcProto.Footer;
+import org.apache.hive.orc.OrcProto.Metadata;
+import org.apache.hive.orc.OrcProto.PostScript;
+import org.apache.hive.orc.OrcProto.RowIndex;
+import org.apache.hive.orc.OrcProto.StripeFooter;
+import org.apache.hive.orc.OrcProto.StripeInformation;
+
+public interface PhysicalWriter {
+
+  /**
+   * Creates all the streams/connections/etc. necessary to write.
+   */
+  void initialize() throws IOException;
+
+  /**
+   * Writes out the file metadata.
+   * @param builder Metadata builder to finalize and write.
+   */
+  void writeFileMetadata(Metadata.Builder builder) throws IOException;
+
+  /**
+   * Writes out the file footer.
+   * @param builder Footer builder to finalize and write.
+   */
+  void writeFileFooter(Footer.Builder builder) throws IOException;
+
+  /**
+   * Writes out the postscript (including the size byte if needed).
+   * @param builder Postscript builder to finalize and write.
+   */
+  void writePostScript(PostScript.Builder builder) throws IOException;
+
+  /**
+   * Creates a physical stream to write data to.
+   * @param name Stream name.
+   * @return The output stream.
+   */
+  OutStream getOrCreatePhysicalStream(StreamName name) throws IOException;
+
+  /**
+   * Flushes the data in all the streams, spills them to disk, and writes out the stripe footer.
+   * @param footer Stripe footer to be updated with relevant data and written out.
+   * @param dirEntry File metadata entry for the stripe, to be updated with relevant data.
+   */
+  void finalizeStripe(StripeFooter.Builder footer,
+      StripeInformation.Builder dirEntry) throws IOException;
+
+  /**
+   * Writes out the row index for the stripe column.
+   * @param name Stream name.
+   * @param rowIndex Row index entries to write.
+   */
+  void writeIndexStream(StreamName name, RowIndex.Builder rowIndex) throws IOException;
+
+  /**
+   * Writes out the bloom filter index for the stripe column.
+   * @param streamName Stream name.
+   * @param bloomFilterIndex Bloom filter index to write.
+   */
+  void writeBloomFilterStream(StreamName streamName,
+      BloomFilterIndex.Builder bloomFilterIndex) throws IOException;
+
+  /**
+   * Closes the writer.
+   */
+  void close() throws IOException;
+
+  /**
+   * Force-flushes the writer.
+   */
+  void flush() throws IOException;
+
+  /**
+   * @return the physical writer position (e.g. for updater).
+   */
+  long getRawWriterPosition() throws IOException;
+
+  /** @return physical stripe size, taking padding into account. */
+  long getPhysicalStripeSize();
+
+  /** @return whether the writer is compressed. */
+  boolean isCompressed();
+
+  /**
+   * Appends raw stripe data (e.g. for file merger).
+   * @param stripe Stripe data buffer.
+   * @param offset Stripe data buffer offset.
+   * @param length Stripe data buffer length.
+   * @param dirEntry File metadata entry for the stripe, to be updated with relevant data.
+   * @throws IOException
+   */
+  void appendRawStripe(byte[] stripe, int offset, int length,
+      StripeInformation.Builder dirEntry) throws IOException;
+
+  /**
+   * @return the estimated memory usage for the stripe.
+   */
+  long estimateMemory();
+}

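Taken together, these methods imply a lifecycle: initialize the writer, fill the per-stripe streams and indexes, finalize each stripe, then write the metadata, footer and postscript before closing. The sketch below is illustrative only (it is not how WriterImpl drives a PhysicalWriter); the builders are assumed to be populated elsewhere and imports are omitted:

    // Illustrative call sequence for a single-stripe file; error handling omitted.
    static void writeOneStripeFile(PhysicalWriter writer,
        OrcProto.RowIndex.Builder rowIndex,
        OrcProto.StripeFooter.Builder stripeFooter,
        OrcProto.StripeInformation.Builder dirEntry,
        OrcProto.Metadata.Builder metadata,
        OrcProto.Footer.Builder footer,
        OrcProto.PostScript.Builder postScript) throws IOException {
      writer.initialize();                                  // open the underlying file
      OutStream data = writer.getOrCreatePhysicalStream(
          new StreamName(1, OrcProto.Stream.Kind.DATA));
      data.write(new byte[] { 1, 2, 3 });                   // column data for the stripe
      writer.writeIndexStream(
          new StreamName(1, OrcProto.Stream.Kind.ROW_INDEX), rowIndex);
      writer.finalizeStripe(stripeFooter, dirEntry);        // flush streams + stripe footer
      writer.writeFileMetadata(metadata);                   // stripe statistics
      writer.writeFileFooter(footer);                       // types, stripe directory
      writer.writePostScript(postScript);                   // lengths, compression, magic
      writer.close();
    }
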
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/impl/PositionProvider.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/impl/PositionProvider.java b/orc/src/java/org/apache/hive/orc/impl/PositionProvider.java
new file mode 100644
index 0000000..36c2654
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/impl/PositionProvider.java
@@ -0,0 +1,26 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc.impl;
+
+/**
+ * An interface used for seeking to a row index.
+ */
+public interface PositionProvider {
+  long getNext();
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/impl/PositionRecorder.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/impl/PositionRecorder.java b/orc/src/java/org/apache/hive/orc/impl/PositionRecorder.java
new file mode 100644
index 0000000..11eb9cc
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/impl/PositionRecorder.java
@@ -0,0 +1,25 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc.impl;
+
+/**
+ * An interface for recording positions in a stream.
+ */
+public interface PositionRecorder {
+  void addPosition(long offset);
+}

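PositionProvider and PositionRecorder are the two halves of ORC's seek mechanism: writers push stream positions into a PositionRecorder (which ends up in a row-index entry), and readers replay those positions through a PositionProvider when seeking to a row group. The following is a minimal, hypothetical in-memory pair that shows the contract; the real recorder and provider classes used by the writer and reader are different:

    import java.util.ArrayList;
    import java.util.List;

    // Records positions into a list and replays them in the same order.
    class ListPositions implements PositionRecorder, PositionProvider {
      private final List<Long> positions = new ArrayList<>();
      private int next = 0;

      @Override
      public void addPosition(long offset) {  // filled by writers via getPosition(...)
        positions.add(offset);
      }

      @Override
      public long getNext() {                 // consumed by readers while seeking
        return positions.get(next++);
      }
    }
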
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/impl/PositionedOutputStream.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/impl/PositionedOutputStream.java b/orc/src/java/org/apache/hive/orc/impl/PositionedOutputStream.java
new file mode 100644
index 0000000..1b8f7c2
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/impl/PositionedOutputStream.java
@@ -0,0 +1,39 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc.impl;
+
+import java.io.IOException;
+import java.io.OutputStream;
+
+public abstract class PositionedOutputStream extends OutputStream {
+
+  /**
+   * Record the current position to the recorder.
+   * @param recorder the object that receives the position
+   * @throws IOException
+   */
+  public abstract void getPosition(PositionRecorder recorder
+                                   ) throws IOException;
+
+  /**
+   * Get the memory size currently allocated as buffer associated with this
+   * stream.
+   * @return the number of bytes used by buffers.
+   */
+  public abstract long getBufferSize();
+}

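PositionedOutputStream ties the plain OutputStream API to the position machinery above: a concrete stream reports its current position into a PositionRecorder and reports how much memory it is holding for the memory manager. The toy subclass below is purely illustrative and treats the number of bytes written so far as its position:

    import java.io.ByteArrayOutputStream;
    import java.io.IOException;

    // Toy stream: position == bytes written, buffer size == bytes held in memory.
    class CountingPositionedOutputStream extends PositionedOutputStream {
      private final ByteArrayOutputStream buffer = new ByteArrayOutputStream();

      @Override
      public void write(int b) {
        buffer.write(b);
      }

      @Override
      public void getPosition(PositionRecorder recorder) throws IOException {
        recorder.addPosition(buffer.size());  // record the current byte offset
      }

      @Override
      public long getBufferSize() {
        return buffer.size();
      }
    }
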
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/impl/ReaderImpl.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/impl/ReaderImpl.java b/orc/src/java/org/apache/hive/orc/impl/ReaderImpl.java
new file mode 100644
index 0000000..8ab9e92
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/impl/ReaderImpl.java
@@ -0,0 +1,763 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc.impl;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hive.orc.ColumnStatistics;
+import org.apache.hive.orc.CompressionCodec;
+import org.apache.hive.orc.FileFormatException;
+import org.apache.hive.orc.FileMetadata;
+import org.apache.hive.orc.OrcFile;
+import org.apache.hive.orc.CompressionKind;
+import org.apache.hive.orc.OrcUtils;
+import org.apache.hive.orc.Reader;
+import org.apache.hive.orc.RecordReader;
+import org.apache.hive.orc.TypeDescription;
+import org.apache.hive.orc.StripeInformation;
+import org.apache.hive.orc.StripeStatistics;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.io.DiskRange;
+import org.apache.hadoop.hive.ql.util.JavaDataModel;
+import org.apache.hadoop.io.Text;
+import org.apache.hive.orc.OrcProto;
+
+import com.google.common.collect.Lists;
+import com.google.protobuf.CodedInputStream;
+
+public class ReaderImpl implements Reader {
+
+  private static final Logger LOG = LoggerFactory.getLogger(ReaderImpl.class);
+
+  private static final int DIRECTORY_SIZE_GUESS = 16 * 1024;
+
+  protected final FileSystem fileSystem;
+  private final long maxLength;
+  protected final Path path;
+  protected final CompressionKind compressionKind;
+  protected CompressionCodec codec;
+  protected int bufferSize;
+  protected OrcProto.Metadata metadata;
+  private List<OrcProto.StripeStatistics> stripeStats;
+  private final int metadataSize;
+  protected final List<OrcProto.Type> types;
+  private TypeDescription schema;
+  private final List<OrcProto.UserMetadataItem> userMetadata;
+  private final List<OrcProto.ColumnStatistics> fileStats;
+  private final List<StripeInformation> stripes;
+  protected final int rowIndexStride;
+  private final long contentLength, numberOfRows;
+
+  private long deserializedSize = -1;
+  protected final Configuration conf;
+  private final List<Integer> versionList;
+  private final OrcFile.WriterVersion writerVersion;
+
+  protected OrcTail tail;
+
+  public static class StripeInformationImpl
+      implements StripeInformation {
+    private final OrcProto.StripeInformation stripe;
+
+    public StripeInformationImpl(OrcProto.StripeInformation stripe) {
+      this.stripe = stripe;
+    }
+
+    @Override
+    public long getOffset() {
+      return stripe.getOffset();
+    }
+
+    @Override
+    public long getLength() {
+      return stripe.getDataLength() + getIndexLength() + getFooterLength();
+    }
+
+    @Override
+    public long getDataLength() {
+      return stripe.getDataLength();
+    }
+
+    @Override
+    public long getFooterLength() {
+      return stripe.getFooterLength();
+    }
+
+    @Override
+    public long getIndexLength() {
+      return stripe.getIndexLength();
+    }
+
+    @Override
+    public long getNumberOfRows() {
+      return stripe.getNumberOfRows();
+    }
+
+    @Override
+    public String toString() {
+      return "offset: " + getOffset() + " data: " + getDataLength() +
+        " rows: " + getNumberOfRows() + " tail: " + getFooterLength() +
+        " index: " + getIndexLength();
+    }
+  }
+
+  @Override
+  public long getNumberOfRows() {
+    return numberOfRows;
+  }
+
+  @Override
+  public List<String> getMetadataKeys() {
+    List<String> result = new ArrayList<String>();
+    for(OrcProto.UserMetadataItem item: userMetadata) {
+      result.add(item.getName());
+    }
+    return result;
+  }
+
+  @Override
+  public ByteBuffer getMetadataValue(String key) {
+    for(OrcProto.UserMetadataItem item: userMetadata) {
+      if (item.hasName() && item.getName().equals(key)) {
+        return item.getValue().asReadOnlyByteBuffer();
+      }
+    }
+    throw new IllegalArgumentException("Can't find user metadata " + key);
+  }
+
+  public boolean hasMetadataValue(String key) {
+    for(OrcProto.UserMetadataItem item: userMetadata) {
+      if (item.hasName() && item.getName().equals(key)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  @Override
+  public CompressionKind getCompressionKind() {
+    return compressionKind;
+  }
+
+  @Override
+  public int getCompressionSize() {
+    return bufferSize;
+  }
+
+  @Override
+  public List<StripeInformation> getStripes() {
+    return stripes;
+  }
+
+  @Override
+  public long getContentLength() {
+    return contentLength;
+  }
+
+  @Override
+  public List<OrcProto.Type> getTypes() {
+    return types;
+  }
+
+  @Override
+  public OrcFile.Version getFileVersion() {
+    for (OrcFile.Version version: OrcFile.Version.values()) {
+      if ((versionList != null && !versionList.isEmpty()) &&
+          version.getMajor() == versionList.get(0) &&
+          version.getMinor() == versionList.get(1)) {
+        return version;
+      }
+    }
+    return OrcFile.Version.V_0_11;
+  }
+
+  @Override
+  public OrcFile.WriterVersion getWriterVersion() {
+    return writerVersion;
+  }
+
+  @Override
+  public OrcProto.FileTail getFileTail() {
+    return tail.getFileTail();
+  }
+
+  @Override
+  public int getRowIndexStride() {
+    return rowIndexStride;
+  }
+
+  @Override
+  public ColumnStatistics[] getStatistics() {
+    ColumnStatistics[] result = new ColumnStatistics[types.size()];
+    for(int i=0; i < result.length; ++i) {
+      result[i] = ColumnStatisticsImpl.deserialize(fileStats.get(i));
+    }
+    return result;
+  }
+
+  @Override
+  public TypeDescription getSchema() {
+    return schema;
+  }
+
+  /**
+   * Ensure this is an ORC file to prevent users from trying to read text
+   * files or RC files as ORC files.
+   * @param in the file being read
+   * @param path the filename for error messages
+   * @param psLen the postscript length
+   * @param buffer the tail of the file
+   * @throws IOException
+   */
+  protected static void ensureOrcFooter(FSDataInputStream in,
+                                        Path path,
+                                        int psLen,
+                                        ByteBuffer buffer) throws IOException {
+    int magicLength = OrcFile.MAGIC.length();
+    int fullLength = magicLength + 1;
+    if (psLen < fullLength || buffer.remaining() < fullLength) {
+      throw new FileFormatException("Malformed ORC file " + path +
+          ". Invalid postscript length " + psLen);
+    }
+    int offset = buffer.arrayOffset() + buffer.position() + buffer.limit() - fullLength;
+    byte[] array = buffer.array();
+    // now look for the magic string at the end of the postscript.
+    if (!Text.decode(array, offset, magicLength).equals(OrcFile.MAGIC)) {
+      // If it isn't there, this may be the 0.11.0 version of ORC.
+      // Read the first 3 bytes of the file to check for the header
+      byte[] header = new byte[magicLength];
+      in.readFully(0, header, 0, magicLength);
+      // if it isn't there, this isn't an ORC file
+      if (!Text.decode(header, 0 , magicLength).equals(OrcFile.MAGIC)) {
+        throw new FileFormatException("Malformed ORC file " + path +
+            ". Invalid postscript.");
+      }
+    }
+  }
+
+  /**
+   * Ensure this is an ORC file to prevent users from trying to read text
+   * files or RC files as ORC files.
+   * @param psLen the postscript length
+   * @param buffer the tail of the file
+   * @throws IOException
+   */
+  protected static void ensureOrcFooter(ByteBuffer buffer, int psLen) throws IOException {
+    int magicLength = OrcFile.MAGIC.length();
+    int fullLength = magicLength + 1;
+    if (psLen < fullLength || buffer.remaining() < fullLength) {
+      throw new FileFormatException("Malformed ORC file. Invalid postscript length " + psLen);
+    }
+
+    int offset = buffer.arrayOffset() + buffer.position() + buffer.limit() - fullLength;
+    byte[] array = buffer.array();
+    // now look for the magic string at the end of the postscript.
+    if (!Text.decode(array, offset, magicLength).equals(OrcFile.MAGIC)) {
+      // if it isn't there, this may be the 0.11.0 version of the ORC file.
+      // Read the first 3 bytes from the buffer to check for the header
+      if (!Text.decode(buffer.array(), 0, magicLength).equals(OrcFile.MAGIC)) {
+        throw new FileFormatException("Malformed ORC file. Invalid postscript length " + psLen);
+      }
+    }
+  }
+
+  /**
+   * Build a version string out of a list of version components.
+   * @param version the version number as a list
+   * @return the human readable form of the version string
+   */
+  private static String versionString(List<Integer> version) {
+    StringBuilder buffer = new StringBuilder();
+    for(int i=0; i < version.size(); ++i) {
+      if (i != 0) {
+        buffer.append('.');
+      }
+      buffer.append(version.get(i));
+    }
+    return buffer.toString();
+  }
+
+  /**
+   * Check to see if this ORC file is from a future version and if so,
+   * warn the user that we may not be able to read all of the column encodings.
+   * @param log the logger to write any error message to
+   * @param path the data source path for error messages
+   * @param version the version of hive that wrote the file.
+   */
+  protected static void checkOrcVersion(Logger log, Path path,
+                                        List<Integer> version) {
+    if (version.size() >= 1) {
+      int major = version.get(0);
+      int minor = 0;
+      if (version.size() >= 2) {
+        minor = version.get(1);
+      }
+      if (major > OrcFile.Version.CURRENT.getMajor() ||
+          (major == OrcFile.Version.CURRENT.getMajor() &&
+           minor > OrcFile.Version.CURRENT.getMinor())) {
+        log.warn(path + " was written by a future Hive version " +
+                 versionString(version) +
+                 ". This file may not be readable by this version of Hive.");
+      }
+    }
+  }
+
+  /**
+   * Constructor that lets the user specify additional options.
+   * @param path pathname for file
+   * @param options options for reading
+   * @throws IOException
+   */
+  public ReaderImpl(Path path, OrcFile.ReaderOptions options) throws IOException {
+    FileSystem fs = options.getFilesystem();
+    if (fs == null) {
+      fs = path.getFileSystem(options.getConfiguration());
+    }
+    this.fileSystem = fs;
+    this.path = path;
+    this.conf = options.getConfiguration();
+    this.maxLength = options.getMaxLength();
+    FileMetadata fileMetadata = options.getFileMetadata();
+    if (fileMetadata != null) {
+      this.compressionKind = fileMetadata.getCompressionKind();
+      this.bufferSize = fileMetadata.getCompressionBufferSize();
+      this.codec = PhysicalFsWriter.createCodec(compressionKind);
+      this.metadataSize = fileMetadata.getMetadataSize();
+      this.stripeStats = fileMetadata.getStripeStats();
+      this.versionList = fileMetadata.getVersionList();
+      this.writerVersion =
+          OrcFile.WriterVersion.from(fileMetadata.getWriterVersionNum());
+      this.types = fileMetadata.getTypes();
+      this.rowIndexStride = fileMetadata.getRowIndexStride();
+      this.contentLength = fileMetadata.getContentLength();
+      this.numberOfRows = fileMetadata.getNumberOfRows();
+      this.fileStats = fileMetadata.getFileStats();
+      this.stripes = fileMetadata.getStripes();
+      this.userMetadata = null; // not cached and not needed here
+    } else {
+      OrcTail orcTail = options.getOrcTail();
+      if (orcTail == null) {
+        tail = extractFileTail(fs, path, options.getMaxLength());
+        options.orcTail(tail);
+      } else {
+        tail = orcTail;
+      }
+      this.compressionKind = tail.getCompressionKind();
+      this.codec = tail.getCompressionCodec();
+      this.bufferSize = tail.getCompressionBufferSize();
+      this.metadataSize = tail.getMetadataSize();
+      this.versionList = tail.getPostScript().getVersionList();
+      this.types = tail.getFooter().getTypesList();
+      this.rowIndexStride = tail.getFooter().getRowIndexStride();
+      this.contentLength = tail.getFooter().getContentLength();
+      this.numberOfRows = tail.getFooter().getNumberOfRows();
+      this.userMetadata = tail.getFooter().getMetadataList();
+      this.fileStats = tail.getFooter().getStatisticsList();
+      this.writerVersion = tail.getWriterVersion();
+      this.stripes = tail.getStripes();
+      this.stripeStats = tail.getStripeStatisticsProto();
+    }
+    this.schema = OrcUtils.convertTypeFromProtobuf(this.types, 0);
+  }
+
+  /**
+   * Get the WriterVersion based on the ORC file postscript.
+   * @param writerVersion the integer writer version
+   * @return the version of the software that produced the file
+   */
+  public static OrcFile.WriterVersion getWriterVersion(int writerVersion) {
+    for(OrcFile.WriterVersion version: OrcFile.WriterVersion.values()) {
+      if (version.getId() == writerVersion) {
+        return version;
+      }
+    }
+    return OrcFile.WriterVersion.FUTURE;
+  }
+
+  private static OrcProto.Footer extractFooter(ByteBuffer bb, int footerAbsPos,
+      int footerSize, CompressionCodec codec, int bufferSize) throws IOException {
+    bb.position(footerAbsPos);
+    bb.limit(footerAbsPos + footerSize);
+    return OrcProto.Footer.parseFrom(InStream.createCodedInputStream("footer",
+        Lists.<DiskRange>newArrayList(new BufferChunk(bb, 0)), footerSize, codec, bufferSize));
+  }
+
+  public static OrcProto.Metadata extractMetadata(ByteBuffer bb, int metadataAbsPos,
+      int metadataSize, CompressionCodec codec, int bufferSize) throws IOException {
+    bb.position(metadataAbsPos);
+    bb.limit(metadataAbsPos + metadataSize);
+    return OrcProto.Metadata.parseFrom(InStream.createCodedInputStream("metadata",
+        Lists.<DiskRange>newArrayList(new BufferChunk(bb, 0)), metadataSize, codec, bufferSize));
+  }
+
+  private static OrcProto.PostScript extractPostScript(ByteBuffer bb, Path path,
+      int psLen, int psAbsOffset) throws IOException {
+    // TODO: when PB is upgraded to 2.6, newInstance(ByteBuffer) method should be used here.
+    assert bb.hasArray();
+    CodedInputStream in = CodedInputStream.newInstance(
+        bb.array(), bb.arrayOffset() + psAbsOffset, psLen);
+    OrcProto.PostScript ps = OrcProto.PostScript.parseFrom(in);
+    checkOrcVersion(LOG, path, ps.getVersionList());
+
+    // Check compression codec.
+    switch (ps.getCompression()) {
+      case NONE:
+        break;
+      case ZLIB:
+        break;
+      case SNAPPY:
+        break;
+      case LZO:
+        break;
+      default:
+        throw new IllegalArgumentException("Unknown compression");
+    }
+    return ps;
+  }
+
+  public static OrcTail extractFileTail(ByteBuffer buffer)
+      throws IOException {
+    return extractFileTail(buffer, -1, -1);
+  }
+
+  public static OrcTail extractFileTail(ByteBuffer buffer, long fileLength, long modificationTime)
+      throws IOException {
+    int readSize = buffer.limit();
+    int psLen = buffer.get(readSize - 1) & 0xff;
+    int psOffset = readSize - 1 - psLen;
+    ensureOrcFooter(buffer, psLen);
+    byte[] psBuffer = new byte[psLen];
+    System.arraycopy(buffer.array(), psOffset, psBuffer, 0, psLen);
+    OrcProto.PostScript ps = OrcProto.PostScript.parseFrom(psBuffer);
+    int footerSize = (int) ps.getFooterLength();
+    CompressionCodec codec = PhysicalFsWriter
+        .createCodec(CompressionKind.valueOf(ps.getCompression().name()));
+    OrcProto.Footer footer = extractFooter(buffer,
+        (int) (buffer.position() + ps.getMetadataLength()),
+        footerSize, codec, (int) ps.getCompressionBlockSize());
+    OrcProto.FileTail.Builder fileTailBuilder = OrcProto.FileTail.newBuilder()
+        .setPostscriptLength(psLen)
+        .setPostscript(ps)
+        .setFooter(footer)
+        .setFileLength(fileLength);
+    // clear does not clear the contents but sets position to 0 and limit = capacity
+    buffer.clear();
+    return new OrcTail(fileTailBuilder.build(), buffer.slice(), modificationTime);
+  }
+
+  protected OrcTail extractFileTail(FileSystem fs, Path path,
+      long maxFileLength) throws IOException {
+    FSDataInputStream file = fs.open(path);
+    ByteBuffer buffer;
+    OrcProto.PostScript ps;
+    OrcProto.FileTail.Builder fileTailBuilder = OrcProto.FileTail.newBuilder();
+    long modificationTime;
+    try {
+      // figure out the size of the file using the option or filesystem
+      long size;
+      if (maxFileLength == Long.MAX_VALUE) {
+        FileStatus fileStatus = fs.getFileStatus(path);
+        size = fileStatus.getLen();
+        modificationTime = fileStatus.getModificationTime();
+      } else {
+        size = maxFileLength;
+        modificationTime = -1;
+      }
+      fileTailBuilder.setFileLength(size);
+
+      //read last bytes into buffer to get PostScript
+      int readSize = (int) Math.min(size, DIRECTORY_SIZE_GUESS);
+      buffer = ByteBuffer.allocate(readSize);
+      assert buffer.position() == 0;
+      file.readFully((size - readSize),
+          buffer.array(), buffer.arrayOffset(), readSize);
+      buffer.position(0);
+
+      //read the PostScript
+      //get length of PostScript
+      int psLen = buffer.get(readSize - 1) & 0xff;
+      ensureOrcFooter(file, path, psLen, buffer);
+      int psOffset = readSize - 1 - psLen;
+      ps = extractPostScript(buffer, path, psLen, psOffset);
+      bufferSize = (int) ps.getCompressionBlockSize();
+      codec = PhysicalFsWriter.createCodec(CompressionKind.valueOf(ps.getCompression().name()));
+      fileTailBuilder.setPostscriptLength(psLen).setPostscript(ps);
+
+      int footerSize = (int) ps.getFooterLength();
+      int metadataSize = (int) ps.getMetadataLength();
+
+      //check if extra bytes need to be read
+      int extra = Math.max(0, psLen + 1 + footerSize + metadataSize - readSize);
+      int tailSize = 1 + psLen + footerSize + metadataSize;
+      if (extra > 0) {
+        //more bytes need to be read, seek back to the right place and read extra bytes
+        ByteBuffer extraBuf = ByteBuffer.allocate(extra + readSize);
+        file.readFully((size - readSize - extra), extraBuf.array(),
+            extraBuf.arrayOffset() + extraBuf.position(), extra);
+        extraBuf.position(extra);
+        //append with already read bytes
+        extraBuf.put(buffer);
+        buffer = extraBuf;
+        buffer.position(0);
+        buffer.limit(tailSize);
+        readSize += extra;
+        psOffset = readSize - 1 - psLen;
+      } else {
+        //footer is already in the bytes in buffer, just adjust position, length
+        buffer.position(psOffset - footerSize - metadataSize);
+        buffer.limit(buffer.position() + tailSize);
+      }
+
+      buffer.mark();
+      int footerOffset = psOffset - footerSize;
+      buffer.position(footerOffset);
+      ByteBuffer footerBuffer = buffer.slice();
+      buffer.reset();
+      OrcProto.Footer footer = extractFooter(footerBuffer, 0, footerSize,
+          codec, bufferSize);
+      fileTailBuilder.setFooter(footer);
+    } finally {
+      try {
+        file.close();
+      } catch (IOException ex) {
+        LOG.error("Failed to close the file after another error", ex);
+      }
+    }
+
+    ByteBuffer serializedTail = ByteBuffer.allocate(buffer.remaining());
+    serializedTail.put(buffer.slice());
+    serializedTail.rewind();
+    return new OrcTail(fileTailBuilder.build(), serializedTail, modificationTime);
+  }
+
+  @Override
+  public ByteBuffer getSerializedFileFooter() {
+    return tail.getSerializedTail();
+  }
+
+  @Override
+  public RecordReader rows() throws IOException {
+    return rows(new Options());
+  }
+
+  @Override
+  public RecordReader rows(Options options) throws IOException {
+    LOG.info("Reading ORC rows from " + path + " with " + options);
+    return new RecordReaderImpl(this, options);
+  }
+
+
+  @Override
+  public long getRawDataSize() {
+    // if the deserializedSize is not computed, then compute it, else
+    // return the already computed size. since we are reading from the footer
+    // we don't have to compute deserialized size repeatedly
+    if (deserializedSize == -1) {
+      List<Integer> indices = Lists.newArrayList();
+      for (int i = 0; i < fileStats.size(); ++i) {
+        indices.add(i);
+      }
+      deserializedSize = getRawDataSizeFromColIndices(indices);
+    }
+    return deserializedSize;
+  }
+
+  @Override
+  public long getRawDataSizeFromColIndices(List<Integer> colIndices) {
+    return getRawDataSizeFromColIndices(colIndices, types, fileStats);
+  }
+
+  public static long getRawDataSizeFromColIndices(
+      List<Integer> colIndices, List<OrcProto.Type> types,
+      List<OrcProto.ColumnStatistics> stats) {
+    long result = 0;
+    for (int colIdx : colIndices) {
+      result += getRawDataSizeOfColumn(colIdx, types, stats);
+    }
+    return result;
+  }
+
+  private static long getRawDataSizeOfColumn(int colIdx, List<OrcProto.Type> types,
+      List<OrcProto.ColumnStatistics> stats) {
+    OrcProto.ColumnStatistics colStat = stats.get(colIdx);
+    long numVals = colStat.getNumberOfValues();
+    OrcProto.Type type = types.get(colIdx);
+
+    switch (type.getKind()) {
+    case BINARY:
+      // old orc format doesn't support binary statistics. checking for binary
+      // statistics is not required as protocol buffers takes care of it.
+      return colStat.getBinaryStatistics().getSum();
+    case STRING:
+    case CHAR:
+    case VARCHAR:
+      // old orc format doesn't support sum for string statistics. checking for
+      // existence is not required as protocol buffers takes care of it.
+
+      // ORC strings are deserialized to java strings. so use java data model's
+      // string size
+      numVals = numVals == 0 ? 1 : numVals;
+      int avgStrLen = (int) (colStat.getStringStatistics().getSum() / numVals);
+      return numVals * JavaDataModel.get().lengthForStringOfLength(avgStrLen);
+    case TIMESTAMP:
+      return numVals * JavaDataModel.get().lengthOfTimestamp();
+    case DATE:
+      return numVals * JavaDataModel.get().lengthOfDate();
+    case DECIMAL:
+      return numVals * JavaDataModel.get().lengthOfDecimal();
+    case DOUBLE:
+    case LONG:
+      return numVals * JavaDataModel.get().primitive2();
+    case FLOAT:
+    case INT:
+    case SHORT:
+    case BOOLEAN:
+    case BYTE:
+      return numVals * JavaDataModel.get().primitive1();
+    default:
+      LOG.debug("Unknown primitive category: " + type.getKind());
+      break;
+    }
+
+    return 0;
+  }
+
+  @Override
+  public long getRawDataSizeOfColumns(List<String> colNames) {
+    List<Integer> colIndices = getColumnIndicesFromNames(colNames);
+    return getRawDataSizeFromColIndices(colIndices);
+  }
+
+  private List<Integer> getColumnIndicesFromNames(List<String> colNames) {
+    // top level struct
+    OrcProto.Type type = types.get(0);
+    List<Integer> colIndices = Lists.newArrayList();
+    List<String> fieldNames = type.getFieldNamesList();
+    int fieldIdx;
+    for (String colName : colNames) {
+      if (fieldNames.contains(colName)) {
+        fieldIdx = fieldNames.indexOf(colName);
+      } else {
+        String s = "Cannot find field for: " + colName + " in ";
+        for (String fn : fieldNames) {
+          s += fn + ", ";
+        }
+        LOG.warn(s);
+        continue;
+      }
+
+      // a single field may span multiple columns. find start and end column
+      // index for the requested field
+      int idxStart = type.getSubtypes(fieldIdx);
+
+      int idxEnd;
+
+      // if the specified field is the last one, then the end index will be the
+      // last column index
+      if (fieldIdx + 1 > fieldNames.size() - 1) {
+        idxEnd = getLastIdx() + 1;
+      } else {
+        idxEnd = type.getSubtypes(fieldIdx + 1);
+      }
+
+      // if the start and end indices are the same, the field is a primitive
+      // field; otherwise it is a complex field (like map, list, struct, union)
+      if (idxStart == idxEnd) {
+        // simple field
+        colIndices.add(idxStart);
+      } else {
+        // a complex field spans multiple columns
+        for (int i = idxStart; i < idxEnd; i++) {
+          colIndices.add(i);
+        }
+      }
+    }
+    return colIndices;
+  }
+
+  private int getLastIdx() {
+    Set<Integer> indices = new HashSet<>();
+    for (OrcProto.Type type : types) {
+      indices.addAll(type.getSubtypesList());
+    }
+    return Collections.max(indices);
+  }
+
+  @Override
+  public List<OrcProto.StripeStatistics> getOrcProtoStripeStatistics() {
+    return stripeStats;
+  }
+
+  @Override
+  public List<OrcProto.ColumnStatistics> getOrcProtoFileStatistics() {
+    return fileStats;
+  }
+
+  @Override
+  public List<StripeStatistics> getStripeStatistics() throws IOException {
+    if (stripeStats == null && metadata == null) {
+      metadata = extractMetadata(tail.getSerializedTail(), 0, metadataSize, codec, bufferSize);
+      stripeStats = metadata.getStripeStatsList();
+    }
+    List<StripeStatistics> result = new ArrayList<>();
+    for (OrcProto.StripeStatistics ss : stripeStats) {
+      result.add(new StripeStatistics(ss.getColStatsList()));
+    }
+    return result;
+  }
+
+  public List<OrcProto.UserMetadataItem> getOrcProtoUserMetadata() {
+    return userMetadata;
+  }
+
+  @Override
+  public List<Integer> getVersionList() {
+    return versionList;
+  }
+
+  @Override
+  public int getMetadataSize() {
+    return metadataSize;
+  }
+
+  @Override
+  public String toString() {
+    StringBuilder buffer = new StringBuilder();
+    buffer.append("ORC Reader(");
+    buffer.append(path);
+    if (maxLength != -1) {
+      buffer.append(", ");
+      buffer.append(maxLength);
+    }
+    buffer.append(")");
+    return buffer.toString();
+  }
+}

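The offset arithmetic in extractFileTail follows directly from the ORC tail layout: reading backwards from the end of the file there is a one-byte postscript length, the PostScript itself, the Footer, and the Metadata section. Here is a hedged sketch of the same computation in isolation; the parameters mirror the local variables in extractFileTail and the method name is made up:

    // Tail layout, reading backwards from the end of the file:
    //   [ ... stripes ... ][ metadata ][ footer ][ postscript ][ 1-byte ps length ]
    static void tailOffsets(long fileLength, int psLen, int footerSize, int metadataSize) {
      long psLenByte      = fileLength - 1;              // last byte of the file
      long psOffset       = psLenByte - psLen;           // start of the PostScript
      long footerOffset   = psOffset - footerSize;       // start of the Footer
      long metadataOffset = footerOffset - metadataSize; // start of the Metadata
      long tailSize       = 1 + psLen + footerSize + metadataSize;
      System.out.println("tail starts at " + metadataOffset + ", size " + tailSize);
    }
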

[13/37] hive git commit: HIVE-17118. Move the hive-orc source files to make the package names unique.

Posted by om...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/impl/ZeroCopyShims.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/ZeroCopyShims.java b/orc/src/java/org/apache/orc/impl/ZeroCopyShims.java
deleted file mode 100644
index de02c8b..0000000
--- a/orc/src/java/org/apache/orc/impl/ZeroCopyShims.java
+++ /dev/null
@@ -1,89 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc.impl;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.EnumSet;
-
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.ReadOption;
-import org.apache.hadoop.io.ByteBufferPool;
-
-class ZeroCopyShims {
-  private static final class ByteBufferPoolAdapter implements ByteBufferPool {
-    private HadoopShims.ByteBufferPoolShim pool;
-
-    public ByteBufferPoolAdapter(HadoopShims.ByteBufferPoolShim pool) {
-      this.pool = pool;
-    }
-
-    @Override
-    public final ByteBuffer getBuffer(boolean direct, int length) {
-      return this.pool.getBuffer(direct, length);
-    }
-
-    @Override
-    public final void putBuffer(ByteBuffer buffer) {
-      this.pool.putBuffer(buffer);
-    }
-  }
-
-  private static final class ZeroCopyAdapter implements HadoopShims.ZeroCopyReaderShim {
-    private final FSDataInputStream in;
-    private final ByteBufferPoolAdapter pool;
-    private final static EnumSet<ReadOption> CHECK_SUM = EnumSet
-        .noneOf(ReadOption.class);
-    private final static EnumSet<ReadOption> NO_CHECK_SUM = EnumSet
-        .of(ReadOption.SKIP_CHECKSUMS);
-
-    public ZeroCopyAdapter(FSDataInputStream in,
-                           HadoopShims.ByteBufferPoolShim poolshim) {
-      this.in = in;
-      if (poolshim != null) {
-        pool = new ByteBufferPoolAdapter(poolshim);
-      } else {
-        pool = null;
-      }
-    }
-
-    public final ByteBuffer readBuffer(int maxLength, boolean verifyChecksums)
-        throws IOException {
-      EnumSet<ReadOption> options = NO_CHECK_SUM;
-      if (verifyChecksums) {
-        options = CHECK_SUM;
-      }
-      return this.in.read(this.pool, maxLength, options);
-    }
-
-    public final void releaseBuffer(ByteBuffer buffer) {
-      this.in.releaseBuffer(buffer);
-    }
-
-    @Override
-    public final void close() throws IOException {
-      this.in.close();
-    }
-  }
-
-  public static HadoopShims.ZeroCopyReaderShim getZeroCopyReader(FSDataInputStream in,
-                                                                 HadoopShims.ByteBufferPoolShim pool) throws IOException {
-    return new ZeroCopyAdapter(in, pool);
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/impl/ZlibCodec.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/ZlibCodec.java b/orc/src/java/org/apache/orc/impl/ZlibCodec.java
deleted file mode 100644
index 5f648a8..0000000
--- a/orc/src/java/org/apache/orc/impl/ZlibCodec.java
+++ /dev/null
@@ -1,169 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc.impl;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.EnumSet;
-import java.util.zip.DataFormatException;
-import java.util.zip.Deflater;
-import java.util.zip.Inflater;
-
-import javax.annotation.Nullable;
-
-import org.apache.hadoop.io.compress.DirectDecompressor;
-import org.apache.orc.CompressionCodec;
-
-public class ZlibCodec implements CompressionCodec, DirectDecompressionCodec {
-  private static final HadoopShims SHIMS = HadoopShims.Factory.get();
-  private Boolean direct = null;
-
-  private final int level;
-  private final int strategy;
-
-  public ZlibCodec() {
-    level = Deflater.DEFAULT_COMPRESSION;
-    strategy = Deflater.DEFAULT_STRATEGY;
-  }
-
-  private ZlibCodec(int level, int strategy) {
-    this.level = level;
-    this.strategy = strategy;
-  }
-
-  @Override
-  public boolean compress(ByteBuffer in, ByteBuffer out,
-                          ByteBuffer overflow) throws IOException {
-    Deflater deflater = new Deflater(level, true);
-    deflater.setStrategy(strategy);
-    int length = in.remaining();
-    deflater.setInput(in.array(), in.arrayOffset() + in.position(), length);
-    deflater.finish();
-    int outSize = 0;
-    int offset = out.arrayOffset() + out.position();
-    while (!deflater.finished() && (length > outSize)) {
-      int size = deflater.deflate(out.array(), offset, out.remaining());
-      out.position(size + out.position());
-      outSize += size;
-      offset += size;
-      // if we run out of space in the out buffer, use the overflow
-      if (out.remaining() == 0) {
-        if (overflow == null) {
-          deflater.end();
-          return false;
-        }
-        out = overflow;
-        offset = out.arrayOffset() + out.position();
-      }
-    }
-    deflater.end();
-    return length > outSize;
-  }
-
-  @Override
-  public void decompress(ByteBuffer in, ByteBuffer out) throws IOException {
-
-    if(in.isDirect() && out.isDirect()) {
-      directDecompress(in, out);
-      return;
-    }
-
-    Inflater inflater = new Inflater(true);
-    inflater.setInput(in.array(), in.arrayOffset() + in.position(),
-                      in.remaining());
-    while (!(inflater.finished() || inflater.needsDictionary() ||
-             inflater.needsInput())) {
-      try {
-        int count = inflater.inflate(out.array(),
-                                     out.arrayOffset() + out.position(),
-                                     out.remaining());
-        out.position(count + out.position());
-      } catch (DataFormatException dfe) {
-        throw new IOException("Bad compression data", dfe);
-      }
-    }
-    out.flip();
-    inflater.end();
-    in.position(in.limit());
-  }
-
-  @Override
-  public boolean isAvailable() {
-    if (direct == null) {
-      // see nowrap option in new Inflater(boolean) which disables zlib headers
-      try {
-        if (SHIMS.getDirectDecompressor(
-            HadoopShims.DirectCompressionType.ZLIB_NOHEADER) != null) {
-          direct = Boolean.valueOf(true);
-        } else {
-          direct = Boolean.valueOf(false);
-        }
-      } catch (UnsatisfiedLinkError ule) {
-        direct = Boolean.valueOf(false);
-      }
-    }
-    return direct.booleanValue();
-  }
-
-  @Override
-  public void directDecompress(ByteBuffer in, ByteBuffer out)
-      throws IOException {
-    HadoopShims.DirectDecompressor decompressShim =
-        SHIMS.getDirectDecompressor(HadoopShims.DirectCompressionType.ZLIB_NOHEADER);
-    decompressShim.decompress(in, out);
-    out.flip(); // flip for read
-  }
-
-  @Override
-  public CompressionCodec modify(@Nullable EnumSet<Modifier> modifiers) {
-
-    if (modifiers == null) {
-      return this;
-    }
-
-    int l = this.level;
-    int s = this.strategy;
-
-    for (Modifier m : modifiers) {
-      switch (m) {
-      case BINARY:
-        /* filtered == less LZ77, more huffman */
-        s = Deflater.FILTERED;
-        break;
-      case TEXT:
-        s = Deflater.DEFAULT_STRATEGY;
-        break;
-      case FASTEST:
-        // deflate_fast looking for 8 byte patterns
-        l = Deflater.BEST_SPEED;
-        break;
-      case FAST:
-        // deflate_fast looking for 16 byte patterns
-        l = Deflater.BEST_SPEED + 1;
-        break;
-      case DEFAULT:
-        // deflate_slow looking for 128 byte patterns
-        l = Deflater.DEFAULT_COMPRESSION;
-        break;
-      default:
-        break;
-      }
-    }
-    return new ZlibCodec(l, s);
-  }
-}

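This modify() method is the consumer side of the modifier sets that PhysicalFsWriter.createCompressionModifiers() builds: the writer picks a modifier set per stream kind and the codec maps it onto a Deflater level and strategy. A small illustrative fragment, assuming the CompressionCodec and ZlibCodec classes are on the classpath and java.util.EnumSet is imported:

    CompressionCodec base = new ZlibCodec();

    // Index-style streams (PRESENT, LENGTH, ROW_INDEX, ...):
    // FASTEST -> Deflater.BEST_SPEED, BINARY -> Deflater.FILTERED.
    CompressionCodec forIndexes = base.modify(
        EnumSet.of(CompressionCodec.Modifier.FASTEST, CompressionCodec.Modifier.BINARY));

    // Data streams under CompressionStrategy.SPEED:
    // TEXT -> Deflater.DEFAULT_STRATEGY, FAST -> Deflater.BEST_SPEED + 1.
    CompressionCodec forData = base.modify(
        EnumSet.of(CompressionCodec.Modifier.TEXT, CompressionCodec.Modifier.FAST));
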
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/tools/FileDump.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/tools/FileDump.java b/orc/src/java/org/apache/orc/tools/FileDump.java
deleted file mode 100644
index 1a1d8ab..0000000
--- a/orc/src/java/org/apache/orc/tools/FileDump.java
+++ /dev/null
@@ -1,946 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * <p/>
- * http://www.apache.org/licenses/LICENSE-2.0
- * <p/>
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc.tools;
-
-import java.io.IOException;
-import java.io.OutputStreamWriter;
-import java.io.PrintStream;
-import java.text.DecimalFormat;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.List;
-
-import org.apache.commons.cli.CommandLine;
-import org.apache.commons.cli.GnuParser;
-import org.apache.commons.cli.HelpFormatter;
-import org.apache.commons.cli.OptionBuilder;
-import org.apache.commons.cli.Options;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.PathFilter;
-import org.apache.hadoop.hdfs.DistributedFileSystem;
-import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.UnionColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.hadoop.hive.serde2.io.DateWritable;
-import org.apache.orc.BloomFilterIO;
-import org.apache.orc.ColumnStatistics;
-import org.apache.orc.CompressionKind;
-import org.apache.orc.OrcFile;
-import org.apache.orc.Reader;
-import org.apache.orc.RecordReader;
-import org.apache.orc.TypeDescription;
-import org.apache.orc.Writer;
-import org.apache.orc.impl.AcidStats;
-import org.apache.orc.impl.ColumnStatisticsImpl;
-import org.apache.orc.impl.OrcAcidUtils;
-import org.apache.orc.impl.OrcIndex;
-import org.apache.orc.OrcProto;
-import org.apache.orc.StripeInformation;
-import org.apache.orc.StripeStatistics;
-import org.apache.orc.impl.RecordReaderImpl;
-import org.codehaus.jettison.json.JSONException;
-import org.codehaus.jettison.json.JSONWriter;
-
-import com.google.common.base.Joiner;
-import com.google.common.base.Strings;
-import com.google.common.collect.Lists;
-
-/**
- * A tool for printing out the file structure of ORC files.
- */
-public final class FileDump {
-  public static final String UNKNOWN = "UNKNOWN";
-  public static final String SEPARATOR = Strings.repeat("_", 120) + "\n";
-  public static final int DEFAULT_BLOCK_SIZE = 256 * 1024 * 1024;
-  public static final String DEFAULT_BACKUP_PATH = System.getProperty("java.io.tmpdir");
-  public static final PathFilter HIDDEN_AND_SIDE_FILE_FILTER = new PathFilter() {
-    public boolean accept(Path p) {
-      String name = p.getName();
-      return !name.startsWith("_") && !name.startsWith(".") && !name.endsWith(
-          OrcAcidUtils.DELTA_SIDE_FILE_SUFFIX);
-    }
-  };
-
-  // not used
-  private FileDump() {
-  }
-
-  public static void main(String[] args) throws Exception {
-    Configuration conf = new Configuration();
-
-    List<Integer> rowIndexCols = new ArrayList<Integer>(0);
-    Options opts = createOptions();
-    CommandLine cli = new GnuParser().parse(opts, args);
-
-    if (cli.hasOption('h')) {
-      HelpFormatter formatter = new HelpFormatter();
-      formatter.printHelp("orcfiledump", opts);
-      return;
-    }
-
-    boolean dumpData = cli.hasOption('d');
-    boolean recover = cli.hasOption("recover");
-    boolean skipDump = cli.hasOption("skip-dump");
-    String backupPath = DEFAULT_BACKUP_PATH;
-    if (cli.hasOption("backup-path")) {
-      backupPath = cli.getOptionValue("backup-path");
-    }
-
-    if (cli.hasOption("r")) {
-      String val = cli.getOptionValue("r");
-      if (val != null && val.trim().equals("*")) {
-        rowIndexCols = null; // All the columns
-      } else {
-        String[] colStrs = cli.getOptionValue("r").split(",");
-        rowIndexCols = new ArrayList<Integer>(colStrs.length);
-        for (String colStr : colStrs) {
-          rowIndexCols.add(Integer.parseInt(colStr));
-        }
-      }
-    }
-
-    boolean printTimeZone = cli.hasOption('t');
-    boolean jsonFormat = cli.hasOption('j');
-    String[] files = cli.getArgs();
-    if (files.length == 0) {
-      System.err.println("Error : ORC files are not specified");
-      return;
-    }
-
-    // if the specified path is directory, iterate through all files and print the file dump
-    List<String> filesInPath = Lists.newArrayList();
-    for (String filename : files) {
-      Path path = new Path(filename);
-      filesInPath.addAll(getAllFilesInPath(path, conf));
-    }
-
-    if (dumpData) {
-      printData(filesInPath, conf);
-    } else if (recover && skipDump) {
-      recoverFiles(filesInPath, conf, backupPath);
-    } else {
-      if (jsonFormat) {
-        boolean prettyPrint = cli.hasOption('p');
-        JsonFileDump.printJsonMetaData(filesInPath, conf, rowIndexCols, prettyPrint, printTimeZone);
-      } else {
-        printMetaData(filesInPath, conf, rowIndexCols, printTimeZone, recover, backupPath);
-      }
-    }
-  }
-
-  /**
-   * This method returns an ORC reader object if the specified file is readable. If the specified
-   * file has a side file (_flush_length), then the max footer offset will be read from the side
-   * file and the ORC reader will be created from that offset. Since both the data file and side file
-   * use hflush() for flushing the data, there could be some inconsistencies and both files could be
-   * out-of-sync. Following are the cases under which null will be returned
-   *
-   * 1) If the file specified by path or its side file is still open for writes
-   * 2) If *_flush_length file does not return any footer offset
-   * 3) If *_flush_length returns a valid footer offset but the data file is not readable at that
-   *    position (incomplete data file)
-   * 4) If *_flush_length file length is not a multiple of 8, then reader will be created from
-   *    previous valid footer. If there is no such footer (file length > 0 and < 8), then null will
-   *    be returned
-   *
-   * Also, if this method detects any file corruption (mismatch between data file and side file)
-   * then it will add the corresponding file to the specified input list for corrupted files.
-   *
-   * In all other cases, where the file is readable this method will return a reader object.
-   *
-   * @param path - file to get reader for
-   * @param conf - configuration object
-   * @param corruptFiles - fills this list with all possible corrupted files
-   * @return - reader for the specified file or null
-   * @throws IOException
-   */
-  static Reader getReader(final Path path, final Configuration conf,
-      final List<String> corruptFiles) throws IOException {
-    FileSystem fs = path.getFileSystem(conf);
-    long dataFileLen = fs.getFileStatus(path).getLen();
-    System.err.println("Processing data file " + path + " [length: " + dataFileLen + "]");
-    Path sideFile = OrcAcidUtils.getSideFile(path);
-    final boolean sideFileExists = fs.exists(sideFile);
-    boolean openDataFile = false;
-    boolean openSideFile = false;
-    if (fs instanceof DistributedFileSystem) {
-      DistributedFileSystem dfs = (DistributedFileSystem) fs;
-      openDataFile = !dfs.isFileClosed(path);
-      openSideFile = sideFileExists && !dfs.isFileClosed(sideFile);
-    }
-
-    if (openDataFile || openSideFile) {
-      if (openDataFile && openSideFile) {
-        System.err.println("Unable to perform file dump as " + path + " and " + sideFile +
-            " are still open for writes.");
-      } else if (openSideFile) {
-        System.err.println("Unable to perform file dump as " + sideFile +
-            " is still open for writes.");
-      } else {
-        System.err.println("Unable to perform file dump as " + path +
-            " is still open for writes.");
-      }
-
-      return null;
-    }
-
-    Reader reader = null;
-    if (sideFileExists) {
-      final long maxLen = OrcAcidUtils.getLastFlushLength(fs, path);
-      final long sideFileLen = fs.getFileStatus(sideFile).getLen();
-      System.err.println("Found flush length file " + sideFile
-          + " [length: " + sideFileLen + ", maxFooterOffset: " + maxLen + "]");
-      // no offsets read from side file
-      if (maxLen == -1) {
-
-        // if data file is larger than last flush length, then additional data could be recovered
-        if (dataFileLen > maxLen) {
-          System.err.println("Data file has more data than max footer offset:" + maxLen +
-              ". Adding data file to recovery list.");
-          if (corruptFiles != null) {
-            corruptFiles.add(path.toUri().toString());
-          }
-        }
-        return null;
-      }
-
-      try {
-        reader = OrcFile.createReader(path, OrcFile.readerOptions(conf).maxLength(maxLen));
-
-        // if data file is larger than last flush length, then additional data could be recovered
-        if (dataFileLen > maxLen) {
-          System.err.println("Data file has more data than max footer offset:" + maxLen +
-              ". Adding data file to recovery list.");
-          if (corruptFiles != null) {
-            corruptFiles.add(path.toUri().toString());
-          }
-        }
-      } catch (Exception e) {
-        if (corruptFiles != null) {
-          corruptFiles.add(path.toUri().toString());
-        }
-        System.err.println("Unable to read data from max footer offset." +
-            " Adding data file to recovery list.");
-        return null;
-      }
-    } else {
-      reader = OrcFile.createReader(path, OrcFile.readerOptions(conf));
-    }
-
-    return reader;
-  }
-
-  public static Collection<String> getAllFilesInPath(final Path path,
-      final Configuration conf) throws IOException {
-    List<String> filesInPath = Lists.newArrayList();
-    FileSystem fs = path.getFileSystem(conf);
-    FileStatus fileStatus = fs.getFileStatus(path);
-    if (fileStatus.isDir()) {
-      FileStatus[] fileStatuses = fs.listStatus(path, HIDDEN_AND_SIDE_FILE_FILTER);
-      for (FileStatus fileInPath : fileStatuses) {
-        if (fileInPath.isDir()) {
-          filesInPath.addAll(getAllFilesInPath(fileInPath.getPath(), conf));
-        } else {
-          filesInPath.add(fileInPath.getPath().toString());
-        }
-      }
-    } else {
-      filesInPath.add(path.toString());
-    }
-
-    return filesInPath;
-  }
-
-  private static void printData(List<String> files,
-      Configuration conf) throws IOException,
-      JSONException {
-    for (String file : files) {
-      try {
-        Path path = new Path(file);
-        Reader reader = getReader(path, conf, Lists.<String>newArrayList());
-        if (reader == null) {
-          continue;
-        }
-        printJsonData(reader);
-        System.out.println(SEPARATOR);
-      } catch (Exception e) {
-        System.err.println("Unable to dump data for file: " + file);
-        continue;
-      }
-    }
-  }
-
-  private static void printMetaData(List<String> files, Configuration conf,
-      List<Integer> rowIndexCols, boolean printTimeZone, final boolean recover,
-      final String backupPath)
-      throws IOException {
-    List<String> corruptFiles = Lists.newArrayList();
-    for (String filename : files) {
-      printMetaDataImpl(filename, conf, rowIndexCols, printTimeZone, corruptFiles);
-      System.out.println(SEPARATOR);
-    }
-
-    if (!corruptFiles.isEmpty()) {
-      if (recover) {
-        recoverFiles(corruptFiles, conf, backupPath);
-      } else {
-        System.err.println(corruptFiles.size() + " file(s) are corrupted." +
-            " Run the following command to recover corrupted files.\n");
-        String fileNames = Joiner.on(" ").skipNulls().join(corruptFiles);
-        System.err.println("hive --orcfiledump --recover --skip-dump " + fileNames);
-        System.out.println(SEPARATOR);
-      }
-    }
-  }
-
-  private static void printMetaDataImpl(final String filename,
-      final Configuration conf, List<Integer> rowIndexCols, final boolean printTimeZone,
-      final List<String> corruptFiles) throws IOException {
-    Path file = new Path(filename);
-    Reader reader = getReader(file, conf, corruptFiles);
-    // if we can create a reader then the footer is not corrupt and the file will be readable
-    if (reader == null) {
-      return;
-    }
-
-    System.out.println("Structure for " + filename);
-    System.out.println("File Version: " + reader.getFileVersion().getName() +
-        " with " + reader.getWriterVersion());
-    RecordReaderImpl rows = (RecordReaderImpl) reader.rows();
-    System.out.println("Rows: " + reader.getNumberOfRows());
-    System.out.println("Compression: " + reader.getCompressionKind());
-    if (reader.getCompressionKind() != CompressionKind.NONE) {
-      System.out.println("Compression size: " + reader.getCompressionSize());
-    }
-    System.out.println("Type: " + reader.getSchema().toString());
-    System.out.println("\nStripe Statistics:");
-    List<StripeStatistics> stripeStats = reader.getStripeStatistics();
-    for (int n = 0; n < stripeStats.size(); n++) {
-      System.out.println("  Stripe " + (n + 1) + ":");
-      StripeStatistics ss = stripeStats.get(n);
-      for (int i = 0; i < ss.getColumnStatistics().length; ++i) {
-        System.out.println("    Column " + i + ": " +
-            ss.getColumnStatistics()[i].toString());
-      }
-    }
-    ColumnStatistics[] stats = reader.getStatistics();
-    int colCount = stats.length;
-    if (rowIndexCols == null) {
-      rowIndexCols = new ArrayList<>(colCount);
-      for (int i = 0; i < colCount; ++i) {
-        rowIndexCols.add(i);
-      }
-    }
-    System.out.println("\nFile Statistics:");
-    for (int i = 0; i < stats.length; ++i) {
-      System.out.println("  Column " + i + ": " + stats[i].toString());
-    }
-    System.out.println("\nStripes:");
-    int stripeIx = -1;
-    for (StripeInformation stripe : reader.getStripes()) {
-      ++stripeIx;
-      long stripeStart = stripe.getOffset();
-      OrcProto.StripeFooter footer = rows.readStripeFooter(stripe);
-      if (printTimeZone) {
-        String tz = footer.getWriterTimezone();
-        if (tz == null || tz.isEmpty()) {
-          tz = UNKNOWN;
-        }
-        System.out.println("  Stripe: " + stripe.toString() + " timezone: " + tz);
-      } else {
-        System.out.println("  Stripe: " + stripe.toString());
-      }
-      long sectionStart = stripeStart;
-      for (OrcProto.Stream section : footer.getStreamsList()) {
-        String kind = section.hasKind() ? section.getKind().name() : UNKNOWN;
-        System.out.println("    Stream: column " + section.getColumn() +
-            " section " + kind + " start: " + sectionStart +
-            " length " + section.getLength());
-        sectionStart += section.getLength();
-      }
-      for (int i = 0; i < footer.getColumnsCount(); ++i) {
-        OrcProto.ColumnEncoding encoding = footer.getColumns(i);
-        StringBuilder buf = new StringBuilder();
-        buf.append("    Encoding column ");
-        buf.append(i);
-        buf.append(": ");
-        buf.append(encoding.getKind());
-        if (encoding.getKind() == OrcProto.ColumnEncoding.Kind.DICTIONARY ||
-            encoding.getKind() == OrcProto.ColumnEncoding.Kind.DICTIONARY_V2) {
-          buf.append("[");
-          buf.append(encoding.getDictionarySize());
-          buf.append("]");
-        }
-        System.out.println(buf);
-      }
-      if (rowIndexCols != null && !rowIndexCols.isEmpty()) {
-        // read the row index only for the specified columns; bloom filters are read only for
-        // the included columns
-        boolean[] sargColumns = new boolean[colCount];
-        for (int colIdx : rowIndexCols) {
-          sargColumns[colIdx] = true;
-        }
-        OrcIndex indices = rows
-            .readRowIndex(stripeIx, null, null, null, sargColumns);
-        for (int col : rowIndexCols) {
-          StringBuilder buf = new StringBuilder();
-          String rowIdxString = getFormattedRowIndices(col, indices.getRowGroupIndex());
-          buf.append(rowIdxString);
-          String bloomFilString = getFormattedBloomFilters(col, indices.getBloomFilterIndex());
-          buf.append(bloomFilString);
-          System.out.println(buf);
-        }
-      }
-    }
-
-    FileSystem fs = file.getFileSystem(conf);
-    long fileLen = fs.getFileStatus(file).getLen();
-    long paddedBytes = getTotalPaddingSize(reader);
-    // an empty ORC file is ~45 bytes; the assumption here is that the file length is always > 0
-    double percentPadding = ((double) paddedBytes / (double) fileLen) * 100;
-    DecimalFormat format = new DecimalFormat("##.##");
-    System.out.println("\nFile length: " + fileLen + " bytes");
-    System.out.println("Padding length: " + paddedBytes + " bytes");
-    System.out.println("Padding ratio: " + format.format(percentPadding) + "%");
-    AcidStats acidStats = OrcAcidUtils.parseAcidStats(reader);
-    if (acidStats != null) {
-      System.out.println("ACID stats:" + acidStats);
-    }
-    rows.close();
-  }
-
-  private static void recoverFiles(final List<String> corruptFiles, final Configuration conf,
-      final String backup)
-      throws IOException {
-    for (String corruptFile : corruptFiles) {
-      System.err.println("Recovering file " + corruptFile);
-      Path corruptPath = new Path(corruptFile);
-      FileSystem fs = corruptPath.getFileSystem(conf);
-      FSDataInputStream fdis = fs.open(corruptPath);
-      try {
-        long corruptFileLen = fs.getFileStatus(corruptPath).getLen();
-        long remaining = corruptFileLen;
-        List<Long> footerOffsets = Lists.newArrayList();
-
-        // start reading the data file from top to bottom and record the valid footers
-        while (remaining > 0) {
-          int toRead = (int) Math.min(DEFAULT_BLOCK_SIZE, remaining);
-          byte[] data = new byte[toRead];
-          long startPos = corruptFileLen - remaining;
-          fdis.readFully(startPos, data, 0, toRead);
-
-          // find all occurrences of the MAGIC string and check whether the file is readable from there
-          int index = 0;
-          long nextFooterOffset;
-
-          while (index != -1) {
-            index = indexOf(data, OrcFile.MAGIC.getBytes(), index + 1);
-            if (index != -1) {
-              nextFooterOffset = startPos + index + OrcFile.MAGIC.length() + 1;
-              if (isReadable(corruptPath, conf, nextFooterOffset)) {
-                footerOffsets.add(nextFooterOffset);
-              }
-            }
-          }
-
-          System.err.println("Scanning for valid footers - startPos: " + startPos +
-              " toRead: " + toRead + " remaining: " + remaining);
-          remaining = remaining - toRead;
-        }
-
-        System.err.println("Readable footerOffsets: " + footerOffsets);
-        recoverFile(corruptPath, fs, conf, footerOffsets, backup);
-      } catch (Exception e) {
-        Path recoveryFile = getRecoveryFile(corruptPath);
-        if (fs.exists(recoveryFile)) {
-          fs.delete(recoveryFile, false);
-        }
-        System.err.println("Unable to recover file " + corruptFile);
-        e.printStackTrace();
-        System.err.println(SEPARATOR);
-        continue;
-      } finally {
-        fdis.close();
-      }
-      System.err.println(corruptFile + " recovered successfully!");
-      System.err.println(SEPARATOR);
-    }
-  }
-
-  private static void recoverFile(final Path corruptPath, final FileSystem fs,
-      final Configuration conf, final List<Long> footerOffsets, final String backup)
-      throws IOException {
-
-    // first recover the file to a .recovered file and then, once successful, rename it to the actual file
-    Path recoveredPath = getRecoveryFile(corruptPath);
-
-    // make sure that the recovery file does not already exist
-    if (fs.exists(recoveredPath)) {
-      fs.delete(recoveredPath, false);
-    }
-
-    // if there are no valid footers, create an empty ORC file so that the recovered file is still readable
-    if (footerOffsets == null || footerOffsets.isEmpty()) {
-      System.err.println("No readable footers found. Creating empty orc file.");
-      TypeDescription schema = TypeDescription.createStruct();
-      Writer writer = OrcFile.createWriter(recoveredPath,
-          OrcFile.writerOptions(conf).setSchema(schema));
-      writer.close();
-    } else {
-      FSDataInputStream fdis = fs.open(corruptPath);
-      FileStatus fileStatus = fs.getFileStatus(corruptPath);
-      // read the corrupt file and copy it to the recovered file up to the last valid footer
-      FSDataOutputStream fdos = fs.create(recoveredPath, true,
-          conf.getInt("io.file.buffer.size", 4096),
-          fileStatus.getReplication(),
-          fileStatus.getBlockSize());
-      try {
-        long fileLen = footerOffsets.get(footerOffsets.size() - 1);
-        long remaining = fileLen;
-
-        while (remaining > 0) {
-          int toRead = (int) Math.min(DEFAULT_BLOCK_SIZE, remaining);
-          byte[] data = new byte[toRead];
-          long startPos = fileLen - remaining;
-          fdis.readFully(startPos, data, 0, toRead);
-          fdos.write(data);
-          System.err.println("Copying data to recovery file - startPos: " + startPos +
-              " toRead: " + toRead + " remaining: " + remaining);
-          remaining = remaining - toRead;
-        }
-      } catch (Exception e) {
-        fs.delete(recoveredPath, false);
-        throw new IOException(e);
-      } finally {
-        fdis.close();
-        fdos.close();
-      }
-    }
-
-    // validate the recovered file once again and start moving corrupt files to backup folder
-    if (isReadable(recoveredPath, conf, Long.MAX_VALUE)) {
-      Path backupDataPath;
-      String scheme = corruptPath.toUri().getScheme();
-      String authority = corruptPath.toUri().getAuthority();
-      String filePath = corruptPath.toUri().getPath();
-
-      // use the same filesystem as corrupt file if backup-path is not explicitly specified
-      if (backup.equals(DEFAULT_BACKUP_PATH)) {
-        backupDataPath = new Path(scheme, authority, DEFAULT_BACKUP_PATH + filePath);
-      } else {
-        backupDataPath = Path.mergePaths(new Path(backup), corruptPath);
-      }
-
-      // Move data file to backup path
-      moveFiles(fs, corruptPath, backupDataPath);
-
-      // Move side file to backup path
-      Path sideFilePath = OrcAcidUtils.getSideFile(corruptPath);
-      Path backupSideFilePath = new Path(backupDataPath.getParent(), sideFilePath.getName());
-      moveFiles(fs, sideFilePath, backupSideFilePath);
-
-      // finally move recovered file to actual file
-      moveFiles(fs, recoveredPath, corruptPath);
-
-      // we are done recovering, backing up and validating
-      System.err.println("Validation of recovered file successful!");
-    }
-  }
-
-  private static void moveFiles(final FileSystem fs, final Path src, final Path dest)
-      throws IOException {
-    try {
-      // create the destination directory if it does not exist
-      if (!fs.exists(dest.getParent())) {
-        fs.mkdirs(dest.getParent());
-      }
-
-      // if the destination file already exists for some reason, delete it
-      fs.delete(dest, false);
-
-      if (fs.rename(src, dest)) {
-        System.err.println("Moved " + src + " to " + dest);
-      } else {
-        throw new IOException("Unable to move " + src + " to " + dest);
-      }
-
-    } catch (Exception e) {
-      throw new IOException("Unable to move " + src + " to " + dest, e);
-    }
-  }
-
-  private static Path getRecoveryFile(final Path corruptPath) {
-    return new Path(corruptPath.getParent(), corruptPath.getName() + ".recovered");
-  }
-
-  private static boolean isReadable(final Path corruptPath, final Configuration conf,
-      final long maxLen) {
-    try {
-      OrcFile.createReader(corruptPath, OrcFile.readerOptions(conf).maxLength(maxLen));
-      return true;
-    } catch (Exception e) {
-      // ignore this exception; the file is not readable at maxLen
-      return false;
-    }
-  }
-
-  // search for a byte pattern in another byte array, starting at the given index
-  private static int indexOf(final byte[] data, final byte[] pattern, final int index) {
-    if (data == null || data.length == 0 || pattern == null || pattern.length == 0 ||
-        index > data.length || index < 0) {
-      return -1;
-    }
-
-    int j = 0;
-    for (int i = index; i < data.length; i++) {
-      if (pattern[j] == data[i]) {
-        j++;
-      } else {
-        j = 0;
-      }
-
-      if (j == pattern.length) {
-        return i - pattern.length + 1;
-      }
-    }
-
-    return -1;
-  }
-
-  private static String getFormattedBloomFilters(int col,
-      OrcProto.BloomFilterIndex[] bloomFilterIndex) {
-    StringBuilder buf = new StringBuilder();
-    BloomFilterIO stripeLevelBF = null;
-    if (bloomFilterIndex != null && bloomFilterIndex[col] != null) {
-      int idx = 0;
-      buf.append("\n    Bloom filters for column ").append(col).append(":");
-      for (OrcProto.BloomFilter bf : bloomFilterIndex[col].getBloomFilterList()) {
-        BloomFilterIO toMerge = new BloomFilterIO(bf);
-        buf.append("\n      Entry ").append(idx++).append(":").append(getBloomFilterStats(toMerge));
-        if (stripeLevelBF == null) {
-          stripeLevelBF = toMerge;
-        } else {
-          stripeLevelBF.merge(toMerge);
-        }
-      }
-      String bloomFilterStats = getBloomFilterStats(stripeLevelBF);
-      buf.append("\n      Stripe level merge:").append(bloomFilterStats);
-    }
-    return buf.toString();
-  }
-
-  private static String getBloomFilterStats(BloomFilterIO bf) {
-    StringBuilder sb = new StringBuilder();
-    int bitCount = bf.getBitSize();
-    int popCount = 0;
-    for (long l : bf.getBitSet()) {
-      popCount += Long.bitCount(l);
-    }
-    int k = bf.getNumHashFunctions();
-    float loadFactor = (float) popCount / (float) bitCount;
-    float expectedFpp = (float) Math.pow(loadFactor, k);
-    DecimalFormat df = new DecimalFormat("###.####");
-    sb.append(" numHashFunctions: ").append(k);
-    sb.append(" bitCount: ").append(bitCount);
-    sb.append(" popCount: ").append(popCount);
-    sb.append(" loadFactor: ").append(df.format(loadFactor));
-    sb.append(" expectedFpp: ").append(expectedFpp);
-    return sb.toString();
-  }
-
-  private static String getFormattedRowIndices(int col,
-                                               OrcProto.RowIndex[] rowGroupIndex) {
-    StringBuilder buf = new StringBuilder();
-    OrcProto.RowIndex index;
-    buf.append("    Row group indices for column ").append(col).append(":");
-    if (rowGroupIndex == null || (col >= rowGroupIndex.length) ||
-        ((index = rowGroupIndex[col]) == null)) {
-      buf.append(" not found\n");
-      return buf.toString();
-    }
-
-    for (int entryIx = 0; entryIx < index.getEntryCount(); ++entryIx) {
-      buf.append("\n      Entry ").append(entryIx).append(": ");
-      OrcProto.RowIndexEntry entry = index.getEntry(entryIx);
-      if (entry == null) {
-        buf.append("unknown\n");
-        continue;
-      }
-      OrcProto.ColumnStatistics colStats = entry.getStatistics();
-      if (colStats == null) {
-        buf.append("no stats at ");
-      } else {
-        ColumnStatistics cs = ColumnStatisticsImpl.deserialize(colStats);
-        buf.append(cs.toString());
-      }
-      buf.append(" positions: ");
-      for (int posIx = 0; posIx < entry.getPositionsCount(); ++posIx) {
-        if (posIx != 0) {
-          buf.append(",");
-        }
-        buf.append(entry.getPositions(posIx));
-      }
-    }
-    return buf.toString();
-  }
-
-  public static long getTotalPaddingSize(Reader reader) throws IOException {
-    long paddedBytes = 0;
-    List<StripeInformation> stripes = reader.getStripes();
-    for (int i = 1; i < stripes.size(); i++) {
-      long prevStripeOffset = stripes.get(i - 1).getOffset();
-      long prevStripeLen = stripes.get(i - 1).getLength();
-      paddedBytes += stripes.get(i).getOffset() - (prevStripeOffset + prevStripeLen);
-    }
-    return paddedBytes;
-  }
-
-  @SuppressWarnings("static-access")
-  static Options createOptions() {
-    Options result = new Options();
-
-    // add -d and --data to print the rows
-    result.addOption(OptionBuilder
-        .withLongOpt("data")
-        .withDescription("Should the data be printed")
-        .create('d'));
-
-    // to avoid breaking the file dump unit tests when they are run in different time zones,
-    // printing of the time zone is made optional
-    result.addOption(OptionBuilder
-        .withLongOpt("timezone")
-        .withDescription("Print writer's time zone")
-        .create('t'));
-
-    result.addOption(OptionBuilder
-        .withLongOpt("help")
-        .withDescription("print help message")
-        .create('h'));
-
-    result.addOption(OptionBuilder
-        .withLongOpt("rowindex")
-        .withArgName("comma separated list of column ids for which row index should be printed")
-        .withDescription("Dump stats for column number(s)")
-        .hasArg()
-        .create('r'));
-
-    result.addOption(OptionBuilder
-        .withLongOpt("json")
-        .withDescription("Print metadata in JSON format")
-        .create('j'));
-
-    result.addOption(OptionBuilder
-        .withLongOpt("pretty")
-        .withDescription("Pretty print json metadata output")
-        .create('p'));
-
-    result.addOption(OptionBuilder
-        .withLongOpt("recover")
-        .withDescription("recover corrupted orc files generated by streaming")
-        .create());
-
-    result.addOption(OptionBuilder
-        .withLongOpt("skip-dump")
-        .withDescription("used along with --recover to directly recover files without dumping")
-        .create());
-
-    result.addOption(OptionBuilder
-        .withLongOpt("backup-path")
-        .withDescription("specify a backup path to store the corrupted files (default: /tmp)")
-        .hasArg()
-        .create());
-    return result;
-  }
-
-  private static void printMap(JSONWriter writer,
-                               MapColumnVector vector,
-                               TypeDescription schema,
-                               int row) throws JSONException {
-    writer.array();
-    TypeDescription keyType = schema.getChildren().get(0);
-    TypeDescription valueType = schema.getChildren().get(1);
-    int offset = (int) vector.offsets[row];
-    for (int i = 0; i < vector.lengths[row]; ++i) {
-      writer.object();
-      writer.key("_key");
-      printValue(writer, vector.keys, keyType, offset + i);
-      writer.key("_value");
-      printValue(writer, vector.values, valueType, offset + i);
-      writer.endObject();
-    }
-    writer.endArray();
-  }
-
-  private static void printList(JSONWriter writer,
-                                ListColumnVector vector,
-                                TypeDescription schema,
-                                int row) throws JSONException {
-    writer.array();
-    int offset = (int) vector.offsets[row];
-    TypeDescription childType = schema.getChildren().get(0);
-    for (int i = 0; i < vector.lengths[row]; ++i) {
-      printValue(writer, vector.child, childType, offset + i);
-    }
-    writer.endArray();
-  }
-
-  private static void printUnion(JSONWriter writer,
-                                 UnionColumnVector vector,
-                                 TypeDescription schema,
-                                 int row) throws JSONException {
-    int tag = vector.tags[row];
-    printValue(writer, vector.fields[tag], schema.getChildren().get(tag), row);
-  }
-
-  static void printStruct(JSONWriter writer,
-                          StructColumnVector batch,
-                          TypeDescription schema,
-                          int row) throws JSONException {
-    writer.object();
-    List<String> fieldNames = schema.getFieldNames();
-    List<TypeDescription> fieldTypes = schema.getChildren();
-    for (int i = 0; i < fieldTypes.size(); ++i) {
-      writer.key(fieldNames.get(i));
-      printValue(writer, batch.fields[i], fieldTypes.get(i), row);
-    }
-    writer.endObject();
-  }
-
-  static void printBinary(JSONWriter writer, BytesColumnVector vector,
-                          int row) throws JSONException {
-    writer.array();
-    int offset = vector.start[row];
-    for(int i=0; i < vector.length[row]; ++i) {
-      writer.value(0xff & (int) vector.vector[row][offset + i]);
-    }
-    writer.endArray();
-  }
-  static void printValue(JSONWriter writer, ColumnVector vector,
-                         TypeDescription schema, int row) throws JSONException {
-    if (vector.isRepeating) {
-      row = 0;
-    }
-    if (vector.noNulls || !vector.isNull[row]) {
-      switch (schema.getCategory()) {
-        case BOOLEAN:
-          writer.value(((LongColumnVector) vector).vector[row] != 0);
-          break;
-        case BYTE:
-        case SHORT:
-        case INT:
-        case LONG:
-          writer.value(((LongColumnVector) vector).vector[row]);
-          break;
-        case FLOAT:
-        case DOUBLE:
-          writer.value(((DoubleColumnVector) vector).vector[row]);
-          break;
-        case STRING:
-        case CHAR:
-        case VARCHAR:
-          writer.value(((BytesColumnVector) vector).toString(row));
-          break;
-        case BINARY:
-          printBinary(writer, (BytesColumnVector) vector, row);
-          break;
-        case DECIMAL:
-          writer.value(((DecimalColumnVector) vector).vector[row].toString());
-          break;
-        case DATE:
-          writer.value(new DateWritable(
-              (int) ((LongColumnVector) vector).vector[row]).toString());
-          break;
-        case TIMESTAMP:
-          writer.value(((TimestampColumnVector) vector)
-              .asScratchTimestamp(row).toString());
-          break;
-        case LIST:
-          printList(writer, (ListColumnVector) vector, schema, row);
-          break;
-        case MAP:
-          printMap(writer, (MapColumnVector) vector, schema, row);
-          break;
-        case STRUCT:
-          printStruct(writer, (StructColumnVector) vector, schema, row);
-          break;
-        case UNION:
-          printUnion(writer, (UnionColumnVector) vector, schema, row);
-          break;
-        default:
-          throw new IllegalArgumentException("Unknown type " +
-              schema.toString());
-      }
-    } else {
-      writer.value(null);
-    }
-  }
-
-  static void printRow(JSONWriter writer,
-                       VectorizedRowBatch batch,
-                       TypeDescription schema,
-                       int row) throws JSONException {
-    if (schema.getCategory() == TypeDescription.Category.STRUCT) {
-      List<TypeDescription> fieldTypes = schema.getChildren();
-      List<String> fieldNames = schema.getFieldNames();
-      writer.object();
-      for (int c = 0; c < batch.cols.length; ++c) {
-        writer.key(fieldNames.get(c));
-        printValue(writer, batch.cols[c], fieldTypes.get(c), row);
-      }
-      writer.endObject();
-    } else {
-      printValue(writer, batch.cols[0], schema, row);
-    }
-  }
-
-  static void printJsonData(final Reader reader) throws IOException, JSONException {
-    PrintStream printStream = System.out;
-    OutputStreamWriter out = new OutputStreamWriter(printStream, "UTF-8");
-    RecordReader rows = reader.rows();
-    try {
-      TypeDescription schema = reader.getSchema();
-      VectorizedRowBatch batch = schema.createRowBatch();
-      while (rows.nextBatch(batch)) {
-        for(int r=0; r < batch.size; ++r) {
-          JSONWriter writer = new JSONWriter(out);
-          printRow(writer, batch, schema, r);
-          out.write("\n");
-          out.flush();
-          if (printStream.checkError()) {
-            throw new IOException("Error encountered when writing to stdout.");
-          }
-        }
-      }
-    } finally {
-      rows.close();
-    }
-  }
-}
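
A minimal sketch of how the getAllFilesInPath/getReader pair above can be driven together, assuming a hypothetical caller placed in the same package so that the package-private getReader is visible; the class name and the directory path are placeholders, and only calls that appear in the deleted code above are used:

    package org.apache.orc.tools;

    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.List;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.orc.Reader;

    // Hypothetical helper: walks a directory, opens every readable ORC file and
    // collects the unreadable ones the same way printMetaData does above.
    public class FileDumpSketch {
      static void dumpDirectory(String dir) throws IOException {
        Configuration conf = new Configuration();
        List<String> corruptFiles = new ArrayList<>();
        for (String file : FileDump.getAllFilesInPath(new Path(dir), conf)) {
          Reader reader = FileDump.getReader(new Path(file), conf, corruptFiles);
          if (reader == null) {
            continue;  // still open for writes, or queued in corruptFiles for recovery
          }
          System.out.println(file + ": " + reader.getNumberOfRows() + " rows");
        }
        // corruptFiles can then be passed to the recovery path, which is what
        // "hive --orcfiledump --recover --skip-dump <files>" does.
      }
    }

As a worked example of the bloom filter summary computed by getBloomFilterStats above: with bitCount = 10000, popCount = 5000 and numHashFunctions k = 4, loadFactor = 5000 / 10000 = 0.5 and expectedFpp = 0.5^4 = 0.0625.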

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/tools/JsonFileDump.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/tools/JsonFileDump.java b/orc/src/java/org/apache/orc/tools/JsonFileDump.java
deleted file mode 100644
index e2048ea..0000000
--- a/orc/src/java/org/apache/orc/tools/JsonFileDump.java
+++ /dev/null
@@ -1,412 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * <p/>
- * http://www.apache.org/licenses/LICENSE-2.0
- * <p/>
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc.tools;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Set;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.orc.CompressionKind;
-import org.apache.orc.Reader;
-import org.apache.orc.impl.AcidStats;
-import org.apache.orc.impl.OrcAcidUtils;
-import org.apache.orc.impl.RecordReaderImpl;
-import org.codehaus.jettison.json.JSONArray;
-import org.apache.orc.BloomFilterIO;
-import org.apache.orc.BinaryColumnStatistics;
-import org.apache.orc.BooleanColumnStatistics;
-import org.apache.orc.ColumnStatistics;
-import org.apache.orc.impl.ColumnStatisticsImpl;
-import org.apache.orc.DateColumnStatistics;
-import org.apache.orc.DecimalColumnStatistics;
-import org.apache.orc.DoubleColumnStatistics;
-import org.apache.orc.IntegerColumnStatistics;
-import org.apache.orc.impl.OrcIndex;
-import org.apache.orc.OrcProto;
-import org.apache.orc.StringColumnStatistics;
-import org.apache.orc.StripeInformation;
-import org.apache.orc.StripeStatistics;
-import org.apache.orc.TimestampColumnStatistics;
-import org.codehaus.jettison.json.JSONException;
-import org.codehaus.jettison.json.JSONObject;
-import org.codehaus.jettison.json.JSONStringer;
-import org.codehaus.jettison.json.JSONWriter;
-
-/**
- * File dump tool with json formatted output.
- */
-public class JsonFileDump {
-
-  public static void printJsonMetaData(List<String> files,
-      Configuration conf,
-      List<Integer> rowIndexCols, boolean prettyPrint, boolean printTimeZone)
-      throws JSONException, IOException {
-    if (files.isEmpty()) {
-      return;
-    }
-    JSONStringer writer = new JSONStringer();
-    boolean multiFile = files.size() > 1;
-    if (multiFile) {
-      writer.array();
-    } else {
-      writer.object();
-    }
-    for (String filename : files) {
-      try {
-        if (multiFile) {
-          writer.object();
-        }
-        writer.key("fileName").value(filename);
-        Path path = new Path(filename);
-        Reader reader = FileDump.getReader(path, conf, null);
-        if (reader == null) {
-          writer.key("status").value("FAILED");
-          continue;
-        }
-        writer.key("fileVersion").value(reader.getFileVersion().getName());
-        writer.key("writerVersion").value(reader.getWriterVersion());
-        RecordReaderImpl rows = (RecordReaderImpl) reader.rows();
-        writer.key("numberOfRows").value(reader.getNumberOfRows());
-        writer.key("compression").value(reader.getCompressionKind());
-        if (reader.getCompressionKind() != CompressionKind.NONE) {
-          writer.key("compressionBufferSize").value(reader.getCompressionSize());
-        }
-        writer.key("schemaString").value(reader.getSchema().toString());
-        writer.key("schema").array();
-        writeSchema(writer, reader.getTypes());
-        writer.endArray();
-
-        writer.key("stripeStatistics").array();
-        List<StripeStatistics> stripeStatistics = reader.getStripeStatistics();
-        for (int n = 0; n < stripeStatistics.size(); n++) {
-          writer.object();
-          writer.key("stripeNumber").value(n + 1);
-          StripeStatistics ss = stripeStatistics.get(n);
-          writer.key("columnStatistics").array();
-          for (int i = 0; i < ss.getColumnStatistics().length; i++) {
-            writer.object();
-            writer.key("columnId").value(i);
-            writeColumnStatistics(writer, ss.getColumnStatistics()[i]);
-            writer.endObject();
-          }
-          writer.endArray();
-          writer.endObject();
-        }
-        writer.endArray();
-
-        ColumnStatistics[] stats = reader.getStatistics();
-        int colCount = stats.length;
-        if (rowIndexCols == null) {
-          rowIndexCols = new ArrayList<>(colCount);
-          for (int i = 0; i < colCount; ++i) {
-            rowIndexCols.add(i);
-          }
-        }
-        writer.key("fileStatistics").array();
-        for (int i = 0; i < stats.length; ++i) {
-          writer.object();
-          writer.key("columnId").value(i);
-          writeColumnStatistics(writer, stats[i]);
-          writer.endObject();
-        }
-        writer.endArray();
-
-        writer.key("stripes").array();
-        int stripeIx = -1;
-        for (StripeInformation stripe : reader.getStripes()) {
-          ++stripeIx;
-          long stripeStart = stripe.getOffset();
-          OrcProto.StripeFooter footer = rows.readStripeFooter(stripe);
-          writer.object(); // start of stripe information
-          writer.key("stripeNumber").value(stripeIx + 1);
-          writer.key("stripeInformation");
-          writeStripeInformation(writer, stripe);
-          if (printTimeZone) {
-            writer.key("writerTimezone").value(
-                footer.hasWriterTimezone() ? footer.getWriterTimezone() : FileDump.UNKNOWN);
-          }
-          long sectionStart = stripeStart;
-
-          writer.key("streams").array();
-          for (OrcProto.Stream section : footer.getStreamsList()) {
-            writer.object();
-            String kind = section.hasKind() ? section.getKind().name() : FileDump.UNKNOWN;
-            writer.key("columnId").value(section.getColumn());
-            writer.key("section").value(kind);
-            writer.key("startOffset").value(sectionStart);
-            writer.key("length").value(section.getLength());
-            sectionStart += section.getLength();
-            writer.endObject();
-          }
-          writer.endArray();
-
-          writer.key("encodings").array();
-          for (int i = 0; i < footer.getColumnsCount(); ++i) {
-            writer.object();
-            OrcProto.ColumnEncoding encoding = footer.getColumns(i);
-            writer.key("columnId").value(i);
-            writer.key("kind").value(encoding.getKind());
-            if (encoding.getKind() == OrcProto.ColumnEncoding.Kind.DICTIONARY ||
-                encoding.getKind() == OrcProto.ColumnEncoding.Kind.DICTIONARY_V2) {
-              writer.key("dictionarySize").value(encoding.getDictionarySize());
-            }
-            writer.endObject();
-          }
-          writer.endArray();
-          if (!rowIndexCols.isEmpty()) {
-            // read the row index only for the specified columns; bloom filters are read only
-            // for the included columns
-            boolean[] sargColumns = new boolean[colCount];
-            for (int colIdx : rowIndexCols) {
-              sargColumns[colIdx] = true;
-            }
-            OrcIndex indices = rows.readRowIndex(stripeIx, null, sargColumns);
-            writer.key("indexes").array();
-            for (int col : rowIndexCols) {
-              writer.object();
-              writer.key("columnId").value(col);
-              writeRowGroupIndexes(writer, col, indices.getRowGroupIndex());
-              writeBloomFilterIndexes(writer, col, indices.getBloomFilterIndex());
-              writer.endObject();
-            }
-            writer.endArray();
-          }
-          writer.endObject(); // end of stripe information
-        }
-        writer.endArray();
-
-        FileSystem fs = path.getFileSystem(conf);
-        long fileLen = fs.getContentSummary(path).getLength();
-        long paddedBytes = FileDump.getTotalPaddingSize(reader);
-        // an empty ORC file is ~45 bytes; the assumption here is that the file length is always > 0
-        double percentPadding = ((double) paddedBytes / (double) fileLen) * 100;
-        writer.key("fileLength").value(fileLen);
-        writer.key("paddingLength").value(paddedBytes);
-        writer.key("paddingRatio").value(percentPadding);
-        AcidStats acidStats = OrcAcidUtils.parseAcidStats(reader);
-        if (acidStats != null) {
-          writer.key("numInserts").value(acidStats.inserts);
-          writer.key("numDeletes").value(acidStats.deletes);
-          writer.key("numUpdates").value(acidStats.updates);
-        }
-        writer.key("status").value("OK");
-        rows.close();
-
-        writer.endObject();
-      } catch (Exception e) {
-        writer.key("status").value("FAILED");
-        throw e;
-      }
-    }
-    if (multiFile) {
-      writer.endArray();
-    }
-
-    if (prettyPrint) {
-      final String prettyJson;
-      if (multiFile) {
-        JSONArray jsonArray = new JSONArray(writer.toString());
-        prettyJson = jsonArray.toString(2);
-      } else {
-        JSONObject jsonObject = new JSONObject(writer.toString());
-        prettyJson = jsonObject.toString(2);
-      }
-      System.out.println(prettyJson);
-    } else {
-      System.out.println(writer.toString());
-    }
-  }
-
-  private static void writeSchema(JSONStringer writer, List<OrcProto.Type> types)
-      throws JSONException {
-    int i = 0;
-    for(OrcProto.Type type : types) {
-      writer.object();
-      writer.key("columnId").value(i++);
-      writer.key("columnType").value(type.getKind());
-      if (type.getFieldNamesCount() > 0) {
-        writer.key("childColumnNames").array();
-        for (String field : type.getFieldNamesList()) {
-          writer.value(field);
-        }
-        writer.endArray();
-        writer.key("childColumnIds").array();
-        for (Integer colId : type.getSubtypesList()) {
-          writer.value(colId);
-        }
-        writer.endArray();
-      }
-      if (type.hasPrecision()) {
-        writer.key("precision").value(type.getPrecision());
-      }
-
-      if (type.hasScale()) {
-        writer.key("scale").value(type.getScale());
-      }
-
-      if (type.hasMaximumLength()) {
-        writer.key("maxLength").value(type.getMaximumLength());
-      }
-      writer.endObject();
-    }
-  }
-
-  private static void writeStripeInformation(JSONWriter writer, StripeInformation stripe)
-      throws JSONException {
-    writer.object();
-    writer.key("offset").value(stripe.getOffset());
-    writer.key("indexLength").value(stripe.getIndexLength());
-    writer.key("dataLength").value(stripe.getDataLength());
-    writer.key("footerLength").value(stripe.getFooterLength());
-    writer.key("rowCount").value(stripe.getNumberOfRows());
-    writer.endObject();
-  }
-
-  private static void writeColumnStatistics(JSONWriter writer, ColumnStatistics cs)
-      throws JSONException {
-    if (cs != null) {
-      writer.key("count").value(cs.getNumberOfValues());
-      writer.key("hasNull").value(cs.hasNull());
-      if (cs instanceof BinaryColumnStatistics) {
-        writer.key("totalLength").value(((BinaryColumnStatistics) cs).getSum());
-        writer.key("type").value(OrcProto.Type.Kind.BINARY);
-      } else if (cs instanceof BooleanColumnStatistics) {
-        writer.key("trueCount").value(((BooleanColumnStatistics) cs).getTrueCount());
-        writer.key("falseCount").value(((BooleanColumnStatistics) cs).getFalseCount());
-        writer.key("type").value(OrcProto.Type.Kind.BOOLEAN);
-      } else if (cs instanceof IntegerColumnStatistics) {
-        writer.key("min").value(((IntegerColumnStatistics) cs).getMinimum());
-        writer.key("max").value(((IntegerColumnStatistics) cs).getMaximum());
-        if (((IntegerColumnStatistics) cs).isSumDefined()) {
-          writer.key("sum").value(((IntegerColumnStatistics) cs).getSum());
-        }
-        writer.key("type").value(OrcProto.Type.Kind.LONG);
-      } else if (cs instanceof DoubleColumnStatistics) {
-        writer.key("min").value(((DoubleColumnStatistics) cs).getMinimum());
-        writer.key("max").value(((DoubleColumnStatistics) cs).getMaximum());
-        writer.key("sum").value(((DoubleColumnStatistics) cs).getSum());
-        writer.key("type").value(OrcProto.Type.Kind.DOUBLE);
-      } else if (cs instanceof StringColumnStatistics) {
-        writer.key("min").value(((StringColumnStatistics) cs).getMinimum());
-        writer.key("max").value(((StringColumnStatistics) cs).getMaximum());
-        writer.key("totalLength").value(((StringColumnStatistics) cs).getSum());
-        writer.key("type").value(OrcProto.Type.Kind.STRING);
-      } else if (cs instanceof DateColumnStatistics) {
-        if (((DateColumnStatistics) cs).getMaximum() != null) {
-          writer.key("min").value(((DateColumnStatistics) cs).getMinimum());
-          writer.key("max").value(((DateColumnStatistics) cs).getMaximum());
-        }
-        writer.key("type").value(OrcProto.Type.Kind.DATE);
-      } else if (cs instanceof TimestampColumnStatistics) {
-        if (((TimestampColumnStatistics) cs).getMaximum() != null) {
-          writer.key("min").value(((TimestampColumnStatistics) cs).getMinimum());
-          writer.key("max").value(((TimestampColumnStatistics) cs).getMaximum());
-        }
-        writer.key("type").value(OrcProto.Type.Kind.TIMESTAMP);
-      } else if (cs instanceof DecimalColumnStatistics) {
-        if (((DecimalColumnStatistics) cs).getMaximum() != null) {
-          writer.key("min").value(((DecimalColumnStatistics) cs).getMinimum());
-          writer.key("max").value(((DecimalColumnStatistics) cs).getMaximum());
-          writer.key("sum").value(((DecimalColumnStatistics) cs).getSum());
-        }
-        writer.key("type").value(OrcProto.Type.Kind.DECIMAL);
-      }
-    }
-  }
-
-  private static void writeBloomFilterIndexes(JSONWriter writer, int col,
-      OrcProto.BloomFilterIndex[] bloomFilterIndex) throws JSONException {
-
-    BloomFilterIO stripeLevelBF = null;
-    if (bloomFilterIndex != null && bloomFilterIndex[col] != null) {
-      int entryIx = 0;
-      writer.key("bloomFilterIndexes").array();
-      for (OrcProto.BloomFilter bf : bloomFilterIndex[col].getBloomFilterList()) {
-        writer.object();
-        writer.key("entryId").value(entryIx++);
-        BloomFilterIO toMerge = new BloomFilterIO(bf);
-        writeBloomFilterStats(writer, toMerge);
-        if (stripeLevelBF == null) {
-          stripeLevelBF = toMerge;
-        } else {
-          stripeLevelBF.merge(toMerge);
-        }
-        writer.endObject();
-      }
-      writer.endArray();
-    }
-    if (stripeLevelBF != null) {
-      writer.key("stripeLevelBloomFilter");
-      writer.object();
-      writeBloomFilterStats(writer, stripeLevelBF);
-      writer.endObject();
-    }
-  }
-
-  private static void writeBloomFilterStats(JSONWriter writer, BloomFilterIO bf)
-      throws JSONException {
-    int bitCount = bf.getBitSize();
-    int popCount = 0;
-    for (long l : bf.getBitSet()) {
-      popCount += Long.bitCount(l);
-    }
-    int k = bf.getNumHashFunctions();
-    float loadFactor = (float) popCount / (float) bitCount;
-    float expectedFpp = (float) Math.pow(loadFactor, k);
-    writer.key("numHashFunctions").value(k);
-    writer.key("bitCount").value(bitCount);
-    writer.key("popCount").value(popCount);
-    writer.key("loadFactor").value(loadFactor);
-    writer.key("expectedFpp").value(expectedFpp);
-  }
-
-  private static void writeRowGroupIndexes(JSONWriter writer, int col,
-      OrcProto.RowIndex[] rowGroupIndex)
-      throws JSONException {
-
-    OrcProto.RowIndex index;
-    if (rowGroupIndex == null || (col >= rowGroupIndex.length) ||
-        ((index = rowGroupIndex[col]) == null)) {
-      return;
-    }
-
-    writer.key("rowGroupIndexes").array();
-    for (int entryIx = 0; entryIx < index.getEntryCount(); ++entryIx) {
-      writer.object();
-      writer.key("entryId").value(entryIx);
-      OrcProto.RowIndexEntry entry = index.getEntry(entryIx);
-      if (entry == null) {
-        continue;
-      }
-      OrcProto.ColumnStatistics colStats = entry.getStatistics();
-      writeColumnStatistics(writer, ColumnStatisticsImpl.deserialize(colStats));
-      writer.key("positions").array();
-      for (int posIx = 0; posIx < entry.getPositionsCount(); ++posIx) {
-        writer.value(entry.getPositions(posIx));
-      }
-      writer.endArray();
-      writer.endObject();
-    }
-    writer.endArray();
-  }
-
-}
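
The dump above emits a single JSON object when one file is given and a JSON array of per-file objects when several are given (the multiFile flag). Below is a minimal, hypothetical invocation of the public entry point, assuming only the printJsonMetaData signature shown above; the file name and column ids are placeholders, and passing null for rowIndexCols would dump the row group index of every column:

    import java.util.Arrays;
    import java.util.Collections;
    import java.util.List;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.orc.tools.JsonFileDump;

    // Hypothetical driver; "example.orc" and the column ids are made-up values.
    public class JsonDumpSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        List<String> files = Collections.singletonList("example.orc");
        List<Integer> rowIndexCols = Arrays.asList(1, 2);  // columns whose row group indexes to emit
        // prettyPrint = true re-serializes the output with indentation,
        // printTimeZone = false omits the writerTimezone key
        JsonFileDump.printJsonMetaData(files, conf, rowIndexCols, true, false);
      }
    }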

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/protobuf/orc_proto.proto
----------------------------------------------------------------------
diff --git a/orc/src/protobuf/orc_proto.proto b/orc/src/protobuf/orc_proto.proto
index ae5945b..73f68b1 100644
--- a/orc/src/protobuf/orc_proto.proto
+++ b/orc/src/protobuf/orc_proto.proto
@@ -18,7 +18,7 @@
 
 package orc.proto;
 
-option java_package = "org.apache.orc";
+option java_package = "org.apache.hive.orc";
 
 message IntegerStatistics  {
   optional sint64 minimum = 1;
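
Only the java_package option changes in this hunk, so the protobuf messages themselves are untouched and consumers simply import the generated classes from the new package. A hypothetical illustration using the IntegerStatistics message shown above:

    // before this commit: import org.apache.orc.OrcProto;
    import org.apache.hive.orc.OrcProto;

    public class ProtoPackageSketch {
      public static void main(String[] args) {
        // the generated builder API is unchanged; only its package moves
        OrcProto.IntegerStatistics stats = OrcProto.IntegerStatistics.newBuilder()
            .setMinimum(1)
            .build();
        System.out.println(stats);
      }
    }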

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/TestColumnStatistics.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/TestColumnStatistics.java b/orc/src/test/org/apache/hive/orc/TestColumnStatistics.java
new file mode 100644
index 0000000..87c12c9
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/TestColumnStatistics.java
@@ -0,0 +1,364 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc;
+
+import static junit.framework.Assert.assertEquals;
+import static org.junit.Assume.assumeTrue;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.PrintStream;
+import java.sql.Timestamp;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hive.orc.impl.ColumnStatisticsImpl;
+import org.apache.hive.orc.tools.FileDump;
+import org.apache.hive.orc.tools.TestFileDump;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestName;
+
+/**
+ * Test ColumnStatisticsImpl for ORC.
+ */
+public class TestColumnStatistics {
+
+  @Test
+  public void testLongMerge() throws Exception {
+    TypeDescription schema = TypeDescription.createInt();
+
+    ColumnStatisticsImpl stats1 = ColumnStatisticsImpl.create(schema);
+    ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(schema);
+    stats1.updateInteger(10, 2);
+    stats2.updateInteger(1, 1);
+    stats2.updateInteger(1000, 1);
+    stats1.merge(stats2);
+    IntegerColumnStatistics typed = (IntegerColumnStatistics) stats1;
+    assertEquals(1, typed.getMinimum());
+    assertEquals(1000, typed.getMaximum());
+    stats1.reset();
+    stats1.updateInteger(-10, 1);
+    stats1.updateInteger(10000, 1);
+    stats1.merge(stats2);
+    assertEquals(-10, typed.getMinimum());
+    assertEquals(10000, typed.getMaximum());
+  }
+
+  @Test
+  public void testDoubleMerge() throws Exception {
+    TypeDescription schema = TypeDescription.createDouble();
+
+    ColumnStatisticsImpl stats1 = ColumnStatisticsImpl.create(schema);
+    ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(schema);
+    stats1.updateDouble(10.0);
+    stats1.updateDouble(100.0);
+    stats2.updateDouble(1.0);
+    stats2.updateDouble(1000.0);
+    stats1.merge(stats2);
+    DoubleColumnStatistics typed = (DoubleColumnStatistics) stats1;
+    assertEquals(1.0, typed.getMinimum(), 0.001);
+    assertEquals(1000.0, typed.getMaximum(), 0.001);
+    stats1.reset();
+    stats1.updateDouble(-10);
+    stats1.updateDouble(10000);
+    stats1.merge(stats2);
+    assertEquals(-10, typed.getMinimum(), 0.001);
+    assertEquals(10000, typed.getMaximum(), 0.001);
+  }
+
+
+  @Test
+  public void testStringMerge() throws Exception {
+    TypeDescription schema = TypeDescription.createString();
+
+    ColumnStatisticsImpl stats1 = ColumnStatisticsImpl.create(schema);
+    ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(schema);
+    stats1.updateString(new Text("bob"));
+    stats1.updateString(new Text("david"));
+    stats1.updateString(new Text("charles"));
+    stats2.updateString(new Text("anne"));
+    byte[] erin = new byte[]{0, 1, 2, 3, 4, 5, 101, 114, 105, 110};
+    stats2.updateString(erin, 6, 4, 5);
+    assertEquals(24, ((StringColumnStatistics)stats2).getSum());
+    stats1.merge(stats2);
+    StringColumnStatistics typed = (StringColumnStatistics) stats1;
+    assertEquals("anne", typed.getMinimum());
+    assertEquals("erin", typed.getMaximum());
+    assertEquals(39, typed.getSum());
+    stats1.reset();
+    stats1.updateString(new Text("aaa"));
+    stats1.updateString(new Text("zzz"));
+    stats1.merge(stats2);
+    assertEquals("aaa", typed.getMinimum());
+    assertEquals("zzz", typed.getMaximum());
+  }
+
+  @Test
+  public void testDateMerge() throws Exception {
+    TypeDescription schema = TypeDescription.createDate();
+
+    ColumnStatisticsImpl stats1 = ColumnStatisticsImpl.create(schema);
+    ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(schema);
+    stats1.updateDate(new DateWritable(1000));
+    stats1.updateDate(new DateWritable(100));
+    stats2.updateDate(new DateWritable(10));
+    stats2.updateDate(new DateWritable(2000));
+    stats1.merge(stats2);
+    DateColumnStatistics typed = (DateColumnStatistics) stats1;
+    assertEquals(new DateWritable(10).get(), typed.getMinimum());
+    assertEquals(new DateWritable(2000).get(), typed.getMaximum());
+    stats1.reset();
+    stats1.updateDate(new DateWritable(-10));
+    stats1.updateDate(new DateWritable(10000));
+    stats1.merge(stats2);
+    assertEquals(new DateWritable(-10).get(), typed.getMinimum());
+    assertEquals(new DateWritable(10000).get(), typed.getMaximum());
+  }
+
+  @Test
+  public void testTimestampMerge() throws Exception {
+    TypeDescription schema = TypeDescription.createTimestamp();
+
+    ColumnStatisticsImpl stats1 = ColumnStatisticsImpl.create(schema);
+    ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(schema);
+    stats1.updateTimestamp(new Timestamp(10));
+    stats1.updateTimestamp(new Timestamp(100));
+    stats2.updateTimestamp(new Timestamp(1));
+    stats2.updateTimestamp(new Timestamp(1000));
+    stats1.merge(stats2);
+    TimestampColumnStatistics typed = (TimestampColumnStatistics) stats1;
+    assertEquals(1, typed.getMinimum().getTime());
+    assertEquals(1000, typed.getMaximum().getTime());
+    stats1.reset();
+    stats1.updateTimestamp(new Timestamp(-10));
+    stats1.updateTimestamp(new Timestamp(10000));
+    stats1.merge(stats2);
+    assertEquals(-10, typed.getMinimum().getTime());
+    assertEquals(10000, typed.getMaximum().getTime());
+  }
+
+  @Test
+  public void testDecimalMerge() throws Exception {
+    TypeDescription schema = TypeDescription.createDecimal()
+        .withPrecision(38).withScale(16);
+
+    ColumnStatisticsImpl stats1 = ColumnStatisticsImpl.create(schema);
+    ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(schema);
+    stats1.updateDecimal(new HiveDecimalWritable(10));
+    stats1.updateDecimal(new HiveDecimalWritable(100));
+    stats2.updateDecimal(new HiveDecimalWritable(1));
+    stats2.updateDecimal(new HiveDecimalWritable(1000));
+    stats1.merge(stats2);
+    DecimalColumnStatistics typed = (DecimalColumnStatistics) stats1;
+    assertEquals(1, typed.getMinimum().longValue());
+    assertEquals(1000, typed.getMaximum().longValue());
+    stats1.reset();
+    stats1.updateDecimal(new HiveDecimalWritable(-10));
+    stats1.updateDecimal(new HiveDecimalWritable(10000));
+    stats1.merge(stats2);
+    assertEquals(-10, typed.getMinimum().longValue());
+    assertEquals(10000, typed.getMaximum().longValue());
+  }
+
+
+  Path workDir = new Path(System.getProperty("test.tmp.dir",
+      "target" + File.separator + "test" + File.separator + "tmp"));
+
+  Configuration conf;
+  FileSystem fs;
+  Path testFilePath;
+
+  @Rule
+  public TestName testCaseName = new TestName();
+
+  @Before
+  public void openFileSystem() throws Exception {
+    conf = new Configuration();
+    fs = FileSystem.getLocal(conf);
+    fs.setWorkingDirectory(workDir);
+    testFilePath = new Path("TestOrcFile." + testCaseName.getMethodName() + ".orc");
+    fs.delete(testFilePath, false);
+  }
+
+  private static BytesWritable bytes(int... items) {
+    BytesWritable result = new BytesWritable();
+    result.setSize(items.length);
+    for (int i = 0; i < items.length; ++i) {
+      result.getBytes()[i] = (byte) items[i];
+    }
+    return result;
+  }
+
+  void appendRow(VectorizedRowBatch batch, BytesWritable bytes,
+                 String str) {
+    int row = batch.size++;
+    if (bytes == null) {
+      batch.cols[0].noNulls = false;
+      batch.cols[0].isNull[row] = true;
+    } else {
+      ((BytesColumnVector) batch.cols[0]).setVal(row, bytes.getBytes(),
+          0, bytes.getLength());
+    }
+    if (str == null) {
+      batch.cols[1].noNulls = false;
+      batch.cols[1].isNull[row] = true;
+    } else {
+      ((BytesColumnVector) batch.cols[1]).setVal(row, str.getBytes());
+    }
+  }
+
+  @Test
+  public void testHasNull() throws Exception {
+    TypeDescription schema =
+        TypeDescription.createStruct()
+        .addField("bytes1", TypeDescription.createBinary())
+        .addField("string1", TypeDescription.createString());
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .setSchema(schema)
+            .rowIndexStride(1000)
+            .stripeSize(10000)
+            .bufferSize(10000));
+    VectorizedRowBatch batch = schema.createRowBatch(5000);
+    // STRIPE 1
+    // RG1
+    for(int i=0; i<1000; i++) {
+      appendRow(batch, bytes(1, 2, 3), "RG1");
+    }
+    writer.addRowBatch(batch);
+    batch.reset();
+    // RG2
+    for(int i=0; i<1000; i++) {
+      appendRow(batch, bytes(1, 2, 3), null);
+    }
+    writer.addRowBatch(batch);
+    batch.reset();
+    // RG3
+    for(int i=0; i<1000; i++) {
+      appendRow(batch, bytes(1, 2, 3), "RG3");
+    }
+    writer.addRowBatch(batch);
+    batch.reset();
+    // RG4
+    for (int i = 0; i < 1000; i++) {
+      appendRow(batch, bytes(1,2,3), null);
+    }
+    writer.addRowBatch(batch);
+    batch.reset();
+    // RG5
+    for(int i=0; i<1000; i++) {
+      appendRow(batch, bytes(1, 2, 3), null);
+    }
+    writer.addRowBatch(batch);
+    batch.reset();
+    // STRIPE 2
+    for (int i = 0; i < 5000; i++) {
+      appendRow(batch, bytes(1,2,3), null);
+    }
+    writer.addRowBatch(batch);
+    batch.reset();
+    // STRIPE 3
+    for (int i = 0; i < 5000; i++) {
+      appendRow(batch, bytes(1,2,3), "STRIPE-3");
+    }
+    writer.addRowBatch(batch);
+    batch.reset();
+    // STRIPE 4
+    for (int i = 0; i < 5000; i++) {
+      appendRow(batch, bytes(1,2,3), null);
+    }
+    writer.addRowBatch(batch);
+    batch.reset();
+    writer.close();
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+
+    // check the file level stats
+    ColumnStatistics[] stats = reader.getStatistics();
+    assertEquals(20000, stats[0].getNumberOfValues());
+    assertEquals(20000, stats[1].getNumberOfValues());
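+    // string1 (stats[2]) is non-null only in RG1, RG3 and stripe 3: 1000 + 1000 + 5000 = 7000 values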
+    assertEquals(7000, stats[2].getNumberOfValues());
+    assertEquals(false, stats[0].hasNull());
+    assertEquals(false, stats[1].hasNull());
+    assertEquals(true, stats[2].hasNull());
+
+    // check the stripe level stats
+    List<StripeStatistics> stripeStats = reader.getStripeStatistics();
+    // stripe 1 stats
+    StripeStatistics ss1 = stripeStats.get(0);
+    ColumnStatistics ss1_cs1 = ss1.getColumnStatistics()[0];
+    ColumnStatistics ss1_cs2 = ss1.getColumnStatistics()[1];
+    ColumnStatistics ss1_cs3 = ss1.getColumnStatistics()[2];
+    assertEquals(false, ss1_cs1.hasNull());
+    assertEquals(false, ss1_cs2.hasNull());
+    assertEquals(true, ss1_cs3.hasNull());
+
+    // stripe 2 stats
+    StripeStatistics ss2 = stripeStats.get(1);
+    ColumnStatistics ss2_cs1 = ss2.getColumnStatistics()[0];
+    ColumnStatistics ss2_cs2 = ss2.getColumnStatistics()[1];
+    ColumnStatistics ss2_cs3 = ss2.getColumnStatistics()[2];
+    assertEquals(false, ss2_cs1.hasNull());
+    assertEquals(false, ss2_cs2.hasNull());
+    assertEquals(true, ss2_cs3.hasNull());
+
+    // stripe 3 stats
+    StripeStatistics ss3 = stripeStats.get(2);
+    ColumnStatistics ss3_cs1 = ss3.getColumnStatistics()[0];
+    ColumnStatistics ss3_cs2 = ss3.getColumnStatistics()[1];
+    ColumnStatistics ss3_cs3 = ss3.getColumnStatistics()[2];
+    assertEquals(false, ss3_cs1.hasNull());
+    assertEquals(false, ss3_cs2.hasNull());
+    assertEquals(false, ss3_cs3.hasNull());
+
+    // stripe 4 stats
+    StripeStatistics ss4 = stripeStats.get(3);
+    ColumnStatistics ss4_cs1 = ss4.getColumnStatistics()[0];
+    ColumnStatistics ss4_cs2 = ss4.getColumnStatistics()[1];
+    ColumnStatistics ss4_cs3 = ss4.getColumnStatistics()[2];
+    assertEquals(false, ss4_cs1.hasNull());
+    assertEquals(false, ss4_cs2.hasNull());
+    assertEquals(true, ss4_cs3.hasNull());
+
+    // Test file dump
+    PrintStream origOut = System.out;
+    String outputFilename = "orc-file-has-null.out";
+    FileOutputStream myOut = new FileOutputStream(workDir + File.separator + outputFilename);
+
+    // replace stdout and run command
+    System.setOut(new PrintStream(myOut));
+    FileDump.main(new String[]{testFilePath.toString(), "--rowindex=2"});
+    System.out.flush();
+    System.setOut(origOut);
+    // assumeTrue aborts the test (marking it skipped rather than failed)
+    // when the expression is false, i.e. when running on Windows.
+    assumeTrue(!System.getProperty("os.name").startsWith("Windows"));
+    TestFileDump.checkOutput(outputFilename, workDir + File.separator + outputFilename);
+  }
+}


[31/37] hive git commit: HIVE-17118. Move the hive-orc source files to make the package names unique.

Posted by om...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/impl/DataReaderProperties.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/impl/DataReaderProperties.java b/orc/src/java/org/apache/hive/orc/impl/DataReaderProperties.java
new file mode 100644
index 0000000..adef538
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/impl/DataReaderProperties.java
@@ -0,0 +1,122 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc.impl;
+
+import com.google.common.base.Preconditions;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hive.orc.CompressionKind;
+
+public final class DataReaderProperties {
+
+  private final FileSystem fileSystem;
+  private final Path path;
+  private final CompressionKind compression;
+  private final boolean zeroCopy;
+  private final int typeCount;
+  private final int bufferSize;
+
+  private DataReaderProperties(Builder builder) {
+    this.fileSystem = builder.fileSystem;
+    this.path = builder.path;
+    this.compression = builder.compression;
+    this.zeroCopy = builder.zeroCopy;
+    this.typeCount = builder.typeCount;
+    this.bufferSize = builder.bufferSize;
+  }
+
+  public FileSystem getFileSystem() {
+    return fileSystem;
+  }
+
+  public Path getPath() {
+    return path;
+  }
+
+  public CompressionKind getCompression() {
+    return compression;
+  }
+
+  public boolean getZeroCopy() {
+    return zeroCopy;
+  }
+
+  public int getTypeCount() {
+    return typeCount;
+  }
+
+  public int getBufferSize() {
+    return bufferSize;
+  }
+
+  public static Builder builder() {
+    return new Builder();
+  }
+
+  public static class Builder {
+
+    private FileSystem fileSystem;
+    private Path path;
+    private CompressionKind compression;
+    private boolean zeroCopy;
+    private int typeCount;
+    private int bufferSize;
+
+    private Builder() {
+
+    }
+
+    public Builder withFileSystem(FileSystem fileSystem) {
+      this.fileSystem = fileSystem;
+      return this;
+    }
+
+    public Builder withPath(Path path) {
+      this.path = path;
+      return this;
+    }
+
+    public Builder withCompression(CompressionKind value) {
+      this.compression = value;
+      return this;
+    }
+
+    public Builder withZeroCopy(boolean zeroCopy) {
+      this.zeroCopy = zeroCopy;
+      return this;
+    }
+
+    public Builder withTypeCount(int value) {
+      this.typeCount = value;
+      return this;
+    }
+
+    public Builder withBufferSize(int value) {
+      this.bufferSize = value;
+      return this;
+    }
+
+    public DataReaderProperties build() {
+      Preconditions.checkNotNull(fileSystem);
+      Preconditions.checkNotNull(path);
+
+      return new DataReaderProperties(this);
+    }
+
+  }
+}

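A minimal usage sketch of the builder above, not part of the patch; the file
system, path, compression kind and sizes are illustrative placeholders (build()
only requires fileSystem and path):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hive.orc.CompressionKind;
    import org.apache.hive.orc.impl.DataReaderProperties;

    public class DataReaderPropertiesExample {
      public static void main(String[] args) throws Exception {
        DataReaderProperties props = DataReaderProperties.builder()
            .withFileSystem(FileSystem.getLocal(new Configuration()))
            .withPath(new Path("/tmp/example.orc"))   // placeholder path
            .withCompression(CompressionKind.ZLIB)    // assumes the usual ZLIB kind
            .withZeroCopy(false)
            .withTypeCount(3)                         // placeholder type count
            .withBufferSize(256 * 1024)
            .build();                                 // checks fileSystem and path only
        System.out.println(props.getPath() + " " + props.getCompression());
      }
    }
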
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/impl/DirectDecompressionCodec.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/impl/DirectDecompressionCodec.java b/orc/src/java/org/apache/hive/orc/impl/DirectDecompressionCodec.java
new file mode 100644
index 0000000..e6cb84e
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/impl/DirectDecompressionCodec.java
@@ -0,0 +1,28 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc.impl;
+
+import org.apache.hive.orc.CompressionCodec;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+public interface DirectDecompressionCodec extends CompressionCodec {
+  public boolean isAvailable();
+  public void directDecompress(ByteBuffer in, ByteBuffer out) throws IOException;
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/impl/DynamicByteArray.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/impl/DynamicByteArray.java b/orc/src/java/org/apache/hive/orc/impl/DynamicByteArray.java
new file mode 100644
index 0000000..004f69a
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/impl/DynamicByteArray.java
@@ -0,0 +1,303 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc.impl;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.nio.ByteBuffer;
+
+import org.apache.hadoop.io.Text;
+
+/**
+ * A class that is a growable array of bytes. Growth is managed in terms of
+ * chunks that are allocated when needed.
+ */
+public final class DynamicByteArray {
+  static final int DEFAULT_CHUNKSIZE = 32 * 1024;
+  static final int DEFAULT_NUM_CHUNKS = 128;
+
+  private final int chunkSize;        // our allocation sizes
+  private byte[][] data;              // the real data
+  private int length;                 // max set element index +1
+  private int initializedChunks = 0;  // the number of chunks created
+
+  public DynamicByteArray() {
+    this(DEFAULT_NUM_CHUNKS, DEFAULT_CHUNKSIZE);
+  }
+
+  public DynamicByteArray(int numChunks, int chunkSize) {
+    if (chunkSize == 0) {
+      throw new IllegalArgumentException("bad chunksize");
+    }
+    this.chunkSize = chunkSize;
+    data = new byte[numChunks][];
+  }
+
+  /**
+   * Ensure that the given index is valid.
+   */
+  private void grow(int chunkIndex) {
+    if (chunkIndex >= initializedChunks) {
+      if (chunkIndex >= data.length) {
+        int newSize = Math.max(chunkIndex + 1, 2 * data.length);
+        byte[][] newChunk = new byte[newSize][];
+        System.arraycopy(data, 0, newChunk, 0, data.length);
+        data = newChunk;
+      }
+      for(int i=initializedChunks; i <= chunkIndex; ++i) {
+        data[i] = new byte[chunkSize];
+      }
+      initializedChunks = chunkIndex + 1;
+    }
+  }
+
+  public byte get(int index) {
+    if (index >= length) {
+      throw new IndexOutOfBoundsException("Index " + index +
+                                            " is outside of 0.." +
+                                            (length - 1));
+    }
+    int i = index / chunkSize;
+    int j = index % chunkSize;
+    return data[i][j];
+  }
+
+  public void set(int index, byte value) {
+    int i = index / chunkSize;
+    int j = index % chunkSize;
+    grow(i);
+    if (index >= length) {
+      length = index + 1;
+    }
+    data[i][j] = value;
+  }
+
+  public int add(byte value) {
+    int i = length / chunkSize;
+    int j = length % chunkSize;
+    grow(i);
+    data[i][j] = value;
+    int result = length;
+    length += 1;
+    return result;
+  }
+
+  /**
+   * Copy a slice of a byte array into our buffer.
+   * @param value the array to copy from
+   * @param valueOffset the first location to copy from value
+   * @param valueLength the number of bytes to copy from value
+   * @return the offset of the start of the value
+   */
+  public int add(byte[] value, int valueOffset, int valueLength) {
+    int i = length / chunkSize;
+    int j = length % chunkSize;
+    grow((length + valueLength) / chunkSize);
+    int remaining = valueLength;
+    while (remaining > 0) {
+      int size = Math.min(remaining, chunkSize - j);
+      System.arraycopy(value, valueOffset, data[i], j, size);
+      remaining -= size;
+      valueOffset += size;
+      i += 1;
+      j = 0;
+    }
+    int result = length;
+    length += valueLength;
+    return result;
+  }
+
+  /**
+   * Read the entire stream into this array.
+   * @param in the stream to read from
+   * @throws IOException
+   */
+  public void readAll(InputStream in) throws IOException {
+    int currentChunk = length / chunkSize;
+    int currentOffset = length % chunkSize;
+    grow(currentChunk);
+    int currentLength = in.read(data[currentChunk], currentOffset,
+      chunkSize - currentOffset);
+    while (currentLength > 0) {
+      length += currentLength;
+      currentOffset = length % chunkSize;
+      if (currentOffset == 0) {
+        currentChunk = length / chunkSize;
+        grow(currentChunk);
+      }
+      currentLength = in.read(data[currentChunk], currentOffset,
+        chunkSize - currentOffset);
+    }
+  }
+
+  /**
+   * Byte compare a set of bytes against the bytes in this dynamic array.
+   * @param other source of the other bytes
+   * @param otherOffset start offset in the other array
+   * @param otherLength number of bytes in the other array
+   * @param ourOffset the offset in our array
+   * @param ourLength the number of bytes in our array
+   * @return negative for less, 0 for equal, positive for greater
+   */
+  public int compare(byte[] other, int otherOffset, int otherLength,
+                     int ourOffset, int ourLength) {
+    int currentChunk = ourOffset / chunkSize;
+    int currentOffset = ourOffset % chunkSize;
+    int maxLength = Math.min(otherLength, ourLength);
+    while (maxLength > 0 &&
+      other[otherOffset] == data[currentChunk][currentOffset]) {
+      otherOffset += 1;
+      currentOffset += 1;
+      if (currentOffset == chunkSize) {
+        currentChunk += 1;
+        currentOffset = 0;
+      }
+      maxLength -= 1;
+    }
+    if (maxLength == 0) {
+      return otherLength - ourLength;
+    }
+    int otherByte = 0xff & other[otherOffset];
+    int ourByte = 0xff & data[currentChunk][currentOffset];
+    return otherByte > ourByte ? 1 : -1;
+  }
+
+  /**
+   * Get the size of the array.
+   * @return the number of bytes in the array
+   */
+  public int size() {
+    return length;
+  }
+
+  /**
+   * Clear the array to its original pristine state.
+   */
+  public void clear() {
+    length = 0;
+    for(int i=0; i < data.length; ++i) {
+      data[i] = null;
+    }
+    initializedChunks = 0;
+  }
+
+  /**
+   * Set a text value from the bytes in this dynamic array.
+   * @param result the value to set
+   * @param offset the start of the bytes to copy
+   * @param length the number of bytes to copy
+   */
+  public void setText(Text result, int offset, int length) {
+    result.clear();
+    int currentChunk = offset / chunkSize;
+    int currentOffset = offset % chunkSize;
+    int currentLength = Math.min(length, chunkSize - currentOffset);
+    while (length > 0) {
+      result.append(data[currentChunk], currentOffset, currentLength);
+      length -= currentLength;
+      currentChunk += 1;
+      currentOffset = 0;
+      currentLength = Math.min(length, chunkSize - currentOffset);
+    }
+  }
+
+  /**
+   * Write out a range of this dynamic array to an output stream.
+   * @param out the stream to write to
+   * @param offset the first offset to write
+   * @param length the number of bytes to write
+   * @throws IOException
+   */
+  public void write(OutputStream out, int offset,
+                    int length) throws IOException {
+    int currentChunk = offset / chunkSize;
+    int currentOffset = offset % chunkSize;
+    while (length > 0) {
+      int currentLength = Math.min(length, chunkSize - currentOffset);
+      out.write(data[currentChunk], currentOffset, currentLength);
+      length -= currentLength;
+      currentChunk += 1;
+      currentOffset = 0;
+    }
+  }
+
+  @Override
+  public String toString() {
+    if (length == 0) {
+      return "{}";
+    }
+    StringBuilder sb = new StringBuilder(length * 3);
+
+    sb.append('{');
+    int l = length - 1;
+    for (int i = 0; i < l; i++) {
+      sb.append(Integer.toHexString(get(i)));
+      sb.append(',');
+    }
+    // keep the last element in the same hex format as the rest
+    sb.append(Integer.toHexString(get(l)));
+    sb.append('}');
+
+    return sb.toString();
+  }
+
+  public void setByteBuffer(ByteBuffer result, int offset, int length) {
+    result.clear();
+    int currentChunk = offset / chunkSize;
+    int currentOffset = offset % chunkSize;
+    int currentLength = Math.min(length, chunkSize - currentOffset);
+    while (length > 0) {
+      result.put(data[currentChunk], currentOffset, currentLength);
+      length -= currentLength;
+      currentChunk += 1;
+      currentOffset = 0;
+      currentLength = Math.min(length, chunkSize - currentOffset);
+    }
+  }
+
+  /**
+   * Gets all the bytes of the array.
+   *
+   * @return Bytes of the array
+   */
+  public byte[] get() {
+    byte[] result = null;
+    if (length > 0) {
+      int currentChunk = 0;
+      int currentOffset = 0;
+      int currentLength = Math.min(length, chunkSize);
+      int destOffset = 0;
+      result = new byte[length];
+      int totalLength = length;
+      while (totalLength > 0) {
+        System.arraycopy(data[currentChunk], currentOffset, result, destOffset, currentLength);
+        destOffset += currentLength;
+        totalLength -= currentLength;
+        currentChunk += 1;
+        currentOffset = 0;
+        currentLength = Math.min(totalLength, chunkSize - currentOffset);
+      }
+    }
+    return result;
+  }
+
+  /**
+   * Get the size of the buffers.
+   */
+  public long getSizeInBytes() {
+    return initializedChunks * chunkSize;
+  }
+}

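A small sketch, not part of the patch, of how the chunked byte array above
behaves; the chunk dimensions are chosen only for illustration:

    import org.apache.hadoop.io.Text;
    import org.apache.hive.orc.impl.DynamicByteArray;

    public class DynamicByteArrayExample {
      public static void main(String[] args) {
        DynamicByteArray buffer = new DynamicByteArray(4, 8); // start with 4 chunks of 8 bytes
        byte[] word = "orc".getBytes();
        int offset = buffer.add(word, 0, word.length);        // add() returns the start offset
        buffer.add((byte) '!');
        Text text = new Text();
        buffer.setText(text, offset, word.length);            // copies "orc" back out
        System.out.println(text + " size=" + buffer.size()
            + " allocated=" + buffer.getSizeInBytes());       // orc size=4 allocated=8
      }
    }
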
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/impl/DynamicIntArray.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/impl/DynamicIntArray.java b/orc/src/java/org/apache/hive/orc/impl/DynamicIntArray.java
new file mode 100644
index 0000000..f44b5b0
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/impl/DynamicIntArray.java
@@ -0,0 +1,142 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc.impl;
+
+/**
+ * Dynamic int array that uses primitive types and chunks to avoid copying
+ * large number of integers when it resizes.
+ *
+ * The motivation for this class is memory optimization, i.e. space efficient
+ * storage of potentially huge arrays without good a-priori size guesses.
+ *
+ * The API of this class is between a primitive array and an AbstractList. It's
+ * not a Collection implementation because it handles primitive types, but the
+ * API could be extended to support iterators and the like.
+ *
+ * NOTE: Like standard Collection implementations/arrays, this class is not
+ * synchronized.
+ */
+public final class DynamicIntArray {
+  static final int DEFAULT_CHUNKSIZE = 8 * 1024;
+  static final int INIT_CHUNKS = 128;
+
+  private final int chunkSize;       // our allocation size
+  private int[][] data;              // the real data
+  private int length;                // max set element index +1
+  private int initializedChunks = 0; // the number of created chunks
+
+  public DynamicIntArray() {
+    this(DEFAULT_CHUNKSIZE);
+  }
+
+  public DynamicIntArray(int chunkSize) {
+    this.chunkSize = chunkSize;
+
+    data = new int[INIT_CHUNKS][];
+  }
+
+  /**
+   * Ensure that the given index is valid.
+   */
+  private void grow(int chunkIndex) {
+    if (chunkIndex >= initializedChunks) {
+      if (chunkIndex >= data.length) {
+        int newSize = Math.max(chunkIndex + 1, 2 * data.length);
+        int[][] newChunk = new int[newSize][];
+        System.arraycopy(data, 0, newChunk, 0, data.length);
+        data = newChunk;
+      }
+      for (int i=initializedChunks; i <= chunkIndex; ++i) {
+        data[i] = new int[chunkSize];
+      }
+      initializedChunks = chunkIndex + 1;
+    }
+  }
+
+  public int get(int index) {
+    if (index >= length) {
+      throw new IndexOutOfBoundsException("Index " + index +
+                                            " is outside of 0.." +
+                                            (length - 1));
+    }
+    int i = index / chunkSize;
+    int j = index % chunkSize;
+    return data[i][j];
+  }
+
+  public void set(int index, int value) {
+    int i = index / chunkSize;
+    int j = index % chunkSize;
+    grow(i);
+    if (index >= length) {
+      length = index + 1;
+    }
+    data[i][j] = value;
+  }
+
+  public void increment(int index, int value) {
+    int i = index / chunkSize;
+    int j = index % chunkSize;
+    grow(i);
+    if (index >= length) {
+      length = index + 1;
+    }
+    data[i][j] += value;
+  }
+
+  public void add(int value) {
+    int i = length / chunkSize;
+    int j = length % chunkSize;
+    grow(i);
+    data[i][j] = value;
+    length += 1;
+  }
+
+  public int size() {
+    return length;
+  }
+
+  public void clear() {
+    length = 0;
+    for(int i=0; i < data.length; ++i) {
+      data[i] = null;
+    }
+    initializedChunks = 0;
+  }
+
+  public String toString() {
+    if (length == 0) {
+      return "{}";
+    }
+    StringBuilder sb = new StringBuilder(length * 4);
+
+    sb.append('{');
+    int l = length - 1;
+    for (int i = 0; i < l; i++) {
+      sb.append(get(i));
+      sb.append(',');
+    }
+    sb.append(get(l));
+    sb.append('}');
+
+    return sb.toString();
+  }
+
+  public int getSizeInBytes() {
+    return 4 * initializedChunks * chunkSize;
+  }
+}
+

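A matching sketch, not part of the patch, for the int variant; the chunk size
is only for illustration:

    import org.apache.hive.orc.impl.DynamicIntArray;

    public class DynamicIntArrayExample {
      public static void main(String[] args) {
        DynamicIntArray counts = new DynamicIntArray(4); // chunks of 4 ints
        for (int i = 0; i < 10; i++) {
          counts.add(i);             // a new chunk is initialized every 4 adds
        }
        counts.increment(3, 100);    // index 3 lives in chunk 0, slot 3
        System.out.println(counts.get(3) + " of " + counts.size()
            + " ints in " + counts.getSizeInBytes() + " bytes"); // 103 of 10 ints in 48 bytes
      }
    }
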
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/impl/HadoopShims.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/impl/HadoopShims.java b/orc/src/java/org/apache/hive/orc/impl/HadoopShims.java
new file mode 100644
index 0000000..8c9a624
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/impl/HadoopShims.java
@@ -0,0 +1,143 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc.impl;
+
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.util.VersionInfo;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.ByteBuffer;
+
+public interface HadoopShims {
+
+  enum DirectCompressionType {
+    NONE,
+    ZLIB_NOHEADER,
+    ZLIB,
+    SNAPPY,
+  }
+
+  interface DirectDecompressor {
+    void decompress(ByteBuffer var1, ByteBuffer var2) throws IOException;
+  }
+
+  /**
+   * Get a direct decompressor codec, if it is available.
+   * @param codec the direct compression type wanted
+   * @return a direct decompressor, or null if none is available
+   */
+  DirectDecompressor getDirectDecompressor(DirectCompressionType codec);
+
+  /**
+   * a hadoop.io ByteBufferPool shim.
+   */
+  public interface ByteBufferPoolShim {
+    /**
+     * Get a new ByteBuffer from the pool.  The pool can provide this by
+     * removing a buffer from its internal cache, or by allocating a
+     * new buffer.
+     *
+     * @param direct     Whether the buffer should be direct.
+     * @param length     The minimum length the buffer will have.
+     * @return           A new ByteBuffer. Its capacity can be less
+     *                   than what was requested, but must be at
+     *                   least 1 byte.
+     */
+    ByteBuffer getBuffer(boolean direct, int length);
+
+    /**
+     * Release a buffer back to the pool.
+     * The pool may choose to put this buffer into its cache/free it.
+     *
+     * @param buffer    a direct bytebuffer
+     */
+    void putBuffer(ByteBuffer buffer);
+  }
+
+  /**
+   * Provides an HDFS ZeroCopyReader shim.
+   * @param in FSDataInputStream to read from (where the cached/mmap buffers are tied to)
+   * @param pool ByteBufferPoolShim to allocate fallback buffers with
+   *
+   * @return a zero-copy reader shim, or null if not supported
+   */
+  public ZeroCopyReaderShim getZeroCopyReader(FSDataInputStream in, ByteBufferPoolShim pool) throws IOException;
+
+  public interface ZeroCopyReaderShim extends Closeable {
+    /**
+     * Get a ByteBuffer from the FSDataInputStream - this can be either a HeapByteBuffer or a MappedByteBuffer.
+     * The input stream is advanced by the amount read. The data read can be smaller than maxLength.
+     *
+     * @return the ByteBuffer read from the stream
+     */
+    public ByteBuffer readBuffer(int maxLength, boolean verifyChecksums) throws IOException;
+    /**
+     * Release a ByteBuffer obtained from readBuffer() on this reader, so that
+     * the underlying buffer can be returned to the pool or unmapped.
+     */
+    public void releaseBuffer(ByteBuffer buffer);
+
+    /**
+     * Close the underlying stream.
+     * @throws IOException
+     */
+    public void close() throws IOException;
+  }
+  /**
+   * Read data into a Text object in the fastest way possible
+   */
+  public interface TextReaderShim {
+    /**
+     * Read exactly size bytes from the stream into txt.
+     * @param txt the Text object to fill
+     * @param size the number of bytes to read
+     * @throws IOException
+     */
+    void read(Text txt, int size) throws IOException;
+  }
+
+  /**
+   * Wrap a TextReaderShim around an input stream. The reader shim will not
+   * buffer any reads from the underlying stream and will only consume bytes
+   * which are required for TextReaderShim.read() input.
+   */
+  public TextReaderShim getTextReaderShim(InputStream input) throws IOException;
+
+  class Factory {
+    private static HadoopShims SHIMS = null;
+
+    public static synchronized HadoopShims get() {
+      if (SHIMS == null) {
+        String[] versionParts = VersionInfo.getVersion().split("[.]");
+        int major = Integer.parseInt(versionParts[0]);
+        int minor = Integer.parseInt(versionParts[1]);
+        if (major < 2 || (major == 2 && minor < 3)) {
+          SHIMS = new HadoopShims_2_2();
+        } else {
+          SHIMS = new HadoopShimsCurrent();
+        }
+      }
+      return SHIMS;
+    }
+  }
+}

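The Factory above picks a shim from the running Hadoop version string; a
stand-alone sketch of that check (the version strings below are examples, not
values from the patch):

    public class ShimVersionCheck {
      static boolean needsLegacyShim(String hadoopVersion) {
        String[] parts = hadoopVersion.split("[.]");
        int major = Integer.parseInt(parts[0]);
        int minor = Integer.parseInt(parts[1]);
        // anything older than 2.3 lacks the APIs used by HadoopShimsCurrent
        return major < 2 || (major == 2 && minor < 3);
      }

      public static void main(String[] args) {
        System.out.println(needsLegacyShim("2.2.0")); // true  -> HadoopShims_2_2
        System.out.println(needsLegacyShim("2.7.3")); // false -> HadoopShimsCurrent
      }
    }
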
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/impl/HadoopShimsCurrent.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/impl/HadoopShimsCurrent.java b/orc/src/java/org/apache/hive/orc/impl/HadoopShimsCurrent.java
new file mode 100644
index 0000000..87aa9d7
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/impl/HadoopShimsCurrent.java
@@ -0,0 +1,92 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc.impl;
+
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.compress.snappy.SnappyDecompressor;
+import org.apache.hadoop.io.compress.zlib.ZlibDecompressor;
+
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.ByteBuffer;
+
+/**
+ * Shims for recent versions of Hadoop
+ */
+public class HadoopShimsCurrent implements HadoopShims {
+
+  private static class DirectDecompressWrapper implements DirectDecompressor {
+    private final org.apache.hadoop.io.compress.DirectDecompressor root;
+
+    DirectDecompressWrapper(org.apache.hadoop.io.compress.DirectDecompressor root) {
+      this.root = root;
+    }
+
+    public void decompress(ByteBuffer input,
+                           ByteBuffer output) throws IOException {
+      root.decompress(input, output);
+    }
+  }
+
+  public DirectDecompressor getDirectDecompressor(
+      DirectCompressionType codec) {
+    switch (codec) {
+      case ZLIB:
+        return new DirectDecompressWrapper
+            (new ZlibDecompressor.ZlibDirectDecompressor());
+      case ZLIB_NOHEADER:
+        return new DirectDecompressWrapper
+            (new ZlibDecompressor.ZlibDirectDecompressor
+                (ZlibDecompressor.CompressionHeader.NO_HEADER, 0));
+      case SNAPPY:
+        return new DirectDecompressWrapper
+            (new SnappyDecompressor.SnappyDirectDecompressor());
+      default:
+        return null;
+    }
+  }
+
+  @Override
+  public ZeroCopyReaderShim getZeroCopyReader(FSDataInputStream in,
+                                              ByteBufferPoolShim pool
+                                              ) throws IOException {
+    return ZeroCopyShims.getZeroCopyReader(in, pool);
+  }
+
+  private final class FastTextReaderShim implements TextReaderShim {
+    private final DataInputStream din;
+
+    public FastTextReaderShim(InputStream in) {
+      this.din = new DataInputStream(in);
+    }
+
+    @Override
+    public void read(Text txt, int len) throws IOException {
+      txt.readWithKnownLength(din, len);
+    }
+  }
+
+  @Override
+  public TextReaderShim getTextReaderShim(InputStream in) throws IOException {
+    return new FastTextReaderShim(in);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/impl/HadoopShims_2_2.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/impl/HadoopShims_2_2.java b/orc/src/java/org/apache/hive/orc/impl/HadoopShims_2_2.java
new file mode 100644
index 0000000..ce0be86
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/impl/HadoopShims_2_2.java
@@ -0,0 +1,101 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc.impl;
+
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.io.Text;
+
+import java.io.EOFException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.lang.reflect.Method;
+
+/**
+ * Shims for versions of Hadoop up to and including 2.2.x
+ */
+public class HadoopShims_2_2 implements HadoopShims {
+
+  final boolean zeroCopy;
+  final boolean fastRead;
+
+  HadoopShims_2_2() {
+    boolean zcr = false;
+    try {
+      Class.forName("org.apache.hadoop.fs.CacheFlag", false,
+        HadoopShims_2_2.class.getClassLoader());
+      zcr = true;
+    } catch (ClassNotFoundException ce) {
+      // CacheFlag is missing, so zero-copy reads are not available on this Hadoop
+    }
+    zeroCopy = zcr;
+    boolean fastRead = false;
+    if (zcr) {
+      for (Method m : Text.class.getMethods()) {
+        if ("readWithKnownLength".equals(m.getName())) {
+          fastRead = true;
+        }
+      }
+    }
+    this.fastRead = fastRead;
+  }
+
+  public DirectDecompressor getDirectDecompressor(
+      DirectCompressionType codec) {
+    return null;
+  }
+
+  @Override
+  public ZeroCopyReaderShim getZeroCopyReader(FSDataInputStream in,
+                                              ByteBufferPoolShim pool
+                                              ) throws IOException {
+    if(zeroCopy) {
+      return ZeroCopyShims.getZeroCopyReader(in, pool);
+    }
+    /* not supported */
+    return null;
+  }
+
+  private final class BasicTextReaderShim implements TextReaderShim {
+    private final InputStream in;
+
+    public BasicTextReaderShim(InputStream in) {
+      this.in = in;
+    }
+
+    @Override
+    public void read(Text txt, int len) throws IOException {
+      int offset = 0;
+      byte[] bytes = new byte[len];
+      while (len > 0) {
+        int written = in.read(bytes, offset, len);
+        if (written < 0) {
+          throw new EOFException("Can't finish read from " + in + " read "
+              + (offset) + " bytes out of " + bytes.length);
+        }
+        len -= written;
+        offset += written;
+      }
+      txt.set(bytes);
+    }
+  }
+
+  @Override
+  public TextReaderShim getTextReaderShim(InputStream in) throws IOException {
+    return new BasicTextReaderShim(in);
+  }
+}

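BasicTextReaderShim.read() above is the usual read-fully loop for streams that
may return short reads; a stand-alone version for reference, not part of the
patch:

    import java.io.ByteArrayInputStream;
    import java.io.EOFException;
    import java.io.IOException;
    import java.io.InputStream;

    public class ReadFullyExample {
      static byte[] readFully(InputStream in, int len) throws IOException {
        byte[] bytes = new byte[len];
        int offset = 0;
        while (len > 0) {
          int read = in.read(bytes, offset, len);  // may return fewer bytes than asked for
          if (read < 0) {
            throw new EOFException("stream ended after " + offset + " bytes");
          }
          offset += read;
          len -= read;
        }
        return bytes;
      }

      public static void main(String[] args) throws IOException {
        InputStream in = new ByteArrayInputStream("hello orc".getBytes());
        System.out.println(new String(readFully(in, 5))); // prints "hello"
      }
    }
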
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/impl/InStream.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/impl/InStream.java b/orc/src/java/org/apache/hive/orc/impl/InStream.java
new file mode 100644
index 0000000..96c9ed3
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/impl/InStream.java
@@ -0,0 +1,498 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc.impl;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.ListIterator;
+
+import org.apache.hive.orc.CompressionCodec;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.apache.hadoop.hive.common.io.DiskRange;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.protobuf.CodedInputStream;
+
+public abstract class InStream extends InputStream {
+
+  private static final Logger LOG = LoggerFactory.getLogger(InStream.class);
+  public static final int PROTOBUF_MESSAGE_MAX_LIMIT = 1024 << 20; // 1GB
+
+  protected final String name;
+  protected long length;
+
+  public InStream(String name, long length) {
+    this.name = name;
+    this.length = length;
+  }
+
+  public String getStreamName() {
+    return name;
+  }
+
+  public long getStreamLength() {
+    return length;
+  }
+
+  @Override
+  public abstract void close();
+
+  public static class UncompressedStream extends InStream {
+    private List<DiskRange> bytes;
+    private long length;
+    protected long currentOffset;
+    private ByteBuffer range;
+    private int currentRange;
+
+    public UncompressedStream(String name, List<DiskRange> input, long length) {
+      super(name, length);
+      reset(input, length);
+    }
+
+    protected void reset(List<DiskRange> input, long length) {
+      this.bytes = input;
+      this.length = length;
+      currentRange = 0;
+      currentOffset = 0;
+      range = null;
+    }
+
+    @Override
+    public int read() {
+      if (range == null || range.remaining() == 0) {
+        if (currentOffset == length) {
+          return -1;
+        }
+        seek(currentOffset);
+      }
+      currentOffset += 1;
+      return 0xff & range.get();
+    }
+
+    @Override
+    public int read(byte[] data, int offset, int length) {
+      if (range == null || range.remaining() == 0) {
+        if (currentOffset == this.length) {
+          return -1;
+        }
+        seek(currentOffset);
+      }
+      int actualLength = Math.min(length, range.remaining());
+      range.get(data, offset, actualLength);
+      currentOffset += actualLength;
+      return actualLength;
+    }
+
+    @Override
+    public int available() {
+      if (range != null && range.remaining() > 0) {
+        return range.remaining();
+      }
+      return (int) (length - currentOffset);
+    }
+
+    @Override
+    public void close() {
+      currentRange = bytes.size();
+      currentOffset = length;
+      // explicit de-ref of bytes[]
+      bytes.clear();
+    }
+
+    @Override
+    public void seek(PositionProvider index) throws IOException {
+      seek(index.getNext());
+    }
+
+    public void seek(long desired) {
+      if (desired == 0 && bytes.isEmpty()) {
+        logEmptySeek(name);
+        return;
+      }
+      int i = 0;
+      for (DiskRange curRange : bytes) {
+        if (desired == 0 && curRange.getData().remaining() == 0) {
+          logEmptySeek(name);
+          return;
+        }
+        if (curRange.getOffset() <= desired &&
+            (desired - curRange.getOffset()) < curRange.getLength()) {
+          currentOffset = desired;
+          currentRange = i;
+          this.range = curRange.getData().duplicate();
+          int pos = range.position();
+          pos += (int)(desired - curRange.getOffset()); // this is why we duplicate
+          this.range.position(pos);
+          return;
+        }
+        ++i;
+      }
+      // if they are seeking to the precise end, go ahead and let them go there
+      int segments = bytes.size();
+      if (segments != 0 && desired == bytes.get(segments - 1).getEnd()) {
+        currentOffset = desired;
+        currentRange = segments - 1;
+        DiskRange curRange = bytes.get(currentRange);
+        this.range = curRange.getData().duplicate();
+        int pos = range.position();
+        pos += (int)(desired - curRange.getOffset()); // this is why we duplicate
+        this.range.position(pos);
+        return;
+      }
+      throw new IllegalArgumentException("Seek in " + name + " to " +
+        desired + " is outside of the data");
+    }
+
+    @Override
+    public String toString() {
+      return "uncompressed stream " + name + " position: " + currentOffset +
+          " length: " + length + " range: " + currentRange +
+          " offset: " + (range == null ? 0 : range.position()) + " limit: " + (range == null ? 0 : range.limit());
+    }
+  }
+
+  private static ByteBuffer allocateBuffer(int size, boolean isDirect) {
+    // TODO: use the same pool as the ORC readers
+    if (isDirect) {
+      return ByteBuffer.allocateDirect(size);
+    } else {
+      return ByteBuffer.allocate(size);
+    }
+  }
+
+  private static class CompressedStream extends InStream {
+    private final List<DiskRange> bytes;
+    private final int bufferSize;
+    private ByteBuffer uncompressed;
+    private final CompressionCodec codec;
+    private ByteBuffer compressed;
+    private long currentOffset;
+    private int currentRange;
+    private boolean isUncompressedOriginal;
+
+    public CompressedStream(String name, List<DiskRange> input, long length,
+                            CompressionCodec codec, int bufferSize) {
+      super(name, length);
+      this.bytes = input;
+      this.codec = codec;
+      this.bufferSize = bufferSize;
+      currentOffset = 0;
+      currentRange = 0;
+    }
+
+    private void allocateForUncompressed(int size, boolean isDirect) {
+      uncompressed = allocateBuffer(size, isDirect);
+    }
+
+    private void readHeader() throws IOException {
+      if (compressed == null || compressed.remaining() <= 0) {
+        seek(currentOffset);
+      }
+      if (compressed.remaining() > OutStream.HEADER_SIZE) {
+        int b0 = compressed.get() & 0xff;
+        int b1 = compressed.get() & 0xff;
+        int b2 = compressed.get() & 0xff;
+        boolean isOriginal = (b0 & 0x01) == 1;
+        int chunkLength = (b2 << 15) | (b1 << 7) | (b0 >> 1);
+
+        if (chunkLength > bufferSize) {
+          throw new IllegalArgumentException("Buffer size too small. size = " +
+              bufferSize + " needed = " + chunkLength);
+        }
+        // read 3 bytes, which should be equal to OutStream.HEADER_SIZE always
+        assert OutStream.HEADER_SIZE == 3 : "The Orc HEADER_SIZE must be the same in OutStream and InStream";
+        currentOffset += OutStream.HEADER_SIZE;
+
+        ByteBuffer slice = this.slice(chunkLength);
+
+        if (isOriginal) {
+          uncompressed = slice;
+          isUncompressedOriginal = true;
+        } else {
+          if (isUncompressedOriginal) {
+            allocateForUncompressed(bufferSize, slice.isDirect());
+            isUncompressedOriginal = false;
+          } else if (uncompressed == null) {
+            allocateForUncompressed(bufferSize, slice.isDirect());
+          } else {
+            uncompressed.clear();
+          }
+          codec.decompress(slice, uncompressed);
+        }
+      } else {
+        throw new IllegalStateException("Can't read header at " + this);
+      }
+    }
+
+    @Override
+    public int read() throws IOException {
+      if (uncompressed == null || uncompressed.remaining() == 0) {
+        if (currentOffset == length) {
+          return -1;
+        }
+        readHeader();
+      }
+      return 0xff & uncompressed.get();
+    }
+
+    @Override
+    public int read(byte[] data, int offset, int length) throws IOException {
+      if (uncompressed == null || uncompressed.remaining() == 0) {
+        if (currentOffset == this.length) {
+          return -1;
+        }
+        readHeader();
+      }
+      int actualLength = Math.min(length, uncompressed.remaining());
+      uncompressed.get(data, offset, actualLength);
+      return actualLength;
+    }
+
+    @Override
+    public int available() throws IOException {
+      if (uncompressed == null || uncompressed.remaining() == 0) {
+        if (currentOffset == length) {
+          return 0;
+        }
+        readHeader();
+      }
+      return uncompressed.remaining();
+    }
+
+    @Override
+    public void close() {
+      uncompressed = null;
+      compressed = null;
+      currentRange = bytes.size();
+      currentOffset = length;
+      bytes.clear();
+    }
+
+    @Override
+    public void seek(PositionProvider index) throws IOException {
+      seek(index.getNext());
+      long uncompressedBytes = index.getNext();
+      if (uncompressedBytes != 0) {
+        readHeader();
+        uncompressed.position(uncompressed.position() +
+                              (int) uncompressedBytes);
+      } else if (uncompressed != null) {
+        // mark the uncompressed buffer as done
+        uncompressed.position(uncompressed.limit());
+      }
+    }
+
+    /* slices a read only contiguous buffer of chunkLength */
+    private ByteBuffer slice(int chunkLength) throws IOException {
+      int len = chunkLength;
+      final long oldOffset = currentOffset;
+      ByteBuffer slice;
+      if (compressed.remaining() >= len) {
+        slice = compressed.slice();
+        // simple case
+        slice.limit(len);
+        currentOffset += len;
+        compressed.position(compressed.position() + len);
+        return slice;
+      } else if (currentRange >= (bytes.size() - 1)) {
+        // nothing has been modified yet
+        throw new IOException("EOF in " + this + " while trying to read " +
+            chunkLength + " bytes");
+      }
+
+      if (LOG.isDebugEnabled()) {
+        LOG.debug(String.format(
+            "Crossing into next BufferChunk because compressed only has %d bytes (needs %d)",
+            compressed.remaining(), len));
+      }
+
+      // we need to consolidate 2 or more buffers into 1
+      // first copy out compressed buffers
+      ByteBuffer copy = allocateBuffer(chunkLength, compressed.isDirect());
+      currentOffset += compressed.remaining();
+      len -= compressed.remaining();
+      copy.put(compressed);
+      ListIterator<DiskRange> iter = bytes.listIterator(currentRange);
+
+      while (len > 0 && iter.hasNext()) {
+        ++currentRange;
+        if (LOG.isDebugEnabled()) {
+          LOG.debug(String.format("Read slow-path, >1 cross block reads with %s", this.toString()));
+        }
+        DiskRange range = iter.next();
+        compressed = range.getData().duplicate();
+        if (compressed.remaining() >= len) {
+          slice = compressed.slice();
+          slice.limit(len);
+          copy.put(slice);
+          currentOffset += len;
+          compressed.position(compressed.position() + len);
+          return copy;
+        }
+        currentOffset += compressed.remaining();
+        len -= compressed.remaining();
+        copy.put(compressed);
+      }
+
+      // restore offsets for exception clarity
+      seek(oldOffset);
+      throw new IOException("EOF in " + this + " while trying to read " +
+          chunkLength + " bytes");
+    }
+
+    private void seek(long desired) throws IOException {
+      if (desired == 0 && bytes.isEmpty()) {
+        logEmptySeek(name);
+        return;
+      }
+      int i = 0;
+      for (DiskRange range : bytes) {
+        if (range.getOffset() <= desired && desired < range.getEnd()) {
+          currentRange = i;
+          compressed = range.getData().duplicate();
+          int pos = compressed.position();
+          pos += (int)(desired - range.getOffset());
+          compressed.position(pos);
+          currentOffset = desired;
+          return;
+        }
+        ++i;
+      }
+      // if they are seeking to the precise end, go ahead and let them go there
+      int segments = bytes.size();
+      if (segments != 0 && desired == bytes.get(segments - 1).getEnd()) {
+        DiskRange range = bytes.get(segments - 1);
+        currentRange = segments - 1;
+        compressed = range.getData().duplicate();
+        compressed.position(compressed.limit());
+        currentOffset = desired;
+        return;
+      }
+      throw new IOException("Seek outside of data in " + this + " to " + desired);
+    }
+
+    private String rangeString() {
+      StringBuilder builder = new StringBuilder();
+      int i = 0;
+      for (DiskRange range : bytes) {
+        if (i != 0) {
+          builder.append("; ");
+        }
+        builder.append(" range " + i + " = " + range.getOffset()
+            + " to " + (range.getEnd() - range.getOffset()));
+        ++i;
+      }
+      return builder.toString();
+    }
+
+    @Override
+    public String toString() {
+      return "compressed stream " + name + " position: " + currentOffset +
+          " length: " + length + " range: " + currentRange +
+          " offset: " + (compressed == null ? 0 : compressed.position()) + " limit: " + (compressed == null ? 0 : compressed.limit()) +
+          rangeString() +
+          (uncompressed == null ? "" :
+              " uncompressed: " + uncompressed.position() + " to " +
+                  uncompressed.limit());
+    }
+  }
+
+  public abstract void seek(PositionProvider index) throws IOException;
+
+  private static void logEmptySeek(String name) {
+    if (LOG.isWarnEnabled()) {
+      LOG.warn("Attempting seek into empty stream (" + name + ") Skipping stream.");
+    }
+  }
+
+  /**
+   * Create an input stream from a list of buffers.
+   * @param streamName the name of the stream
+   * @param buffers the list of ranges of bytes for the stream
+   * @param offsets a list of offsets (the same length as input) that must
+ *                contain the first offset of each set of bytes in input
+   * @param length the length in bytes of the stream
+   * @param codec the compression codec
+   * @param bufferSize the compression buffer size
+   * @return an input stream
+   * @throws IOException
+   */
+  @VisibleForTesting
+  @Deprecated
+  public static InStream create(String streamName,
+                                ByteBuffer[] buffers,
+                                long[] offsets,
+                                long length,
+                                CompressionCodec codec,
+                                int bufferSize) throws IOException {
+    List<DiskRange> input = new ArrayList<DiskRange>(buffers.length);
+    for (int i = 0; i < buffers.length; ++i) {
+      input.add(new BufferChunk(buffers[i], offsets[i]));
+    }
+    return create(streamName, input, length, codec, bufferSize);
+  }
+
+  /**
+   * Create an input stream from a list of disk ranges with data.
+   * @param name the name of the stream
+   * @param input the list of ranges of bytes for the stream; from disk or cache
+   * @param length the length in bytes of the stream
+   * @param codec the compression codec
+   * @param bufferSize the compression buffer size
+   * @return an input stream
+   * @throws IOException
+   */
+  public static InStream create(String name,
+                                List<DiskRange> input,
+                                long length,
+                                CompressionCodec codec,
+                                int bufferSize) throws IOException {
+    if (codec == null) {
+      return new UncompressedStream(name, input, length);
+    } else {
+      return new CompressedStream(name, input, length, codec, bufferSize);
+    }
+  }
+
+  /**
+   * Creates coded input stream (used for protobuf message parsing) with higher message size limit.
+   *
+   * @param name       the name of the stream
+   * @param input      the list of ranges of bytes for the stream; from disk or cache
+   * @param length     the length in bytes of the stream
+   * @param codec      the compression codec
+   * @param bufferSize the compression buffer size
+   * @return coded input stream
+   * @throws IOException
+   */
+  public static CodedInputStream createCodedInputStream(
+      String name,
+      List<DiskRange> input,
+      long length,
+      CompressionCodec codec,
+      int bufferSize) throws IOException {
+    InStream inStream = create(name, input, length, codec, bufferSize);
+    CodedInputStream codedInputStream = CodedInputStream.newInstance(inStream);
+    codedInputStream.setSizeLimit(PROTOBUF_MESSAGE_MAX_LIMIT);
+    return codedInputStream;
+  }
+}

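CompressedStream.readHeader() above decodes a three-byte chunk header: the low
bit of the first byte marks an uncompressed ("original") chunk and the
remaining 23 bits hold the chunk length. A stand-alone round trip of that bit
layout; the encoding half mirrors what the decode implies and is an assumption
about OutStream, which is not part of this diff:

    public class ChunkHeaderExample {
      public static void main(String[] args) {
        int chunkLength = 100000;
        boolean isOriginal = false;
        // assumed encoding: header value = (length << 1) | original bit, stored low byte first
        int header = (chunkLength << 1) | (isOriginal ? 1 : 0);
        int b0 = header & 0xff;
        int b1 = (header >> 8) & 0xff;
        int b2 = (header >> 16) & 0xff;
        // decode exactly as CompressedStream.readHeader() does
        boolean decodedOriginal = (b0 & 0x01) == 1;
        int decodedLength = (b2 << 15) | (b1 << 7) | (b0 >> 1);
        System.out.println(decodedLength + " " + decodedOriginal); // 100000 false
      }
    }
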
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/impl/IntegerReader.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/impl/IntegerReader.java b/orc/src/java/org/apache/hive/orc/impl/IntegerReader.java
new file mode 100644
index 0000000..45cab28
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/impl/IntegerReader.java
@@ -0,0 +1,82 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc.impl;
+
+import java.io.IOException;
+
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+
+/**
+ * Interface for reading integers.
+ */
+public interface IntegerReader {
+
+  /**
+   * Seek to the position provided by index.
+   * @param index
+   * @throws IOException
+   */
+  void seek(PositionProvider index) throws IOException;
+
+  /**
+   * Skip the specified number of values.
+   * @param numValues the number of values to skip
+   * @throws IOException
+   */
+  void skip(long numValues) throws IOException;
+
+  /**
+   * Check if there are any more values left.
+   * @return true if there are more values to read
+   * @throws IOException
+   */
+  boolean hasNext() throws IOException;
+
+  /**
+   * Return the next available value.
+   * @return the next value
+   * @throws IOException
+   */
+  long next() throws IOException;
+
+  /**
+   * Return the next available vector for values.
+   * @param column the column being read
+   * @param data the vector to read into
+   * @param length the number of numbers to read
+   * @throws IOException
+   */
+   void nextVector(ColumnVector column,
+                   long[] data,
+                   int length
+                   ) throws IOException;
+
+  /**
+   * Return the next available vector for values. Does not change the
+   * value of column.isRepeating.
+   * @param column the column being read
+   * @param data the vector to read into
+   * @param length the number of numbers to read
+   * @throws IOException
+   */
+  void nextVector(ColumnVector column,
+                  int[] data,
+                  int length
+                  ) throws IOException;
+}

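A hypothetical, purely in-memory implementation of the interface above, shown
only to illustrate how callers consume it; it is not part of the patch and
stands in for the real readers in this package:

    import java.io.IOException;

    import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
    import org.apache.hive.orc.impl.IntegerReader;
    import org.apache.hive.orc.impl.PositionProvider;

    public class ArrayIntegerReader implements IntegerReader {
      private final long[] values;
      private int position = 0;

      public ArrayIntegerReader(long[] values) {
        this.values = values;
      }

      @Override
      public void seek(PositionProvider index) throws IOException {
        position = (int) index.getNext();  // assumes positions were recorded as plain offsets
      }

      @Override
      public void skip(long numValues) throws IOException {
        position += (int) numValues;
      }

      @Override
      public boolean hasNext() throws IOException {
        return position < values.length;
      }

      @Override
      public long next() throws IOException {
        return values[position++];
      }

      @Override
      public void nextVector(ColumnVector column, long[] data, int length) throws IOException {
        for (int i = 0; i < length; ++i) {
          data[i] = next();
        }
      }

      @Override
      public void nextVector(ColumnVector column, int[] data, int length) throws IOException {
        for (int i = 0; i < length; ++i) {
          data[i] = (int) next();
        }
      }
    }
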
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/impl/IntegerWriter.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/impl/IntegerWriter.java b/orc/src/java/org/apache/hive/orc/impl/IntegerWriter.java
new file mode 100644
index 0000000..119eed0
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/impl/IntegerWriter.java
@@ -0,0 +1,47 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc.impl;
+
+import java.io.IOException;
+
+/**
+ * Interface for writing integers.
+ */
+public interface IntegerWriter {
+
+  /**
+   * Record the current position of the stream.
+   * @param recorder the position recorder to update
+   * @throws IOException
+   */
+  void getPosition(PositionRecorder recorder) throws IOException;
+
+  /**
+   * Write the integer value.
+   * @param value the value to write
+   * @throws IOException
+   */
+  void write(long value) throws IOException;
+
+  /**
+   * Flush the buffer
+   * @throws IOException
+   */
+  void flush() throws IOException;
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/impl/MemoryManager.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/impl/MemoryManager.java b/orc/src/java/org/apache/hive/orc/impl/MemoryManager.java
new file mode 100644
index 0000000..1dd74ef
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/impl/MemoryManager.java
@@ -0,0 +1,212 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc.impl;
+
+import org.apache.hive.orc.OrcConf;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+
+import java.io.IOException;
+import java.lang.management.ManagementFactory;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.concurrent.locks.ReentrantLock;
+
+/**
+ * Implements a memory manager that keeps a global context of how many ORC
+ * writers there are and manages the memory between them. For use cases with
+ * dynamic partitions, it is easy to end up with many writers in the same task.
+ * By managing the size of each allocation, we try to cut down the size of each
+ * allocation and keep the task from running out of memory.
+ *
+ * This class is not thread safe, but it is re-entrant; ensure that it is created
+ * and that all of its methods are invoked from the same thread.
+ */
+public class MemoryManager {
+
+  private static final Logger LOG = LoggerFactory.getLogger(MemoryManager.class);
+
+  /**
+   * How often should we check the memory sizes? Measured in rows added
+   * to all of the writers.
+   */
+  private static final int ROWS_BETWEEN_CHECKS = 5000;
+  private final long totalMemoryPool;
+  private final Map<Path, WriterInfo> writerList =
+      new HashMap<Path, WriterInfo>();
+  private long totalAllocation = 0;
+  private double currentScale = 1;
+  private int rowsAddedSinceCheck = 0;
+  private final OwnedLock ownerLock = new OwnedLock();
+
+  @SuppressWarnings("serial")
+  private static class OwnedLock extends ReentrantLock {
+    public Thread getOwner() {
+      return super.getOwner();
+    }
+  }
+
+  private static class WriterInfo {
+    long allocation;
+    Callback callback;
+    WriterInfo(long allocation, Callback callback) {
+      this.allocation = allocation;
+      this.callback = callback;
+    }
+  }
+
+  public interface Callback {
+    /**
+     * The writer needs to check its memory usage
+     * @param newScale the current scale factor for memory allocations
+     * @return true if the writer was over the limit
+     * @throws IOException
+     */
+    boolean checkMemory(double newScale) throws IOException;
+  }
+
+  /**
+   * Create the memory manager.
+   * @param conf use the configuration to find the maximum size of the memory
+   *             pool.
+   */
+  public MemoryManager(Configuration conf) {
+    double maxLoad = OrcConf.MEMORY_POOL.getDouble(conf);
+    totalMemoryPool = Math.round(ManagementFactory.getMemoryMXBean().
+        getHeapMemoryUsage().getMax() * maxLoad);
+    ownerLock.lock();
+  }
+
+  /**
+   * Lightweight thread-safety check for multi-threaded access patterns.
+   */
+  private void checkOwner() {
+    if (!ownerLock.isHeldByCurrentThread()) {
+      LOG.warn("Owner thread expected {}, got {}",
+          ownerLock.getOwner(), Thread.currentThread());
+    }
+  }
+
+  /**
+   * Add a new writer's memory allocation to the pool. We use the path
+   * as a unique key to ensure that we don't get duplicates.
+   * @param path the file that is being written
+   * @param requestedAllocation the requested buffer size
+   */
+  public void addWriter(Path path, long requestedAllocation,
+                              Callback callback) throws IOException {
+    checkOwner();
+    WriterInfo oldVal = writerList.get(path);
+    // this should always be null, but we handle the case where the memory
+    // manager wasn't told that a writer is no longer in use and the task
+    // starts writing to the same path again.
+    if (oldVal == null) {
+      oldVal = new WriterInfo(requestedAllocation, callback);
+      writerList.put(path, oldVal);
+      totalAllocation += requestedAllocation;
+    } else {
+      // handle a new writer that is writing to the same path
+      totalAllocation += requestedAllocation - oldVal.allocation;
+      oldVal.allocation = requestedAllocation;
+      oldVal.callback = callback;
+    }
+    updateScale(true);
+  }
+
+  /**
+   * Remove the given writer from the pool.
+   * @param path the file that has been closed
+   */
+  public void removeWriter(Path path) throws IOException {
+    checkOwner();
+    WriterInfo val = writerList.get(path);
+    if (val != null) {
+      writerList.remove(path);
+      totalAllocation -= val.allocation;
+      updateScale(false);
+    }
+    if (writerList.isEmpty()) {
+      rowsAddedSinceCheck = 0;
+    }
+  }
+
+  /**
+   * Get the total pool size that is available for ORC writers.
+   * @return the number of bytes in the pool
+   */
+  public long getTotalMemoryPool() {
+    return totalMemoryPool;
+  }
+
+  /**
+   * The scaling factor for each allocation to ensure that the pool isn't
+   * oversubscribed.
+   * @return a fraction between 0.0 and 1.0 of the requested size that is
+   * available for each writer.
+   */
+  public double getAllocationScale() {
+    return currentScale;
+  }
+
+  /**
+   * Give the memory manager an opportunity for doing a memory check.
+   * @param rows number of rows added
+   * @throws IOException
+   */
+  public void addedRow(int rows) throws IOException {
+    rowsAddedSinceCheck += rows;
+    if (rowsAddedSinceCheck >= ROWS_BETWEEN_CHECKS) {
+      notifyWriters();
+    }
+  }
+
+  /**
+   * Notify all of the writers that they should check their memory usage.
+   * @throws IOException
+   */
+  public void notifyWriters() throws IOException {
+    checkOwner();
+    LOG.debug("Notifying writers after " + rowsAddedSinceCheck);
+    for(WriterInfo writer: writerList.values()) {
+      boolean flushed = writer.callback.checkMemory(currentScale);
+      if (LOG.isDebugEnabled() && flushed) {
+        LOG.debug("flushed " + writer.toString());
+      }
+    }
+    rowsAddedSinceCheck = 0;
+  }
+
+  /**
+   * Update the currentScale based on the current allocation and pool size.
+   * When the pool is oversubscribed, every writer's share shrinks proportionally.
+   * @param isAllocate is this an allocation? (currently unused)
+   */
+  private void updateScale(boolean isAllocate) throws IOException {
+    if (totalAllocation <= totalMemoryPool) {
+      currentScale = 1;
+    } else {
+      currentScale = (double) totalMemoryPool / totalAllocation;
+    }
+  }
+}
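
To make the scaling concrete: if the pool fraction resolves to 0.5 on a 4 GB heap, the
pool is about 2 GB; three writers requesting 1 GB each push totalAllocation to 3 GB, so
currentScale drops to 2/3 and each writer should size itself to roughly 683 MB. A minimal
registration sketch follows; it uses only the API above, but the file path, the 256 MB
request, and the flush policy inside checkMemory are assumptions, and it is expected to
run inside a method that may throw IOException.

    Configuration conf = new Configuration();
    MemoryManager memory = new MemoryManager(conf);
    memory.addWriter(new Path("/tmp/example.orc"), 256L << 20,
        new MemoryManager.Callback() {
          @Override
          public boolean checkMemory(double newScale) throws IOException {
            // flush the in-progress stripe if it has outgrown 256 MB * newScale
            return false;
          }
        });
    memory.addedRow(5000);   // reaches ROWS_BETWEEN_CHECKS, so checkMemory fires
    memory.removeWriter(new Path("/tmp/example.orc"));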

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/impl/OrcAcidUtils.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/impl/OrcAcidUtils.java b/orc/src/java/org/apache/hive/orc/impl/OrcAcidUtils.java
new file mode 100644
index 0000000..4244f24
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/impl/OrcAcidUtils.java
@@ -0,0 +1,88 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc.impl;
+
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hive.orc.Reader;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.charset.CharacterCodingException;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+
+public class OrcAcidUtils {
+  public static final String ACID_STATS = "hive.acid.stats";
+  public static final String DELTA_SIDE_FILE_SUFFIX = "_flush_length";
+
+  /**
+   * Get the filename of the ORC ACID side file that contains the lengths
+   * of the intermediate footers.
+   * @param main the main ORC filename
+   * @return the name of the side file
+   */
+  public static Path getSideFile(Path main) {
+    return new Path(main + DELTA_SIDE_FILE_SUFFIX);
+  }
+
+  /**
+   * Read the side file to get the last flush length.
+   * @param fs the file system to use
+   * @param deltaFile the path of the delta file
+   * @return the maximum size of the file to use
+   * @throws IOException
+   */
+  public static long getLastFlushLength(FileSystem fs,
+                                        Path deltaFile) throws IOException {
+    Path lengths = getSideFile(deltaFile);
+    long result = Long.MAX_VALUE;
+    if(!fs.exists(lengths)) {
+      return result;
+    }
+    try (FSDataInputStream stream = fs.open(lengths)) {
+      result = -1;
+      while (stream.available() > 0) {
+        result = stream.readLong();
+      }
+      return result;
+    } catch (IOException ioe) {
+      return result;
+    }
+  }
+
+  private static final Charset utf8 = Charset.forName("UTF-8");
+  private static final CharsetDecoder utf8Decoder = utf8.newDecoder();
+
+  public static AcidStats parseAcidStats(Reader reader) {
+    if (reader.hasMetadataValue(ACID_STATS)) {
+      try {
+        ByteBuffer val = reader.getMetadataValue(ACID_STATS).duplicate();
+        return new AcidStats(utf8Decoder.decode(val).toString());
+      } catch (CharacterCodingException e) {
+        throw new IllegalArgumentException("Bad string encoding for " +
+            ACID_STATS, e);
+      }
+    } else {
+      return null;
+    }
+  }
+
+}
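
The side file is consumed as a sequence of 8-byte lengths, and getLastFlushLength keeps
the last one it can read. A hedged usage sketch is below; the warehouse path is invented
for illustration, the imports (Configuration, FileSystem, Path) are assumed, and the code
is expected to run inside a method that may throw IOException.

    // Cap reads of a still-open delta file at its last recorded footer length.
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path delta = new Path("/warehouse/t/delta_0000001_0000001/bucket_00000");
    long readable = OrcAcidUtils.getLastFlushLength(fs, delta);
    if (readable == Long.MAX_VALUE) {
      // no side file exists, so the whole delta file is safe to read
    }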

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/impl/OrcIndex.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/impl/OrcIndex.java b/orc/src/java/org/apache/hive/orc/impl/OrcIndex.java
new file mode 100644
index 0000000..cf2b5fe
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/impl/OrcIndex.java
@@ -0,0 +1,43 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc.impl;
+
+import org.apache.hive.orc.OrcProto;
+
+public final class OrcIndex {
+  OrcProto.RowIndex[] rowGroupIndex;
+  OrcProto.BloomFilterIndex[] bloomFilterIndex;
+
+  public OrcIndex(OrcProto.RowIndex[] rgIndex, OrcProto.BloomFilterIndex[] bfIndex) {
+    this.rowGroupIndex = rgIndex;
+    this.bloomFilterIndex = bfIndex;
+  }
+
+  public OrcProto.RowIndex[] getRowGroupIndex() {
+    return rowGroupIndex;
+  }
+
+  public OrcProto.BloomFilterIndex[] getBloomFilterIndex() {
+    return bloomFilterIndex;
+  }
+
+  public void setRowGroupIndex(OrcProto.RowIndex[] rowGroupIndex) {
+    this.rowGroupIndex = rowGroupIndex;
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/impl/OrcTail.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/impl/OrcTail.java b/orc/src/java/org/apache/hive/orc/impl/OrcTail.java
new file mode 100644
index 0000000..71e3c77
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/impl/OrcTail.java
@@ -0,0 +1,138 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc.impl;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hive.orc.CompressionCodec;
+import org.apache.hive.orc.OrcFile;
+import org.apache.hive.orc.CompressionKind;
+import org.apache.hive.orc.OrcProto;
+import org.apache.hive.orc.StripeInformation;
+import org.apache.hive.orc.StripeStatistics;
+
+// TODO: Make OrcTail implement FileMetadata or Reader interface
+public final class OrcTail {
+  // postscript + footer - Serialized in OrcSplit
+  private final OrcProto.FileTail fileTail;
+  // serialized representation of metadata, footer and postscript
+  private final ByteBuffer serializedTail;
+  // used to invalidate cache entries
+  private final long fileModificationTime;
+  // lazily deserialized
+  private OrcProto.Metadata metadata;
+
+  public OrcTail(OrcProto.FileTail fileTail, ByteBuffer serializedTail) {
+    this(fileTail, serializedTail, -1);
+  }
+
+  public OrcTail(OrcProto.FileTail fileTail, ByteBuffer serializedTail, long fileModificationTime) {
+    this.fileTail = fileTail;
+    this.serializedTail = serializedTail;
+    this.fileModificationTime = fileModificationTime;
+    this.metadata = null;
+  }
+
+  public ByteBuffer getSerializedTail() {
+    return serializedTail;
+  }
+
+  public long getFileModificationTime() {
+    return fileModificationTime;
+  }
+
+  public OrcProto.Footer getFooter() {
+    return fileTail.getFooter();
+  }
+
+  public OrcProto.PostScript getPostScript() {
+    return fileTail.getPostscript();
+  }
+
+  public OrcFile.WriterVersion getWriterVersion() {
+    OrcProto.PostScript ps = fileTail.getPostscript();
+    return (ps.hasWriterVersion()
+        ? OrcFile.WriterVersion.from(ps.getWriterVersion()) : OrcFile.WriterVersion.ORIGINAL);
+  }
+
+  public List<StripeInformation> getStripes() {
+    List<StripeInformation> result = new ArrayList<>(fileTail.getFooter().getStripesCount());
+    for (OrcProto.StripeInformation stripeProto : fileTail.getFooter().getStripesList()) {
+      result.add(new ReaderImpl.StripeInformationImpl(stripeProto));
+    }
+    return result;
+  }
+
+  public CompressionKind getCompressionKind() {
+    return CompressionKind.valueOf(fileTail.getPostscript().getCompression().name());
+  }
+
+  public CompressionCodec getCompressionCodec() {
+    return PhysicalFsWriter.createCodec(getCompressionKind());
+  }
+
+  public int getCompressionBufferSize() {
+    return (int) fileTail.getPostscript().getCompressionBlockSize();
+  }
+
+  public List<StripeStatistics> getStripeStatistics() throws IOException {
+    List<StripeStatistics> result = new ArrayList<>();
+    List<OrcProto.StripeStatistics> ssProto = getStripeStatisticsProto();
+    if (ssProto != null) {
+      for (OrcProto.StripeStatistics ss : ssProto) {
+        result.add(new StripeStatistics(ss.getColStatsList()));
+      }
+    }
+    return result;
+  }
+
+  public List<OrcProto.StripeStatistics> getStripeStatisticsProto() throws IOException {
+    if (serializedTail == null) return null;
+    if (metadata == null) {
+      metadata = ReaderImpl.extractMetadata(serializedTail, 0,
+          (int) fileTail.getPostscript().getMetadataLength(),
+          getCompressionCodec(), getCompressionBufferSize());
+      // clear does not clear the contents but sets position to 0 and limit = capacity
+      serializedTail.clear();
+    }
+    return metadata.getStripeStatsList();
+  }
+
+  public int getMetadataSize() {
+    return (int) getPostScript().getMetadataLength();
+  }
+
+  public List<OrcProto.Type> getTypes() {
+    return getFooter().getTypesList();
+  }
+
+  public OrcProto.FileTail getFileTail() {
+    return fileTail;
+  }
+
+  public OrcProto.FileTail getMinimalFileTail() {
+    OrcProto.FileTail.Builder fileTailBuilder = OrcProto.FileTail.newBuilder(fileTail);
+    OrcProto.Footer.Builder footerBuilder = OrcProto.Footer.newBuilder(fileTail.getFooter());
+    footerBuilder.clearStatistics();
+    fileTailBuilder.setFooter(footerBuilder.build());
+    OrcProto.FileTail result = fileTailBuilder.build();
+    return result;
+  }
+}
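
Because getMinimalFileTail() clears the per-column statistics, a caller that ships the
tail around (for example inside a split payload) would typically serialize the slim
variant. The one-method sketch below is illustrative only; serializeForSplit is an
invented name, not part of the patch.

    static byte[] serializeForSplit(OrcTail tail) {
      // drop per-column statistics before shipping the tail with the split
      return tail.getMinimalFileTail().toByteArray();
    }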

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/impl/OutStream.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/impl/OutStream.java b/orc/src/java/org/apache/hive/orc/impl/OutStream.java
new file mode 100644
index 0000000..7157ac5
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/impl/OutStream.java
@@ -0,0 +1,289 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc.impl;
+
+import org.apache.hive.orc.CompressionCodec;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+public class OutStream extends PositionedOutputStream {
+
+  public interface OutputReceiver {
+    /**
+     * Output the given buffer to the final destination
+     * @param buffer the buffer to output
+     * @throws IOException
+     */
+    void output(ByteBuffer buffer) throws IOException;
+  }
+
+  public static final int HEADER_SIZE = 3;
+  private final String name;
+  private final OutputReceiver receiver;
+  // if enabled, the stream will be suppressed when writing the stripe
+  private boolean suppress;
+
+  /**
+   * Stores the uncompressed bytes that have been serialized, but not
+   * compressed yet. When this fills, we compress the entire buffer.
+   */
+  private ByteBuffer current = null;
+
+  /**
+   * Stores the compressed bytes until we have a full buffer and then outputs
+   * them to the receiver. If no compression is being done, this (and overflow)
+   * will always be null and the current buffer will be sent directly to the
+   * receiver.
+   */
+  private ByteBuffer compressed = null;
+
+  /**
+   * Since the compressed buffer may start with contents from previous
+   * compression blocks, we allocate an overflow buffer so that the
+   * output of the codec can be split between the two buffers. After the
+   * compressed buffer is sent to the receiver, the overflow buffer becomes
+   * the new compressed buffer.
+   */
+  private ByteBuffer overflow = null;
+  private final int bufferSize;
+  private final CompressionCodec codec;
+  private long compressedBytes = 0;
+  private long uncompressedBytes = 0;
+
+  public OutStream(String name,
+                   int bufferSize,
+                   CompressionCodec codec,
+                   OutputReceiver receiver) throws IOException {
+    this.name = name;
+    this.bufferSize = bufferSize;
+    this.codec = codec;
+    this.receiver = receiver;
+    this.suppress = false;
+  }
+
+  public void clear() throws IOException {
+    flush();
+    suppress = false;
+  }
+
+  /**
+   * Write the length of the compressed bytes. Life is much easier if the
+   * header is constant length, so just use 3 bytes. Considering most of the
+   * codecs want between 32k (snappy) and 256k (lzo, zlib), 3 bytes should
+   * be plenty. We also use the low bit for whether it is the original or
+   * compressed bytes.
+   * @param buffer the buffer to write the header to
+   * @param position the position in the buffer to write at
+   * @param val the size in the file
+   * @param original is it uncompressed
+   */
+  private static void writeHeader(ByteBuffer buffer,
+                                  int position,
+                                  int val,
+                                  boolean original) {
+    buffer.put(position, (byte) ((val << 1) + (original ? 1 : 0)));
+    buffer.put(position + 1, (byte) (val >> 7));
+    buffer.put(position + 2, (byte) (val >> 15));
+  }
+
+  private void getNewInputBuffer() throws IOException {
+    if (codec == null) {
+      current = ByteBuffer.allocate(bufferSize);
+    } else {
+      current = ByteBuffer.allocate(bufferSize + HEADER_SIZE);
+      writeHeader(current, 0, bufferSize, true);
+      current.position(HEADER_SIZE);
+    }
+  }
+
+  /**
+   * Allocate a new output buffer if we are compressing.
+   */
+  private ByteBuffer getNewOutputBuffer() throws IOException {
+    return ByteBuffer.allocate(bufferSize + HEADER_SIZE);
+  }
+
+  private void flip() throws IOException {
+    current.limit(current.position());
+    current.position(codec == null ? 0 : HEADER_SIZE);
+  }
+
+  @Override
+  public void write(int i) throws IOException {
+    if (current == null) {
+      getNewInputBuffer();
+    }
+    if (current.remaining() < 1) {
+      spill();
+    }
+    uncompressedBytes += 1;
+    current.put((byte) i);
+  }
+
+  @Override
+  public void write(byte[] bytes, int offset, int length) throws IOException {
+    if (current == null) {
+      getNewInputBuffer();
+    }
+    int remaining = Math.min(current.remaining(), length);
+    current.put(bytes, offset, remaining);
+    uncompressedBytes += remaining;
+    length -= remaining;
+    while (length != 0) {
+      spill();
+      offset += remaining;
+      remaining = Math.min(current.remaining(), length);
+      current.put(bytes, offset, remaining);
+      uncompressedBytes += remaining;
+      length -= remaining;
+    }
+  }
+
+  private void spill() throws IOException {
+    // if there isn't anything in the current buffer, don't spill
+    if (current == null ||
+        current.position() == (codec == null ? 0 : HEADER_SIZE)) {
+      return;
+    }
+    flip();
+    if (codec == null) {
+      receiver.output(current);
+      getNewInputBuffer();
+    } else {
+      if (compressed == null) {
+        compressed = getNewOutputBuffer();
+      } else if (overflow == null) {
+        overflow = getNewOutputBuffer();
+      }
+      int sizePosn = compressed.position();
+      compressed.position(compressed.position() + HEADER_SIZE);
+      if (codec.compress(current, compressed, overflow)) {
+        uncompressedBytes = 0;
+        // move position back to after the header
+        current.position(HEADER_SIZE);
+        current.limit(current.capacity());
+        // find the total bytes in the chunk
+        int totalBytes = compressed.position() - sizePosn - HEADER_SIZE;
+        if (overflow != null) {
+          totalBytes += overflow.position();
+        }
+        compressedBytes += totalBytes + HEADER_SIZE;
+        writeHeader(compressed, sizePosn, totalBytes, false);
+        // if we have less than the next header left, spill it.
+        if (compressed.remaining() < HEADER_SIZE) {
+          compressed.flip();
+          receiver.output(compressed);
+          compressed = overflow;
+          overflow = null;
+        }
+      } else {
+        compressedBytes += uncompressedBytes + HEADER_SIZE;
+        uncompressedBytes = 0;
+        // we are using the original, but need to spill the current
+        // compressed buffer first. So back up to where we started,
+        // flip it and add it to done.
+        if (sizePosn != 0) {
+          compressed.position(sizePosn);
+          compressed.flip();
+          receiver.output(compressed);
+          compressed = null;
+          // if we have an overflow, clear it and make it the new compress
+          // buffer
+          if (overflow != null) {
+            overflow.clear();
+            compressed = overflow;
+            overflow = null;
+          }
+        } else {
+          compressed.clear();
+          if (overflow != null) {
+            overflow.clear();
+          }
+        }
+
+        // now send the current buffer to the receiver and get a new one.
+        current.position(0);
+        // update the header with the current length
+        writeHeader(current, 0, current.limit() - HEADER_SIZE, true);
+        receiver.output(current);
+        getNewInputBuffer();
+      }
+    }
+  }
+
+  @Override
+  public void getPosition(PositionRecorder recorder) throws IOException {
+    if (codec == null) {
+      recorder.addPosition(uncompressedBytes);
+    } else {
+      recorder.addPosition(compressedBytes);
+      recorder.addPosition(uncompressedBytes);
+    }
+  }
+
+  @Override
+  public void flush() throws IOException {
+    spill();
+    if (compressed != null && compressed.position() != 0) {
+      compressed.flip();
+      receiver.output(compressed);
+    }
+    compressed = null;
+    uncompressedBytes = 0;
+    compressedBytes = 0;
+    overflow = null;
+    current = null;
+  }
+
+  @Override
+  public String toString() {
+    return name;
+  }
+
+  @Override
+  public long getBufferSize() {
+    long result = 0;
+    if (current != null) {
+      result += current.capacity();
+    }
+    if (compressed != null) {
+      result += compressed.capacity();
+    }
+    if (overflow != null) {
+      result += overflow.capacity();
+    }
+    return result;
+  }
+
+  /**
+   * Set the suppress flag.
+   */
+  public void suppress() {
+    suppress = true;
+  }
+
+  /**
+   * Returns the state of the suppress flag.
+   * @return value of the suppress flag
+   */
+  public boolean isSuppressed() {
+    return suppress;
+  }
+}
+
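
For reference, the 3-byte chunk header that writeHeader produces stores the length
shifted left by one, with the low bit marking an uncompressed ("original") chunk and the
least significant bits first. The two helpers below only illustrate how such a header
would be decoded; they are not part of the patch.

    // Low bit of byte 0 is the "original" flag.
    static boolean isOriginal(ByteBuffer buf, int position) {
      return (buf.get(position) & 0x01) != 0;
    }

    // Remaining 23 bits are the chunk length, split across the three bytes.
    static int chunkLength(ByteBuffer buf, int position) {
      return ((buf.get(position) & 0xff) >>> 1)
          | ((buf.get(position + 1) & 0xff) << 7)
          | ((buf.get(position + 2) & 0xff) << 15);
    }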


[21/37] hive git commit: HIVE-17118. Move the hive-orc source files to make the package names unique.

Posted by om...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java b/orc/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
deleted file mode 100644
index 2d293b5..0000000
--- a/orc/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
+++ /dev/null
@@ -1,2893 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc.impl;
-
-import java.io.IOException;
-import java.nio.charset.StandardCharsets;
-import java.sql.Date;
-import java.sql.Timestamp;
-import java.util.EnumMap;
-import java.util.Map;
-
-import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
-import org.apache.hadoop.hive.ql.util.TimestampUtils;
-import org.apache.hadoop.hive.serde2.io.DateWritable;
-import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
-import org.apache.orc.OrcProto;
-import org.apache.orc.TypeDescription;
-import org.apache.orc.TypeDescription.Category;
-
-/**
- * Convert ORC tree readers.
- */
-public class ConvertTreeReaderFactory extends TreeReaderFactory {
-
-  /**
-   * Override methods like checkEncoding to pass-thru to the convert TreeReader.
-   */
-  public static class ConvertTreeReader extends TreeReader {
-
-    private TreeReader convertTreeReader;
-
-    ConvertTreeReader(int columnId) throws IOException {
-      super(columnId);
-    }
-
-    // The ordering of types here is used to determine which numeric types
-    // are common/convertible to one another. Probably better to rely on the
-    // ordering explicitly defined here than to assume that the enum values
-    // that were arbitrarily assigned in PrimitiveCategory work for our purposes.
-    private static EnumMap<TypeDescription.Category, Integer> numericTypes =
-        new EnumMap<>(TypeDescription.Category.class);
-
-    static {
-      registerNumericType(TypeDescription.Category.BOOLEAN, 1);
-      registerNumericType(TypeDescription.Category.BYTE, 2);
-      registerNumericType(TypeDescription.Category.SHORT, 3);
-      registerNumericType(TypeDescription.Category.INT, 4);
-      registerNumericType(TypeDescription.Category.LONG, 5);
-      registerNumericType(TypeDescription.Category.FLOAT, 6);
-      registerNumericType(TypeDescription.Category.DOUBLE, 7);
-      registerNumericType(TypeDescription.Category.DECIMAL, 8);
-    }
-
-    private static void registerNumericType(TypeDescription.Category kind, int level) {
-      numericTypes.put(kind, level);
-    }
-
-    protected void setConvertTreeReader(TreeReader convertTreeReader) {
-      this.convertTreeReader = convertTreeReader;
-    }
-
-    protected TreeReader getStringGroupTreeReader(int columnId,
-        TypeDescription fileType) throws IOException {
-      switch (fileType.getCategory()) {
-      case STRING:
-        return new StringTreeReader(columnId);
-      case CHAR:
-        return new CharTreeReader(columnId, fileType.getMaxLength());
-      case VARCHAR:
-        return new VarcharTreeReader(columnId, fileType.getMaxLength());
-      default:
-        throw new RuntimeException("Unexpected type kind " + fileType.getCategory().name());
-      }
-    }
-
-    protected void assignStringGroupVectorEntry(BytesColumnVector bytesColVector,
-        int elementNum, TypeDescription readerType, byte[] bytes) {
-      assignStringGroupVectorEntry(bytesColVector,
-          elementNum, readerType, bytes, 0, bytes.length);
-    }
-
-    /*
-     * Assign a BytesColumnVector entry when we have a byte array, start, and
-     * length for the string group which can be (STRING, CHAR, VARCHAR).
-     */
-    protected void assignStringGroupVectorEntry(BytesColumnVector bytesColVector,
-        int elementNum, TypeDescription readerType, byte[] bytes, int start, int length) {
-      switch (readerType.getCategory()) {
-      case STRING:
-        bytesColVector.setVal(elementNum, bytes, start, length);
-        break;
-      case CHAR:
-        {
-          int adjustedDownLen =
-              StringExpr.rightTrimAndTruncate(bytes, start, length, readerType.getMaxLength());
-          bytesColVector.setVal(elementNum, bytes, start, adjustedDownLen);
-        }
-        break;
-      case VARCHAR:
-        {
-          int adjustedDownLen =
-              StringExpr.truncate(bytes, start, length, readerType.getMaxLength());
-          bytesColVector.setVal(elementNum, bytes, start, adjustedDownLen);
-        }
-        break;
-      default:
-        throw new RuntimeException("Unexpected type kind " + readerType.getCategory().name());
-      }
-    }
-
-    protected void convertStringGroupVectorElement(BytesColumnVector bytesColVector,
-        int elementNum, TypeDescription readerType) {
-      switch (readerType.getCategory()) {
-      case STRING:
-        // No conversion needed.
-        break;
-      case CHAR:
-        {
-          int length = bytesColVector.length[elementNum];
-          int adjustedDownLen = StringExpr
-            .rightTrimAndTruncate(bytesColVector.vector[elementNum],
-                bytesColVector.start[elementNum], length,
-                readerType.getMaxLength());
-          if (adjustedDownLen < length) {
-            bytesColVector.length[elementNum] = adjustedDownLen;
-          }
-        }
-        break;
-      case VARCHAR:
-        {
-          int length = bytesColVector.length[elementNum];
-          int adjustedDownLen = StringExpr
-            .truncate(bytesColVector.vector[elementNum],
-                bytesColVector.start[elementNum], length,
-                readerType.getMaxLength());
-          if (adjustedDownLen < length) {
-            bytesColVector.length[elementNum] = adjustedDownLen;
-          }
-        }
-        break;
-      default:
-        throw new RuntimeException("Unexpected type kind " + readerType.getCategory().name());
-      }
-    }
-
-    private boolean isParseError;
-
-    /*
-     * We do this because we want the various parse methods return a primitive.
-     *
-     * @return true if there was a parse error in the last call to
-     * parseLongFromString, etc.
-     */
-    protected boolean getIsParseError() {
-      return isParseError;
-    }
-
-    protected long parseLongFromString(String string) {
-      try {
-        long longValue = Long.parseLong(string);
-        isParseError = false;
-        return longValue;
-      } catch (NumberFormatException e) {
-        isParseError = true;
-        return 0;
-      }
-    }
-
-    protected float parseFloatFromString(String string) {
-      try {
-        float floatValue = Float.parseFloat(string);
-        isParseError = false;
-        return floatValue;
-      } catch (NumberFormatException e) {
-        isParseError = true;
-        return Float.NaN;
-      }
-    }
-
-    protected double parseDoubleFromString(String string) {
-      try {
-        double value = Double.parseDouble(string);
-        isParseError = false;
-        return value;
-      } catch (NumberFormatException e) {
-        isParseError = true;
-        return Double.NaN;
-      }
-    }
-
-    /**
-     * @param string
-     * @return the HiveDecimal parsed, or null if there was a parse error.
-     */
-    protected HiveDecimal parseDecimalFromString(String string) {
-      try {
-        HiveDecimal value = HiveDecimal.create(string);
-        return value;
-      } catch (NumberFormatException e) {
-        return null;
-      }
-    }
-
-    /**
-     * @param string
-     * @return the Timestamp parsed, or null if there was a parse error.
-     */
-    protected Timestamp parseTimestampFromString(String string) {
-      try {
-        Timestamp value = Timestamp.valueOf(string);
-        return value;
-      } catch (IllegalArgumentException e) {
-        return null;
-      }
-    }
-
-    /**
-     * @param string
-     * @return the Date parsed, or null if there was a parse error.
-     */
-    protected Date parseDateFromString(String string) {
-      try {
-        Date value = Date.valueOf(string);
-        return value;
-      } catch (IllegalArgumentException e) {
-        return null;
-      }
-    }
-
-    protected String stringFromBytesColumnVectorEntry(
-        BytesColumnVector bytesColVector, int elementNum) {
-      String string;
-
-      string = new String(
-          bytesColVector.vector[elementNum],
-          bytesColVector.start[elementNum], bytesColVector.length[elementNum],
-          StandardCharsets.UTF_8);
-
-      return string;
-    }
-
-    private static final double MIN_LONG_AS_DOUBLE = -0x1p63;
-    /*
-     * We cannot store Long.MAX_VALUE as a double without losing precision. Instead, we store
-     * Long.MAX_VALUE + 1 == -Long.MIN_VALUE, and then offset all comparisons by 1.
-     */
-    private static final double MAX_LONG_AS_DOUBLE_PLUS_ONE = 0x1p63;
-
-    public boolean doubleCanFitInLong(double doubleValue) {
-
-      // Borrowed from Guava DoubleMath.roundToLong except do not want dependency on Guava and we
-      // don't want to catch an exception.
-
-      return ((MIN_LONG_AS_DOUBLE - doubleValue < 1.0) &&
-              (doubleValue < MAX_LONG_AS_DOUBLE_PLUS_ONE));
-    }
-
-    @Override
-    void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
-      // Pass-thru.
-      convertTreeReader.checkEncoding(encoding);
-    }
-
-    @Override
-    void startStripe(Map<StreamName, InStream> streams,
-        OrcProto.StripeFooter stripeFooter
-    ) throws IOException {
-      // Pass-thru.
-      convertTreeReader.startStripe(streams, stripeFooter);
-    }
-
-    @Override
-    public void seek(PositionProvider[] index) throws IOException {
-     // Pass-thru.
-      convertTreeReader.seek(index);
-    }
-
-    @Override
-    public void seek(PositionProvider index) throws IOException {
-      // Pass-thru.
-      convertTreeReader.seek(index);
-    }
-
-    @Override
-    void skipRows(long items) throws IOException {
-      // Pass-thru.
-      convertTreeReader.skipRows(items);
-    }
-
-    /**
-     * Override this to use convertVector.
-     * Source and result are member variables in the subclass with the right
-     * type.
-     * @param elementNum
-     * @throws IOException
-     */
-    // Override this to use convertVector.
-    public void setConvertVectorElement(int elementNum) throws IOException {
-      throw new RuntimeException("Expected this method to be overriden");
-    }
-
-    // Common code used by the conversion.
-    public void convertVector(ColumnVector fromColVector,
-        ColumnVector resultColVector, final int batchSize) throws IOException {
-
-      resultColVector.reset();
-      if (fromColVector.isRepeating) {
-        resultColVector.isRepeating = true;
-        if (fromColVector.noNulls || !fromColVector.isNull[0]) {
-          setConvertVectorElement(0);
-        } else {
-          resultColVector.noNulls = false;
-          resultColVector.isNull[0] = true;
-        }
-      } else if (fromColVector.noNulls){
-        for (int i = 0; i < batchSize; i++) {
-          setConvertVectorElement(i);
-        }
-      } else {
-        for (int i = 0; i < batchSize; i++) {
-          if (!fromColVector.isNull[i]) {
-            setConvertVectorElement(i);
-          } else {
-            resultColVector.noNulls = false;
-            resultColVector.isNull[i] = true;
-          }
-        }
-      }
-    }
-
-    public void downCastAnyInteger(LongColumnVector longColVector, int elementNum,
-        TypeDescription readerType) {
-      downCastAnyInteger(longColVector, elementNum, longColVector.vector[elementNum], readerType);
-    }
-
-    public void downCastAnyInteger(LongColumnVector longColVector, int elementNum, long inputLong,
-        TypeDescription readerType) {
-      long[] vector = longColVector.vector;
-      long outputLong;
-      Category readerCategory = readerType.getCategory();
-      switch (readerCategory) {
-      case BOOLEAN:
-        // No data loss for boolean.
-        vector[elementNum] = inputLong == 0 ? 0 : 1;
-        return;
-      case BYTE:
-        outputLong = (byte) inputLong;
-        break;
-      case SHORT:
-        outputLong = (short) inputLong;
-        break;
-      case INT:
-        outputLong = (int) inputLong;
-        break;
-      case LONG:
-        // No data loss for long.
-        vector[elementNum] = inputLong;
-        return;
-      default:
-        throw new RuntimeException("Unexpected type kind " + readerCategory.name());
-      }
-
-      if (outputLong != inputLong) {
-        // Data loss.
-        longColVector.isNull[elementNum] = true;
-        longColVector.noNulls = false;
-      } else {
-        vector[elementNum] = outputLong;
-      }
-    }
-
-    protected boolean integerDownCastNeeded(TypeDescription fileType, TypeDescription readerType) {
-      Integer fileLevel = numericTypes.get(fileType.getCategory());
-      Integer schemaLevel = numericTypes.get(readerType.getCategory());
-      return (schemaLevel.intValue() < fileLevel.intValue());
-    }
-  }
-
-  public static class AnyIntegerTreeReader extends ConvertTreeReader {
-
-    private TypeDescription.Category fileTypeCategory;
-    private TreeReader anyIntegerTreeReader;
-
-    private long longValue;
-
-    AnyIntegerTreeReader(int columnId, TypeDescription fileType,
-        boolean skipCorrupt) throws IOException {
-      super(columnId);
-      this.fileTypeCategory = fileType.getCategory();
-      switch (fileTypeCategory) {
-      case BOOLEAN:
-        anyIntegerTreeReader = new BooleanTreeReader(columnId);
-        break;
-      case BYTE:
-        anyIntegerTreeReader = new ByteTreeReader(columnId);
-        break;
-      case SHORT:
-        anyIntegerTreeReader = new ShortTreeReader(columnId);
-        break;
-      case INT:
-        anyIntegerTreeReader = new IntTreeReader(columnId);
-        break;
-      case LONG:
-        anyIntegerTreeReader = new LongTreeReader(columnId, skipCorrupt);
-        break;
-      default:
-        throw new RuntimeException("Unexpected type kind " + fileType.getCategory().name());
-      }
-      setConvertTreeReader(anyIntegerTreeReader);
-    }
-
-    protected long getLong() throws IOException {
-      return longValue;
-    }
-
-    protected String getString(long longValue) {
-      if (fileTypeCategory == TypeDescription.Category.BOOLEAN) {
-        return longValue == 0 ? "FALSE" : "TRUE";
-      } else {
-        return Long.toString(longValue);
-      }
-    }
-
-    protected String getString() {
-      return getString(longValue);
-    }
-
-    @Override
-    public void nextVector(ColumnVector previousVector,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      anyIntegerTreeReader.nextVector(previousVector, isNull, batchSize);
-    }
-  }
-
-  public static class AnyIntegerFromAnyIntegerTreeReader extends ConvertTreeReader {
-
-    private AnyIntegerTreeReader anyIntegerAsLongTreeReader;
-
-    private final TypeDescription readerType;
-    private final boolean downCastNeeded;
-
-    AnyIntegerFromAnyIntegerTreeReader(int columnId, TypeDescription fileType, TypeDescription readerType, boolean skipCorrupt) throws IOException {
-      super(columnId);
-      this.readerType = readerType;
-      anyIntegerAsLongTreeReader = new AnyIntegerTreeReader(columnId, fileType, skipCorrupt);
-      setConvertTreeReader(anyIntegerAsLongTreeReader);
-      downCastNeeded = integerDownCastNeeded(fileType, readerType);
-    }
-
-    @Override
-    public void nextVector(ColumnVector previousVector,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      anyIntegerAsLongTreeReader.nextVector(previousVector, isNull, batchSize);
-      LongColumnVector resultColVector = (LongColumnVector) previousVector;
-      if (downCastNeeded) {
-        if (resultColVector.isRepeating) {
-          if (resultColVector.noNulls || !resultColVector.isNull[0]) {
-            downCastAnyInteger(resultColVector, 0, readerType);
-          } else {
-            // Result remains null.
-          }
-        } else if (resultColVector.noNulls){
-          for (int i = 0; i < batchSize; i++) {
-            downCastAnyInteger(resultColVector, i, readerType);
-          }
-        } else {
-          for (int i = 0; i < batchSize; i++) {
-            if (!resultColVector.isNull[i]) {
-              downCastAnyInteger(resultColVector, i, readerType);
-            } else {
-              // Result remains null.
-            }
-          }
-        }
-      }
-    }
-  }
-
-  public static class AnyIntegerFromFloatTreeReader extends ConvertTreeReader {
-
-    private FloatTreeReader floatTreeReader;
-
-    private final TypeDescription readerType;
-    private DoubleColumnVector doubleColVector;
-    private LongColumnVector longColVector;
-
-    AnyIntegerFromFloatTreeReader(int columnId, TypeDescription readerType)
-        throws IOException {
-      super(columnId);
-      this.readerType = readerType;
-      floatTreeReader = new FloatTreeReader(columnId);
-      setConvertTreeReader(floatTreeReader);
-    }
-
-    @Override
-    public void setConvertVectorElement(int elementNum) throws IOException {
-      double doubleValue = doubleColVector.vector[elementNum];
-      if (!doubleCanFitInLong(doubleValue)) {
-        longColVector.isNull[elementNum] = true;
-        longColVector.noNulls = false;
-      } else {
-        // UNDONE: Does the overflow check above using double really work here for float?
-        float floatValue = (float) doubleValue;
-        downCastAnyInteger(longColVector, elementNum, (long) floatValue, readerType);
-      }
-    }
-
-    @Override
-    public void nextVector(ColumnVector previousVector,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      if (doubleColVector == null) {
-        // Allocate column vector for file; cast column vector for reader.
-        doubleColVector = new DoubleColumnVector();
-        longColVector = (LongColumnVector) previousVector;
-      }
-      // Read present/isNull stream
-      floatTreeReader.nextVector(doubleColVector, isNull, batchSize);
-
-      convertVector(doubleColVector, longColVector, batchSize);
-    }
-  }
-
-  public static class AnyIntegerFromDoubleTreeReader extends ConvertTreeReader {
-
-    private DoubleTreeReader doubleTreeReader;
-
-    private final TypeDescription readerType;
-    private DoubleColumnVector doubleColVector;
-    private LongColumnVector longColVector;
-
-    AnyIntegerFromDoubleTreeReader(int columnId, TypeDescription readerType)
-        throws IOException {
-      super(columnId);
-      this.readerType = readerType;
-      doubleTreeReader = new DoubleTreeReader(columnId);
-      setConvertTreeReader(doubleTreeReader);
-    }
-
-    @Override
-    public void setConvertVectorElement(int elementNum) throws IOException {
-      double doubleValue = doubleColVector.vector[elementNum];
-      if (!doubleCanFitInLong(doubleValue)) {
-        longColVector.isNull[elementNum] = true;
-        longColVector.noNulls = false;
-      } else {
-        downCastAnyInteger(longColVector, elementNum, (long) doubleValue, readerType);
-      }
-    }
-
-    @Override
-    public void nextVector(ColumnVector previousVector,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      if (doubleColVector == null) {
-        // Allocate column vector for file; cast column vector for reader.
-        doubleColVector = new DoubleColumnVector();
-        longColVector = (LongColumnVector) previousVector;
-      }
-      // Read present/isNull stream
-      doubleTreeReader.nextVector(doubleColVector, isNull, batchSize);
-
-      convertVector(doubleColVector, longColVector, batchSize);
-    }
-  }
-
-  public static class AnyIntegerFromDecimalTreeReader extends ConvertTreeReader {
-
-    private DecimalTreeReader decimalTreeReader;
-
-    private final int precision;
-    private final int scale;
-    private final TypeDescription readerType;
-    private DecimalColumnVector decimalColVector;
-    private LongColumnVector longColVector;
-
-    AnyIntegerFromDecimalTreeReader(int columnId, TypeDescription fileType,
-        TypeDescription readerType) throws IOException {
-      super(columnId);
-      this.precision = fileType.getPrecision();
-      this.scale = fileType.getScale();
-      this.readerType = readerType;
-      decimalTreeReader = new DecimalTreeReader(columnId, precision, scale);
-      setConvertTreeReader(decimalTreeReader);
-    }
-
-    private static HiveDecimal DECIMAL_MAX_LONG = HiveDecimal.create(Long.MAX_VALUE);
-    private static HiveDecimal DECIMAL_MIN_LONG = HiveDecimal.create(Long.MIN_VALUE);
-
-    @Override
-    public void setConvertVectorElement(int elementNum) throws IOException {
-      HiveDecimal decimalValue = decimalColVector.vector[elementNum].getHiveDecimal();
-      if (decimalValue.compareTo(DECIMAL_MAX_LONG) > 0 ||
-          decimalValue.compareTo(DECIMAL_MIN_LONG) < 0) {
-        longColVector.isNull[elementNum] = true;
-        longColVector.noNulls = false;
-      } else {
-        downCastAnyInteger(longColVector, elementNum, decimalValue.longValue(), readerType);
-      }
-    }
-
-    @Override
-    public void nextVector(ColumnVector previousVector,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      if (decimalColVector == null) {
-        // Allocate column vector for file; cast column vector for reader.
-        decimalColVector = new DecimalColumnVector(precision, scale);
-        longColVector = (LongColumnVector) previousVector;
-      }
-      // Read present/isNull stream
-      decimalTreeReader.nextVector(decimalColVector, isNull, batchSize);
-
-      convertVector(decimalColVector, longColVector, batchSize);
-    }
-  }
-
-  public static class AnyIntegerFromStringGroupTreeReader extends ConvertTreeReader {
-
-    private TreeReader stringGroupTreeReader;
-
-    private final TypeDescription readerType;
-    private BytesColumnVector bytesColVector;
-    private LongColumnVector longColVector;
-
-    AnyIntegerFromStringGroupTreeReader(int columnId, TypeDescription fileType,
-        TypeDescription readerType) throws IOException {
-      super(columnId);
-      this.readerType = readerType;
-      stringGroupTreeReader = getStringGroupTreeReader(columnId, fileType);
-      setConvertTreeReader(stringGroupTreeReader);
-    }
-
-    @Override
-    public void setConvertVectorElement(int elementNum) throws IOException {
-      String string = stringFromBytesColumnVectorEntry(bytesColVector, elementNum);
-      long longValue = parseLongFromString(string);
-      if (!getIsParseError()) {
-        downCastAnyInteger(longColVector, elementNum, longValue, readerType);
-      } else {
-        longColVector.noNulls = false;
-        longColVector.isNull[elementNum] = true;
-      }
-    }
-
-    @Override
-    public void nextVector(ColumnVector previousVector,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      if (bytesColVector == null) {
-        // Allocate column vector for file; cast column vector for reader.
-        bytesColVector = new BytesColumnVector();
-        longColVector = (LongColumnVector) previousVector;
-      }
-      // Read present/isNull stream
-      stringGroupTreeReader.nextVector(bytesColVector, isNull, batchSize);
-
-      convertVector(bytesColVector, longColVector, batchSize);
-    }
-  }
-
-  public static class AnyIntegerFromTimestampTreeReader extends ConvertTreeReader {
-
-    private TimestampTreeReader timestampTreeReader;
-
-    private final TypeDescription readerType;
-    private TimestampColumnVector timestampColVector;
-    private LongColumnVector longColVector;
-
-    AnyIntegerFromTimestampTreeReader(int columnId, TypeDescription readerType,
-        boolean skipCorrupt) throws IOException {
-      super(columnId);
-      this.readerType = readerType;
-      timestampTreeReader = new TimestampTreeReader(columnId, skipCorrupt);
-      setConvertTreeReader(timestampTreeReader);
-    }
-
-    @Override
-    public void setConvertVectorElement(int elementNum) throws IOException {
-      // Use TimestampWritable's getSeconds.
-      long longValue = TimestampUtils.millisToSeconds(
-          timestampColVector.asScratchTimestamp(elementNum).getTime());
-      downCastAnyInteger(longColVector, elementNum, longValue, readerType);
-    }
-
-    @Override
-    public void nextVector(ColumnVector previousVector,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      if (timestampColVector == null) {
-        // Allocate column vector for file; cast column vector for reader.
-        timestampColVector = new TimestampColumnVector();
-        longColVector = (LongColumnVector) previousVector;
-      }
-      // Read present/isNull stream
-      timestampTreeReader.nextVector(timestampColVector, isNull, batchSize);
-
-      convertVector(timestampColVector, longColVector, batchSize);
-    }
-  }
-
-  public static class FloatFromAnyIntegerTreeReader extends ConvertTreeReader {
-
-    private AnyIntegerTreeReader anyIntegerAsLongTreeReader;
-
-    private LongColumnVector longColVector;
-    private DoubleColumnVector doubleColVector;
-
-    FloatFromAnyIntegerTreeReader(int columnId, TypeDescription fileType,
-        boolean skipCorrupt) throws IOException {
-      super(columnId);
-      anyIntegerAsLongTreeReader =
-          new AnyIntegerTreeReader(columnId, fileType, skipCorrupt);
-      setConvertTreeReader(anyIntegerAsLongTreeReader);
-    }
-
-    @Override
-    public void setConvertVectorElement(int elementNum) throws IOException {
-      float floatValue = (float) longColVector.vector[elementNum];
-      if (!Float.isNaN(floatValue)) {
-        doubleColVector.vector[elementNum] = floatValue;
-      } else {
-        doubleColVector.vector[elementNum] = Double.NaN;
-        doubleColVector.noNulls = false;
-        doubleColVector.isNull[elementNum] = true;
-      }
-    }
-
-    @Override
-    public void nextVector(ColumnVector previousVector,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      if (longColVector == null) {
-        // Allocate column vector for file; cast column vector for reader.
-        longColVector = new LongColumnVector();
-        doubleColVector = (DoubleColumnVector) previousVector;
-      }
-      // Read present/isNull stream
-      anyIntegerAsLongTreeReader.nextVector(longColVector, isNull, batchSize);
-
-      convertVector(longColVector, doubleColVector, batchSize);
-    }
-  }
-
-  public static class FloatFromDoubleTreeReader extends ConvertTreeReader {
-
-    private DoubleTreeReader doubleTreeReader;
-
-    FloatFromDoubleTreeReader(int columnId) throws IOException {
-      super(columnId);
-      doubleTreeReader = new DoubleTreeReader(columnId);
-      setConvertTreeReader(doubleTreeReader);
-    }
-
-    @Override
-    public void nextVector(ColumnVector previousVector,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      doubleTreeReader.nextVector(previousVector, isNull, batchSize);
-
-      DoubleColumnVector resultColVector = (DoubleColumnVector) previousVector;
-      double[] resultVector = resultColVector.vector;
-      if (resultColVector.isRepeating) {
-        if (resultColVector.noNulls || !resultColVector.isNull[0]) {
-          resultVector[0] = (float) resultVector[0];
-        } else {
-          // Remains null.
-        }
-      } else if (resultColVector.noNulls){
-        for (int i = 0; i < batchSize; i++) {
-          resultVector[i] = (float) resultVector[i];
-        }
-      } else {
-        for (int i = 0; i < batchSize; i++) {
-          if (!resultColVector.isNull[i]) {
-            resultVector[i] = (float) resultVector[i];
-          } else {
-            // Remains null.
-          }
-        }
-      }
-    }
-  }
-
-  public static class FloatFromDecimalTreeReader extends ConvertTreeReader {
-
-    private DecimalTreeReader decimalTreeReader;
-
-    private final int precision;
-    private final int scale;
-    private DecimalColumnVector decimalColVector;
-    private DoubleColumnVector doubleColVector;
-
-    FloatFromDecimalTreeReader(int columnId, TypeDescription fileType,
-        TypeDescription readerType) throws IOException {
-      super(columnId);
-      this.precision = fileType.getPrecision();
-      this.scale = fileType.getScale();
-      decimalTreeReader = new DecimalTreeReader(columnId, precision, scale);
-      setConvertTreeReader(decimalTreeReader);
-    }
-
-    @Override
-    public void setConvertVectorElement(int elementNum) throws IOException {
-      doubleColVector.vector[elementNum] =
-          (float) decimalColVector.vector[elementNum].doubleValue();
-    }
-
-    @Override
-    public void nextVector(ColumnVector previousVector,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      if (decimalColVector == null) {
-        // Allocate column vector for file; cast column vector for reader.
-        decimalColVector = new DecimalColumnVector(precision, scale);
-        doubleColVector = (DoubleColumnVector) previousVector;
-      }
-      // Read present/isNull stream
-      decimalTreeReader.nextVector(decimalColVector, isNull, batchSize);
-
-      convertVector(decimalColVector, doubleColVector, batchSize);
-    }
-  }
-
-  public static class FloatFromStringGroupTreeReader extends ConvertTreeReader {
-
-    private TreeReader stringGroupTreeReader;
-
-    private BytesColumnVector bytesColVector;
-    private DoubleColumnVector doubleColVector;
-
-    FloatFromStringGroupTreeReader(int columnId, TypeDescription fileType)
-        throws IOException {
-      super(columnId);
-      stringGroupTreeReader = getStringGroupTreeReader(columnId, fileType);
-      setConvertTreeReader(stringGroupTreeReader);
-    }
-
-    @Override
-    public void setConvertVectorElement(int elementNum) throws IOException {
-      String string = stringFromBytesColumnVectorEntry(bytesColVector, elementNum);
-      float floatValue = parseFloatFromString(string);
-      if (!getIsParseError()) {
-        doubleColVector.vector[elementNum] = floatValue;
-      } else {
-        doubleColVector.vector[elementNum] = Double.NaN;
-        doubleColVector.noNulls = false;
-        doubleColVector.isNull[elementNum] = true;
-      }
-    }
-
-    @Override
-    public void nextVector(ColumnVector previousVector,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      if (bytesColVector == null) {
-        // Allocate column vector for file; cast column vector for reader.
-        bytesColVector = new BytesColumnVector();
-        doubleColVector = (DoubleColumnVector) previousVector;
-      }
-      // Read present/isNull stream
-      stringGroupTreeReader.nextVector(bytesColVector, isNull, batchSize);
-
-      convertVector(bytesColVector, doubleColVector, batchSize);
-    }
-  }
-
-  public static class FloatFromTimestampTreeReader extends ConvertTreeReader {
-
-    private TimestampTreeReader timestampTreeReader;
-
-    private TimestampColumnVector timestampColVector;
-    private DoubleColumnVector doubleColVector;
-
-    FloatFromTimestampTreeReader(int columnId, boolean skipCorrupt) throws IOException {
-      super(columnId);
-      timestampTreeReader = new TimestampTreeReader(columnId, skipCorrupt);
-      setConvertTreeReader(timestampTreeReader);
-    }
-
-    @Override
-    public void setConvertVectorElement(int elementNum) throws IOException {
-      doubleColVector.vector[elementNum] = (float) TimestampUtils.getDouble(
-          timestampColVector.asScratchTimestamp(elementNum));
-    }
-
-    @Override
-    public void nextVector(ColumnVector previousVector,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      if (timestampColVector == null) {
-        // Allocate column vector for file; cast column vector for reader.
-        timestampColVector = new TimestampColumnVector();
-        doubleColVector = (DoubleColumnVector) previousVector;
-      }
-      // Read present/isNull stream
-      timestampTreeReader.nextVector(timestampColVector, isNull, batchSize);
-
-      convertVector(timestampColVector, doubleColVector, batchSize);
-    }
-  }
-
-  public static class DoubleFromAnyIntegerTreeReader extends ConvertTreeReader {
-
-    private AnyIntegerTreeReader anyIntegerAsLongTreeReader;
-
-    private LongColumnVector longColVector;
-    private DoubleColumnVector doubleColVector;
-
-    DoubleFromAnyIntegerTreeReader(int columnId, TypeDescription fileType,
-        boolean skipCorrupt) throws IOException {
-      super(columnId);
-      anyIntegerAsLongTreeReader =
-          new AnyIntegerTreeReader(columnId, fileType, skipCorrupt);
-      setConvertTreeReader(anyIntegerAsLongTreeReader);
-    }
-
-    @Override
-    public void setConvertVectorElement(int elementNum) {
-
-      double doubleValue = (double) longColVector.vector[elementNum];
-      if (!Double.isNaN(doubleValue)) {
-        doubleColVector.vector[elementNum] = doubleValue;
-      } else {
-        doubleColVector.vector[elementNum] = Double.NaN;
-        doubleColVector.noNulls = false;
-        doubleColVector.isNull[elementNum] = true;
-      }
-    }
-
-    @Override
-    public void nextVector(ColumnVector previousVector,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      if (longColVector == null) {
-        // Allocate column vector for file; cast column vector for reader.
-        longColVector = new LongColumnVector();
-        doubleColVector = (DoubleColumnVector) previousVector;
-      }
-      // Read present/isNull stream
-      anyIntegerAsLongTreeReader.nextVector(longColVector, isNull, batchSize);
-
-      convertVector(longColVector, doubleColVector, batchSize);
-    }
-  }
-
-  public static class DoubleFromFloatTreeReader extends ConvertTreeReader {
-
-    private FloatTreeReader floatTreeReader;
-
-    DoubleFromFloatTreeReader(int columnId) throws IOException {
-      super(columnId);
-      floatTreeReader = new FloatTreeReader(columnId);
-      setConvertTreeReader(floatTreeReader);
-    }
-
-    @Override
-    public void nextVector(ColumnVector previousVector,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      // We get the DoubleColumnVector produced by the float tree reader first, then iterate
-      // through the elements and apply a double -> float -> string -> double conversion to
-      // preserve precision. When the float tree reader reads a float and assigns it to a double,
-      // Java's widening conversion adds extra precision, which would break all comparisons.
-      // Example: float f = 74.72
-      // double d = f ---> 74.72000122070312
-      // Double.parseDouble(String.valueOf(f)) ---> 74.72
-      floatTreeReader.nextVector(previousVector, isNull, batchSize);
-
-      DoubleColumnVector doubleColumnVector = (DoubleColumnVector) previousVector;
-      if (doubleColumnVector.isRepeating) {
-        if (doubleColumnVector.noNulls || !doubleColumnVector.isNull[0]) {
-          final float f = (float) doubleColumnVector.vector[0];
-          doubleColumnVector.vector[0] = Double.parseDouble(String.valueOf(f));
-        }
-      } else if (doubleColumnVector.noNulls){
-        for (int i = 0; i < batchSize; i++) {
-          final float f = (float) doubleColumnVector.vector[i];
-          doubleColumnVector.vector[i] = Double.parseDouble(String.valueOf(f));
-        }
-      } else {
-        for (int i = 0; i < batchSize; i++) {
-          if (!doubleColumnVector.isNull[i]) {
-            final float f = (float) doubleColumnVector.vector[i];
-            doubleColumnVector.vector[i] = Double.parseDouble(String.valueOf(f));
-          }
-        }
-      }
-    }
-  }
-
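The precision note in DoubleFromFloatTreeReader above is easy to reproduce in isolation; a minimal sketch using only the JDK, with an illustrative value:

    public class FloatWideningSketch {
      public static void main(String[] args) {
        float f = 74.72f;
        double widened = f;                                        // 74.72000122070312
        double viaString = Double.parseDouble(String.valueOf(f));  // 74.72
        System.out.println(widened + " vs " + viaString);
      }
    }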
-  public static class DoubleFromDecimalTreeReader extends ConvertTreeReader {
-
-    private DecimalTreeReader decimalTreeReader;
-
-    private final int precision;
-    private final int scale;
-    private DecimalColumnVector decimalColVector;
-    private DoubleColumnVector doubleColVector;
-
-    DoubleFromDecimalTreeReader(int columnId, TypeDescription fileType) throws IOException {
-      super(columnId);
-      this.precision = fileType.getPrecision();
-      this.scale = fileType.getScale();
-      decimalTreeReader = new DecimalTreeReader(columnId, precision, scale);
-      setConvertTreeReader(decimalTreeReader);
-    }
-
-    @Override
-    public void setConvertVectorElement(int elementNum) throws IOException {
-      doubleColVector.vector[elementNum] =
-          decimalColVector.vector[elementNum].doubleValue();
-    }
-
-    @Override
-    public void nextVector(ColumnVector previousVector,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      if (decimalColVector == null) {
-        // Allocate column vector for file; cast column vector for reader.
-        decimalColVector = new DecimalColumnVector(precision, scale);
-        doubleColVector = (DoubleColumnVector) previousVector;
-      }
-      // Read present/isNull stream
-      decimalTreeReader.nextVector(decimalColVector, isNull, batchSize);
-
-      convertVector(decimalColVector, doubleColVector, batchSize);
-    }
-  }
-
-  public static class DoubleFromStringGroupTreeReader extends ConvertTreeReader {
-
-    private TreeReader stringGroupTreeReader;
-
-    private BytesColumnVector bytesColVector;
-    private DoubleColumnVector doubleColVector;
-
-    DoubleFromStringGroupTreeReader(int columnId, TypeDescription fileType)
-        throws IOException {
-      super(columnId);
-      stringGroupTreeReader = getStringGroupTreeReader(columnId, fileType);
-      setConvertTreeReader(stringGroupTreeReader);
-    }
-
-    @Override
-    public void setConvertVectorElement(int elementNum) throws IOException {
-      String string = stringFromBytesColumnVectorEntry(bytesColVector, elementNum);
-      double doubleValue = parseDoubleFromString(string);
-      if (!getIsParseError()) {
-        doubleColVector.vector[elementNum] = doubleValue;
-      } else {
-        doubleColVector.noNulls = false;
-        doubleColVector.isNull[elementNum] = true;
-      }
-    }
-
-    @Override
-    public void nextVector(ColumnVector previousVector,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      if (bytesColVector == null) {
-        // Allocate column vector for file; cast column vector for reader.
-        bytesColVector = new BytesColumnVector();
-        doubleColVector = (DoubleColumnVector) previousVector;
-      }
-      // Read present/isNull stream
-      stringGroupTreeReader.nextVector(bytesColVector, isNull, batchSize);
-
-      convertVector(bytesColVector, doubleColVector, batchSize);
-    }
-  }
-
-  public static class DoubleFromTimestampTreeReader extends ConvertTreeReader {
-
-    private TimestampTreeReader timestampTreeReader;
-
-    private TimestampColumnVector timestampColVector;
-    private DoubleColumnVector doubleColVector;
-
-    DoubleFromTimestampTreeReader(int columnId, boolean skipCorrupt) throws IOException {
-      super(columnId);
-      timestampTreeReader = new TimestampTreeReader(columnId, skipCorrupt);
-      setConvertTreeReader(timestampTreeReader);
-    }
-
-    @Override
-    public void setConvertVectorElement(int elementNum) throws IOException {
-      doubleColVector.vector[elementNum] = TimestampUtils.getDouble(
-          timestampColVector.asScratchTimestamp(elementNum));
-    }
-
-    @Override
-    public void nextVector(ColumnVector previousVector,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      if (timestampColVector == null) {
-        // Allocate column vector for file; cast column vector for reader.
-        timestampColVector = new TimestampColumnVector();
-        doubleColVector = (DoubleColumnVector) previousVector;
-      }
-      // Read present/isNull stream
-      timestampTreeReader.nextVector(timestampColVector, isNull, batchSize);
-
-      convertVector(timestampColVector, doubleColVector, batchSize);
-    }
-  }
-
-  public static class DecimalFromAnyIntegerTreeReader extends ConvertTreeReader {
-
-    private AnyIntegerTreeReader anyIntegerAsLongTreeReader;
-
-    private LongColumnVector longColVector;
-    private DecimalColumnVector decimalColVector;
-
-    DecimalFromAnyIntegerTreeReader(int columnId, TypeDescription fileType, boolean skipCorrupt)
-        throws IOException {
-      super(columnId);
-      anyIntegerAsLongTreeReader =
-          new AnyIntegerTreeReader(columnId, fileType, skipCorrupt);
-      setConvertTreeReader(anyIntegerAsLongTreeReader);
-    }
-
-    @Override
-    public void setConvertVectorElement(int elementNum) {
-      long longValue = longColVector.vector[elementNum];
-      HiveDecimalWritable hiveDecimalWritable = new HiveDecimalWritable(longValue);
-      // The DecimalColumnVector will enforce precision and scale and set the entry to null when out of bounds.
-      decimalColVector.set(elementNum, hiveDecimalWritable);
-    }
-
-    @Override
-    public void nextVector(ColumnVector previousVector,
-        boolean[] isNull,
-        final int batchSize) throws IOException {
-      if (longColVector == null) {
-        // Allocate column vector for file; cast column vector for reader.
-        longColVector = new LongColumnVector();
-        decimalColVector = (DecimalColumnVector) previousVector;
-      }
-      // Read present/isNull stream
-      anyIntegerAsLongTreeReader.nextVector(longColVector, isNull, batchSize);
-
-      convertVector(longColVector, decimalColVector, batchSize);
-    }
-  }
-
-  public static class DecimalFromFloatTreeReader extends ConvertTreeReader {
-
-    private FloatTreeReader floatTreeReader;
-
-    private DoubleColumnVector doubleColVector;
-    private DecimalColumnVector decimalColVector;
-
-    DecimalFromFloatTreeReader(int columnId, TypeDescription readerType)
-        throws IOException {
-      super(columnId);
-      floatTreeReader = new FloatTreeReader(columnId);
-      setConvertTreeReader(floatTreeReader);
-    }
-
-    @Override
-    public void setConvertVectorElement(int elementNum) throws IOException {
-      float floatValue = (float) doubleColVector.vector[elementNum];
-      if (!Float.isNaN(floatValue)) {
-        HiveDecimal decimalValue =
-            HiveDecimal.create(Float.toString(floatValue));
-        if (decimalValue != null) {
-          // The DecimalColumnVector will enforce precision and scale and set the entry to null when out of bounds.
-          decimalColVector.set(elementNum, decimalValue);
-        } else {
-          decimalColVector.noNulls = false;
-          decimalColVector.isNull[elementNum] = true;
-        }
-      } else {
-        decimalColVector.noNulls = false;
-        decimalColVector.isNull[elementNum] = true;
-      }
-    }
-
-    @Override
-    public void nextVector(ColumnVector previousVector,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      if (doubleColVector == null) {
-        // Allocate column vector for file; cast column vector for reader.
-        doubleColVector = new DoubleColumnVector();
-        decimalColVector = (DecimalColumnVector) previousVector;
-      }
-      // Read present/isNull stream
-      floatTreeReader.nextVector(doubleColVector, isNull, batchSize);
-
-      convertVector(doubleColVector, decimalColVector, batchSize);
-    }
-  }
-
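The null checks in the decimal converters above rely on HiveDecimal.create returning null when the input cannot be represented, so a bad value becomes a null row rather than an exception. A rough sketch of that contract, assuming the same org.apache.hadoop.hive.common.type.HiveDecimal these readers use:

    import org.apache.hadoop.hive.common.type.HiveDecimal;

    public class HiveDecimalCreateSketch {
      public static void main(String[] args) {
        HiveDecimal ok = HiveDecimal.create("74.72");          // parses; stored in the vector
        HiveDecimal bad = HiveDecimal.create("not-a-decimal"); // null; row is marked null
        System.out.println(ok + " / " + bad);
      }
    }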
-  public static class DecimalFromDoubleTreeReader extends ConvertTreeReader {
-
-    private DoubleTreeReader doubleTreeReader;
-
-    private DoubleColumnVector doubleColVector;
-    private DecimalColumnVector decimalColVector;
-
-    DecimalFromDoubleTreeReader(int columnId, TypeDescription readerType)
-        throws IOException {
-      super(columnId);
-      doubleTreeReader = new DoubleTreeReader(columnId);
-      setConvertTreeReader(doubleTreeReader);
-    }
-
-    @Override
-    public void setConvertVectorElement(int elementNum) throws IOException {
-      HiveDecimal value =
-          HiveDecimal.create(Double.toString(doubleColVector.vector[elementNum]));
-      if (value != null) {
-        decimalColVector.set(elementNum, value);
-      } else {
-        decimalColVector.noNulls = false;
-        decimalColVector.isNull[elementNum] = true;
-      }
-    }
-
-    @Override
-    public void nextVector(ColumnVector previousVector,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      if (doubleColVector == null) {
-        // Allocate column vector for file; cast column vector for reader.
-        doubleColVector = new DoubleColumnVector();
-        decimalColVector = (DecimalColumnVector) previousVector;
-      }
-      // Read present/isNull stream
-      doubleTreeReader.nextVector(doubleColVector, isNull, batchSize);
-
-      convertVector(doubleColVector, decimalColVector, batchSize);
-    }
-  }
-
-  public static class DecimalFromStringGroupTreeReader extends ConvertTreeReader {
-
-    private TreeReader stringGroupTreeReader;
-
-    private BytesColumnVector bytesColVector;
-    private DecimalColumnVector decimalColVector;
-
-    DecimalFromStringGroupTreeReader(int columnId, TypeDescription fileType,
-        TypeDescription readerType) throws IOException {
-      super(columnId);
-      stringGroupTreeReader = getStringGroupTreeReader(columnId, fileType);
-      setConvertTreeReader(stringGroupTreeReader);
-    }
-
-    @Override
-    public void setConvertVectorElement(int elementNum) throws IOException {
-      String string = stringFromBytesColumnVectorEntry(bytesColVector, elementNum);
-      HiveDecimal value = parseDecimalFromString(string);
-      if (value != null) {
-        // The DecimalColumnVector will enforce precision and scale and set the entry to null when out of bounds.
-        decimalColVector.set(elementNum, value);
-      } else {
-        decimalColVector.noNulls = false;
-        decimalColVector.isNull[elementNum] = true;
-      }
-    }
-
-    @Override
-    public void nextVector(ColumnVector previousVector,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      if (bytesColVector == null) {
-        // Allocate column vector for file; cast column vector for reader.
-        bytesColVector = new BytesColumnVector();
-        decimalColVector = (DecimalColumnVector) previousVector;
-      }
-      // Read present/isNull stream
-      stringGroupTreeReader.nextVector(bytesColVector, isNull, batchSize);
-
-      convertVector(bytesColVector, decimalColVector, batchSize);
-    }
-  }
-
-  public static class DecimalFromTimestampTreeReader extends ConvertTreeReader {
-
-    private TimestampTreeReader timestampTreeReader;
-
-    private TimestampColumnVector timestampColVector;
-    private DecimalColumnVector decimalColVector;
-
-    DecimalFromTimestampTreeReader(int columnId, boolean skipCorrupt) throws IOException {
-      super(columnId);
-      timestampTreeReader = new TimestampTreeReader(columnId, skipCorrupt);
-      setConvertTreeReader(timestampTreeReader);
-    }
-
-    @Override
-    public void setConvertVectorElement(int elementNum) throws IOException {
-      double doubleValue = TimestampUtils.getDouble(
-          timestampColVector.asScratchTimestamp(elementNum));
-      HiveDecimal value = HiveDecimal.create(Double.toString(doubleValue));
-      if (value != null) {
-        // The DecimalColumnVector will enforce precision and scale and set the entry to null when out of bounds.
-        decimalColVector.set(elementNum, value);
-      } else {
-        decimalColVector.noNulls = false;
-        decimalColVector.isNull[elementNum] = true;
-      }
-    }
-
-    @Override
-    public void nextVector(ColumnVector previousVector,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      if (timestampColVector == null) {
-        // Allocate column vector for file; cast column vector for reader.
-        timestampColVector = new TimestampColumnVector();
-        decimalColVector = (DecimalColumnVector) previousVector;
-      }
-      // Read present/isNull stream
-      timestampTreeReader.nextVector(timestampColVector, isNull, batchSize);
-
-      convertVector(timestampColVector, decimalColVector, batchSize);
-    }
-  }
-
-  public static class DecimalFromDecimalTreeReader extends ConvertTreeReader {
-
-    private DecimalTreeReader decimalTreeReader;
-
-    private DecimalColumnVector fileDecimalColVector;
-    private int filePrecision;
-    private int fileScale;
-    private int readerPrecision;
-    private int readerScale;
-    private DecimalColumnVector decimalColVector;
-
-    DecimalFromDecimalTreeReader(int columnId, TypeDescription fileType, TypeDescription readerType)
-        throws IOException {
-      super(columnId);
-      filePrecision = fileType.getPrecision();
-      fileScale = fileType.getScale();
-      readerPrecision = readerType.getPrecision();
-      readerScale = readerType.getScale();
-      decimalTreeReader = new DecimalTreeReader(columnId, filePrecision, fileScale);
-      setConvertTreeReader(decimalTreeReader);
-    }
-
-    @Override
-    public void setConvertVectorElement(int elementNum) throws IOException {
-
-      decimalColVector.set(elementNum, fileDecimalColVector.vector[elementNum]);
-
-    }
-
-    @Override
-    public void nextVector(ColumnVector previousVector,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      if (fileDecimalColVector == null) {
-        // Allocate column vector for file; cast column vector for reader.
-        fileDecimalColVector = new DecimalColumnVector(filePrecision, fileScale);
-        decimalColVector = (DecimalColumnVector) previousVector;
-      }
-      // Read present/isNull stream
-      decimalTreeReader.nextVector(fileDecimalColVector, isNull, batchSize);
-
-      convertVector(fileDecimalColVector, decimalColVector, batchSize);
-    }
-  }
-
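DecimalFromDecimalTreeReader above simply re-sets each file-side value into the reader-side vector and lets DecimalColumnVector.set enforce the narrower precision and scale, nulling anything that does not fit. A small sketch under that assumption, using DecimalColumnVector and HiveDecimal as elsewhere in this file:

    import org.apache.hadoop.hive.common.type.HiveDecimal;
    import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;

    public class DecimalNarrowingSketch {
      public static void main(String[] args) {
        DecimalColumnVector out = new DecimalColumnVector(5, 2); // reader schema decimal(5,2)
        out.set(0, HiveDecimal.create("123.45"));                // fits
        out.set(1, HiveDecimal.create("1234567.8"));             // overflows; entry nulled
        System.out.println(out.isNull[0] + " " + out.isNull[1]); // false true
      }
    }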
-  public static class StringGroupFromAnyIntegerTreeReader extends ConvertTreeReader {
-
-    private AnyIntegerTreeReader anyIntegerAsLongTreeReader;
-
-    private final TypeDescription readerType;
-    private LongColumnVector longColVector;
-    private BytesColumnVector bytesColVector;
-
-    StringGroupFromAnyIntegerTreeReader(int columnId, TypeDescription fileType,
-        TypeDescription readerType, boolean skipCorrupt) throws IOException {
-      super(columnId);
-      this.readerType = readerType;
-      anyIntegerAsLongTreeReader =
-          new AnyIntegerTreeReader(columnId, fileType, skipCorrupt);
-      setConvertTreeReader(anyIntegerAsLongTreeReader);
-    }
-
-    @Override
-    public void setConvertVectorElement(int elementNum) {
-      long longValue = longColVector.vector[elementNum];
-      String string = anyIntegerAsLongTreeReader.getString(longValue);
-      byte[] bytes = string.getBytes();
-      assignStringGroupVectorEntry(bytesColVector, elementNum, readerType, bytes);
-    }
-
-    @Override
-    public void nextVector(ColumnVector previousVector,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      if (longColVector == null) {
-        // Allocate column vector for file; cast column vector for reader.
-        longColVector = new LongColumnVector();
-        bytesColVector = (BytesColumnVector) previousVector;
-      }
-      // Read present/isNull stream
-      anyIntegerAsLongTreeReader.nextVector(longColVector, isNull, batchSize);
-
-      convertVector(longColVector, bytesColVector, batchSize);
-    }
-  }
-
-  public static class StringGroupFromFloatTreeReader extends ConvertTreeReader {
-
-    private FloatTreeReader floatTreeReader;
-
-    private final TypeDescription readerType;
-    private DoubleColumnVector doubleColVector;
-    private BytesColumnVector bytesColVector;
-
-
-    StringGroupFromFloatTreeReader(int columnId, TypeDescription readerType,
-        boolean skipCorrupt) throws IOException {
-      super(columnId);
-      this.readerType = readerType;
-      floatTreeReader = new FloatTreeReader(columnId);
-      setConvertTreeReader(floatTreeReader);
-    }
-
-    @Override
-    public void setConvertVectorElement(int elementNum) {
-      float floatValue = (float) doubleColVector.vector[elementNum];
-      if (!Float.isNaN(floatValue)) {
-        String string = String.valueOf(floatValue);
-        byte[] bytes = string.getBytes();
-        assignStringGroupVectorEntry(bytesColVector, elementNum, readerType, bytes);
-      } else {
-        bytesColVector.noNulls = false;
-        bytesColVector.isNull[elementNum] = true;
-      }
-    }
-
-    @Override
-    public void nextVector(ColumnVector previousVector,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      if (doubleColVector == null) {
-        // Allocate column vector for file; cast column vector for reader.
-        doubleColVector = new DoubleColumnVector();
-        bytesColVector = (BytesColumnVector) previousVector;
-      }
-      // Read present/isNull stream
-      floatTreeReader.nextVector(doubleColVector, isNull, batchSize);
-
-      convertVector(doubleColVector, bytesColVector, batchSize);
-    }
-  }
-
-  public static class StringGroupFromDoubleTreeReader extends ConvertTreeReader {
-
-    private DoubleTreeReader doubleTreeReader;
-
-    private final TypeDescription readerType;
-    private DoubleColumnVector doubleColVector;
-    private BytesColumnVector bytesColVector;
-
-    StringGroupFromDoubleTreeReader(int columnId, TypeDescription readerType,
-        boolean skipCorrupt) throws IOException {
-      super(columnId);
-      this.readerType = readerType;
-      doubleTreeReader = new DoubleTreeReader(columnId);
-      setConvertTreeReader(doubleTreeReader);
-    }
-
-    @Override
-    public void setConvertVectorElement(int elementNum) {
-      double doubleValue = doubleColVector.vector[elementNum];
-      if (!Double.isNaN(doubleValue)) {
-        String string = String.valueOf(doubleValue);
-        byte[] bytes = string.getBytes();
-        assignStringGroupVectorEntry(bytesColVector, elementNum, readerType, bytes);
-      } else {
-        bytesColVector.noNulls = false;
-        bytesColVector.isNull[elementNum] = true;
-      }
-    }
-
-    @Override
-    public void nextVector(ColumnVector previousVector,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      if (doubleColVector == null) {
-        // Allocate column vector for file; cast column vector for reader.
-        doubleColVector = new DoubleColumnVector();
-        bytesColVector = (BytesColumnVector) previousVector;
-      }
-      // Read present/isNull stream
-      doubleTreeReader.nextVector(doubleColVector, isNull, batchSize);
-
-      convertVector(doubleColVector, bytesColVector, batchSize);
-    }
-  }
-
-
-
-  public static class StringGroupFromDecimalTreeReader extends ConvertTreeReader {
-
-    private DecimalTreeReader decimalTreeReader;
-
-    private int precision;
-    private int scale;
-    private final TypeDescription readerType;
-    private DecimalColumnVector decimalColVector;
-    private BytesColumnVector bytesColVector;
-    private byte[] scratchBuffer;
-
-    StringGroupFromDecimalTreeReader(int columnId, TypeDescription fileType,
-        TypeDescription readerType, boolean skipCorrupt) throws IOException {
-      super(columnId);
-      this.precision = fileType.getPrecision();
-      this.scale = fileType.getScale();
-      this.readerType = readerType;
-      decimalTreeReader = new DecimalTreeReader(columnId, precision, scale);
-      setConvertTreeReader(decimalTreeReader);
-      scratchBuffer = new byte[HiveDecimal.SCRATCH_BUFFER_LEN_TO_BYTES];
-    }
-
-    @Override
-    public void setConvertVectorElement(int elementNum) {
-      HiveDecimalWritable decWritable = decimalColVector.vector[elementNum];
-
-      // Convert decimal into bytes instead of a String for better performance.
-      final int byteIndex = decWritable.toBytes(scratchBuffer);
-
-      assignStringGroupVectorEntry(
-          bytesColVector, elementNum, readerType,
-          scratchBuffer, byteIndex, HiveDecimal.SCRATCH_BUFFER_LEN_TO_BYTES - byteIndex);
-    }
-
-    @Override
-    public void nextVector(ColumnVector previousVector,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      if (decimalColVector == null) {
-        // Allocate column vector for file; cast column vector for reader.
-        decimalColVector = new DecimalColumnVector(precision, scale);
-        bytesColVector = (BytesColumnVector) previousVector;
-      }
-      // Read present/isNull stream
-      decimalTreeReader.nextVector(decimalColVector, isNull, batchSize);
-
-      convertVector(decimalColVector, bytesColVector, batchSize);
-    }
-  }
-
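The scratch-buffer path in StringGroupFromDecimalTreeReader above avoids allocating a String per row: toBytes writes the textual form right-aligned into the buffer and returns the index where it begins, as the start/length arguments above imply. A sketch under those assumptions:

    import java.nio.charset.StandardCharsets;
    import org.apache.hadoop.hive.common.type.HiveDecimal;
    import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;

    public class DecimalToBytesSketch {
      public static void main(String[] args) {
        byte[] scratch = new byte[HiveDecimal.SCRATCH_BUFFER_LEN_TO_BYTES];
        HiveDecimalWritable w = new HiveDecimalWritable(HiveDecimal.create("74.72"));
        int start = w.toBytes(scratch);  // text occupies scratch[start .. scratch.length)
        String text = new String(scratch, start, scratch.length - start, StandardCharsets.UTF_8);
        System.out.println(text);        // 74.72 (the real reader never builds this String)
      }
    }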
-  public static class StringGroupFromTimestampTreeReader extends ConvertTreeReader {
-
-    private TimestampTreeReader timestampTreeReader;
-
-    private final TypeDescription readerType;
-    private TimestampColumnVector timestampColVector;
-    private BytesColumnVector bytesColVector;
-
-    StringGroupFromTimestampTreeReader(int columnId, TypeDescription readerType,
-        boolean skipCorrupt) throws IOException {
-      super(columnId);
-      this.readerType = readerType;
-      timestampTreeReader = new TimestampTreeReader(columnId, skipCorrupt);
-      setConvertTreeReader(timestampTreeReader);
-    }
-
-    @Override
-    public void setConvertVectorElement(int elementNum) throws IOException {
-      String string =
-          timestampColVector.asScratchTimestamp(elementNum).toString();
-      byte[] bytes = string.getBytes();
-      assignStringGroupVectorEntry(bytesColVector, elementNum, readerType, bytes);
-    }
-
-    @Override
-    public void nextVector(ColumnVector previousVector,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      if (timestampColVector == null) {
-        // Allocate column vector for file; cast column vector for reader.
-        timestampColVector = new TimestampColumnVector();
-        bytesColVector = (BytesColumnVector) previousVector;
-      }
-      // Read present/isNull stream
-      timestampTreeReader.nextVector(timestampColVector, isNull, batchSize);
-
-      convertVector(timestampColVector, bytesColVector, batchSize);
-    }
-  }
-
-  public static class StringGroupFromDateTreeReader extends ConvertTreeReader {
-
-    private DateTreeReader dateTreeReader;
-
-    private final TypeDescription readerType;
-    private LongColumnVector longColVector;
-    private BytesColumnVector bytesColVector;
-    private Date date;
-
-    StringGroupFromDateTreeReader(int columnId, TypeDescription readerType,
-        boolean skipCorrupt) throws IOException {
-      super(columnId);
-      this.readerType = readerType;
-      dateTreeReader = new DateTreeReader(columnId);
-      setConvertTreeReader(dateTreeReader);
-      date = new Date(0);
-    }
-
-    @Override
-    public void setConvertVectorElement(int elementNum) throws IOException {
-      date.setTime(DateWritable.daysToMillis((int) longColVector.vector[elementNum]));
-      String string = date.toString();
-      byte[] bytes = string.getBytes();
-      assignStringGroupVectorEntry(bytesColVector, elementNum, readerType, bytes);
-    }
-
-    @Override
-    public void nextVector(ColumnVector previousVector,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      if (longColVector == null) {
-        // Allocate column vector for file; cast column vector for reader.
-        longColVector = new LongColumnVector();
-        bytesColVector = (BytesColumnVector) previousVector;
-      }
-      // Read present/isNull stream
-      dateTreeReader.nextVector(longColVector, isNull, batchSize);
-
-      convertVector(longColVector, bytesColVector, batchSize);
-    }
-  }
-
-  public static class StringGroupFromStringGroupTreeReader extends ConvertTreeReader {
-
-    private TreeReader stringGroupTreeReader;
-
-    private final TypeDescription readerType;
-
-    StringGroupFromStringGroupTreeReader(int columnId, TypeDescription fileType,
-        TypeDescription readerType) throws IOException {
-      super(columnId);
-      this.readerType = readerType;
-      stringGroupTreeReader = getStringGroupTreeReader(columnId, fileType);
-      setConvertTreeReader(stringGroupTreeReader);
-    }
-
-    @Override
-    public void nextVector(ColumnVector previousVector,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      stringGroupTreeReader.nextVector(previousVector, isNull, batchSize);
-
-      BytesColumnVector resultColVector = (BytesColumnVector) previousVector;
-
-      if (resultColVector.isRepeating) {
-        if (resultColVector.noNulls || !resultColVector.isNull[0]) {
-          convertStringGroupVectorElement(resultColVector, 0, readerType);
-        } else {
-          // Remains null.
-        }
-      } else if (resultColVector.noNulls){
-        for (int i = 0; i < batchSize; i++) {
-          convertStringGroupVectorElement(resultColVector, i, readerType);
-        }
-      } else {
-        for (int i = 0; i < batchSize; i++) {
-          if (!resultColVector.isNull[i]) {
-            convertStringGroupVectorElement(resultColVector, i, readerType);
-          } else {
-            // Remains null.
-          }
-        }
-      }
-    }
-  }
-
-  public static class StringGroupFromBinaryTreeReader extends ConvertTreeReader {
-
-    private BinaryTreeReader binaryTreeReader;
-
-    private final TypeDescription readerType;
-    private BytesColumnVector inBytesColVector;
-    private BytesColumnVector outBytesColVector;
-
-    StringGroupFromBinaryTreeReader(int columnId, TypeDescription readerType,
-        boolean skipCorrupt) throws IOException {
-      super(columnId);
-      this.readerType = readerType;
-      binaryTreeReader = new BinaryTreeReader(columnId);
-      setConvertTreeReader(binaryTreeReader);
-    }
-
-    @Override
-    public void setConvertVectorElement(int elementNum) throws IOException {
-      byte[] bytes = inBytesColVector.vector[elementNum];
-      int start = inBytesColVector.start[elementNum];
-      int length = inBytesColVector.length[elementNum];
-      byte[] string = new byte[length == 0 ? 0 : 3 * length - 1];
-      for(int p = 0; p < string.length; p += 2) {
-        if (p != 0) {
-          string[p++] = ' ';
-        }
-        int num = 0xff & bytes[start++];
-        int digit = num / 16;
-        string[p] = (byte)((digit) + (digit < 10 ? '0' : 'a' - 10));
-        digit = num % 16;
-        string[p + 1] = (byte)((digit) + (digit < 10 ? '0' : 'a' - 10));
-      }
-      assignStringGroupVectorEntry(outBytesColVector, elementNum, readerType,
-          string, 0, string.length);
-    }
-
-    @Override
-    public void nextVector(ColumnVector previousVector,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      if (inBytesColVector == null) {
-        // Allocate column vector for file; cast column vector for reader.
-        inBytesColVector = new BytesColumnVector();
-        outBytesColVector = (BytesColumnVector) previousVector;
-      }
-      // Read present/isNull stream
-      binaryTreeReader.nextVector(inBytesColVector, isNull, batchSize);
-
-      convertVector(inBytesColVector, outBytesColVector, batchSize);
-    }
-  }
-
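The loop in StringGroupFromBinaryTreeReader above renders binary values as lowercase hex, two digits per byte with single spaces in between (hence the 3 * length - 1 output size). An equivalent, more conventional helper, shown only to make the formatting explicit; the reader avoids String.format for speed:

    public class HexSketch {
      // Two lowercase hex digits per byte, separated by single spaces.
      static String toHex(byte[] bytes, int start, int length) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < length; i++) {
          if (i != 0) {
            sb.append(' ');
          }
          sb.append(String.format("%02x", bytes[start + i] & 0xff));
        }
        return sb.toString();
      }

      public static void main(String[] args) {
        System.out.println(toHex(new byte[]{0x0f, (byte) 0xa0}, 0, 2)); // 0f a0
      }
    }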
-  public static class TimestampFromAnyIntegerTreeReader extends ConvertTreeReader {
-
-    private AnyIntegerTreeReader anyIntegerAsLongTreeReader;
-
-    private LongColumnVector longColVector;
-    private TimestampColumnVector timestampColVector;
-
-    TimestampFromAnyIntegerTreeReader(int columnId, TypeDescription fileType,
-        boolean skipCorrupt) throws IOException {
-      super(columnId);
-      anyIntegerAsLongTreeReader =
-          new AnyIntegerTreeReader(columnId, fileType, skipCorrupt);
-      setConvertTreeReader(anyIntegerAsLongTreeReader);
-    }
-
-    @Override
-    public void setConvertVectorElement(int elementNum) {
-      long longValue = longColVector.vector[elementNum];
-      // UNDONE: What does the boolean setting need to be?
-      timestampColVector.set(elementNum, new Timestamp(longValue));
-    }
-
-    @Override
-    public void nextVector(ColumnVector previousVector,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      if (longColVector == null) {
-        // Allocate column vector for file; cast column vector for reader.
-        longColVector = new LongColumnVector();
-        timestampColVector = (TimestampColumnVector) previousVector;
-      }
-      // Read present/isNull stream
-      anyIntegerAsLongTreeReader.nextVector(longColVector, isNull, batchSize);
-
-      convertVector(longColVector, timestampColVector, batchSize);
-    }
-  }
-
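TimestampFromAnyIntegerTreeReader above hands the raw long straight to java.sql.Timestamp's long constructor, which interprets it as milliseconds since the epoch. A tiny illustration with a hypothetical value:

    import java.sql.Timestamp;

    public class TimestampFromLongSketch {
      public static void main(String[] args) {
        Timestamp ts = new Timestamp(1_500_000_000_000L); // epoch millis
        System.out.println(ts);                           // mid-July 2017, local time zone
      }
    }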
-  public static class TimestampFromFloatTreeReader extends ConvertTreeReader {
-
-    private FloatTreeReader floatTreeReader;
-
-    private DoubleColumnVector doubleColVector;
-    private TimestampColumnVector timestampColVector;
-
-    TimestampFromFloatTreeReader(int columnId, TypeDescription fileType,
-        boolean skipCorrupt) throws IOException {
-      super(columnId);
-      floatTreeReader = new FloatTreeReader(columnId);
-      setConvertTreeReader(floatTreeReader);
-    }
-
-    @Override
-    public void setConvertVectorElement(int elementNum) {
-      float floatValue = (float) doubleColVector.vector[elementNum];
-      Timestamp timestampValue = TimestampUtils.doubleToTimestamp(floatValue);
-      // The TimestampColumnVector will set the entry to null when a null timestamp is passed in.
-      timestampColVector.set(elementNum, timestampValue);
-    }
-
-    @Override
-    public void nextVector(ColumnVector previousVector,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      if (doubleColVector == null) {
-        // Allocate column vector for file; cast column vector for reader.
-        doubleColVector = new DoubleColumnVector();
-        timestampColVector = (TimestampColumnVector) previousVector;
-      }
-      // Read present/isNull stream
-      floatTreeReader.nextVector(doubleColVector, isNull, batchSize);
-
-      convertVector(doubleColVector, timestampColVector, batchSize);
-    }
-  }
-
-  public static class TimestampFromDoubleTreeReader extends ConvertTreeReader {
-
-    private DoubleTreeReader doubleTreeReader;
-
-    private DoubleColumnVector doubleColVector;
-    private TimestampColumnVector timestampColVector;
-
-    TimestampFromDoubleTreeReader(int columnId, TypeDescription fileType,
-        boolean skipCorrupt) throws IOException {
-      super(columnId);
-      doubleTreeReader = new DoubleTreeReader(columnId);
-      setConvertTreeReader(doubleTreeReader);
-    }
-
-    @Override
-    public void setConvertVectorElement(int elementNum) {
-      double doubleValue = doubleColVector.vector[elementNum];
-      Timestamp timestampValue = TimestampUtils.doubleToTimestamp(doubleValue);
-      // The TimestampColumnVector will set the entry to null when a null timestamp is passed in.
-      timestampColVector.set(elementNum, timestampValue);
-    }
-
-    @Override
-    public void nextVector(ColumnVector previousVector,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      if (doubleColVector == null) {
-        // Allocate column vector for file; cast column vector for reader.
-        doubleColVector = new DoubleColumnVector();
-        timestampColVector = (TimestampColumnVector) previousVector;
-      }
-      // Read present/isNull stream
-      doubleTreeReader.nextVector(doubleColVector, isNull, batchSize);
-
-      convertVector(doubleColVector, timestampColVector, batchSize);
-    }
-  }
-
-  public static class TimestampFromDecimalTreeReader extends ConvertTreeReader {
-
-    private DecimalTreeReader decimalTreeReader;
-
-    private final int precision;
-    private final int scale;
-    private DecimalColumnVector decimalColVector;
-    private TimestampColumnVector timestampColVector;
-
-    TimestampFromDecimalTreeReader(int columnId, TypeDescription fileType,
-        boolean skipCorrupt) throws IOException {
-      super(columnId);
-      this.precision = fileType.getPrecision();
-      this.scale = fileType.getScale();
-      decimalTreeReader = new DecimalTreeReader(columnId, precision, scale);
-      setConvertTreeReader(decimalTreeReader);
-    }
-
-    @Override
-    public void setConvertVectorElement(int elementNum) {
-      Timestamp timestampValue =
-            TimestampUtils.decimalToTimestamp(
-                decimalColVector.vector[elementNum].getHiveDecimal());
-      // The TimestampColumnVector will set the entry to null when a null timestamp is passed in.
-      timestampColVector.set(elementNum, timestampValue);
-    }
-
-    @Override
-    public void nextVector(ColumnVector previousVector,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      if (decimalColVector == null) {
-        // Allocate column vector for file; cast column vector for reader.
-        decimalColVector = new DecimalColumnVector(precision, scale);
-        timestampColVector = (TimestampColumnVector) previousVector;
-      }
-      // Read present/isNull stream
-      decimalTreeReader.nextVector(decimalColVector, isNull, batchSize);
-
-      convertVector(decimalColVector, timestampColVector, batchSize);
-    }
-  }
-
-  public static class TimestampFromStringGroupTreeReader extends ConvertTreeReader {
-
-    private TreeReader stringGroupTreeReader;
-
-    private BytesColumnVector bytesColVector;
-    private TimestampColumnVector timestampColVector;
-
-    TimestampFromStringGroupTreeReader(int columnId, TypeDescription fileType)
-        throws IOException {
-      super(columnId);
-      stringGroupTreeReader = getStringGroupTreeReader(columnId, fileType);
-      setConvertTreeReader(stringGroupTreeReader);
-    }
-
-    @Override
-    public void setConvertVectorElement(int elementNum) throws IOException {
-      String stringValue =
-          stringFromBytesColumnVectorEntry(bytesColVector, elementNum);
-      Timestamp timestampValue = parseTimestampFromString(stringValue);
-      if (timestampValue != null) {
-        timestampColVector.set(elementNum, timestampValue);
-      } else {
-        timestampColVector.noNulls = false;
-        timestampColVector.isNull[elementNum] = true;
-      }
-    }
-
-    @Override
-    public void nextVector(ColumnVector previousVector,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      if (bytesColVector == null) {
-        // Allocate column vector for file; cast column vector for reader.
-        bytesColVector = new BytesColumnVector();
-        timestampColVector = (TimestampColumnVector) previousVector;
-      }
-      // Read present/isNull stream
-      stringGroupTreeReader.nextVector(bytesColVector, isNull, batchSize);
-
-      convertVector(bytesColVector, timestampColVector, batchSize);
-    }
-  }
-
-  public static class TimestampFromDateTreeReader extends ConvertTreeReader {
-
-    private DateTreeReader dateTreeReader;
-
-    private LongColumnVector longColVector;
-    private TimestampColumnVector timestampColVector;
-
-    TimestampFromDateTreeReader(int columnId, TypeDescription fileType,
-        boolean skipCorrupt) throws IOException {
-      super(columnId);
-      dateTreeReader = new DateTreeReader(columnId);
-      setConvertTreeReader(dateTreeReader);
-    }
-
-    @Override
-    public void setConvertVectorElement(int elementNum) {
-      long millis =
-          DateWritable.daysToMillis((int) longColVector.vector[elementNum]);
-      timestampColVector.set(elementNum, new Timestamp(millis));
-    }
-
-    @Override
-    public void nextVector(ColumnVector previousVector,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      if (longColVector == null) {
-        // Allocate column vector for file; cast column vector for reader.
-        longColVector = new LongColumnVector();
-        timestampColVector = (TimestampColumnVector) previousVector;
-      }
-      // Read present/isNull stream
-      dateTreeReader.nextVector(longColVector, isNull, batchSize);
-
-      convertVector(longColVector, timestampColVector, batchSize);
-    }
-  }
-
-  public static class DateFromStringGroupTreeReader extends ConvertTreeReader {
-
-    private TreeReader stringGroupTreeReader;
-
-    private BytesColumnVector bytesColVector;
-    private LongColumnVector longColVector;
-
-    DateFromStringGroupTreeReader(int columnId, TypeDescription fileType)
-        throws IOException {
-      super(columnId);
-      stringGroupTreeReader = getStringGroupTreeReader(columnId, fileType);
-      setConvertTreeReader(stringGroupTreeReader);
-    }
-
-    @Override
-    public void setConvertVectorElement(int elementNum) throws IOException {
-      String stringValue =
-          stringFromBytesColumnVectorEntry(bytesColVector, elementNum);
-      Date dateValue = parseDateFromString(stringValue);
-      if (dateValue != null) {
-        longColVector.vector[elementNum] = DateWritable.dateToDays(dateValue);
-      } else {
-        longColVector.noNulls = false;
-        longColVector.isNull[elementNum] = true;
-      }
-    }
-
-    @Override
-    public void nextVector(ColumnVector previousVector,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      if (bytesColVector == null) {
-        // Allocate column vector for file; cast column vector for reader.
-        bytesColVector = new BytesColumnVector();
-        longColVector = (LongColumnVector) previousVector;
-      }
-      // Read present/isNull stream
-      stringGroupTreeReader.nextVector(bytesColVector, isNull, batchSize);
-
-      convertVector(bytesColVector, longColVector, batchSize);
-    }
-  }
-
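Dates move through these converters as days since the epoch in a LongColumnVector; DateWritable.dateToDays and daysToMillis are the two directions of that encoding. A small sketch, assuming the same DateWritable the readers above use:

    import java.sql.Date;
    import org.apache.hadoop.hive.serde2.io.DateWritable;

    public class DaysSinceEpochSketch {
      public static void main(String[] args) {
        int days = DateWritable.dateToDays(Date.valueOf("1970-01-02")); // expect 1
        long millis = DateWritable.daysToMillis(days);                  // local midnight of that day
        System.out.println(days + " day(s) -> " + millis + " ms");
      }
    }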
-  public static class DateFromTimestampTreeReader extends ConvertTreeReader {
-
-    private TimestampTreeReader timestampTreeReader;
-
-    private TimestampColumnVector timestampColVector;
-    private LongColumnVector longColVector;
-
-    DateFromTimestampTreeReader(int columnId, boolean skipCorrupt) throws IOException {
-      super(columnId);
-      timestampTreeReader = new TimestampTreeReader(columnId, skipCorrupt);
-      setConvertTreeReader(timestampTreeReader);
-    }
-
-    @Override
-    public void setConvertVectorElement(int elementNum) throws IOException {
-      Date dateValue =
-          DateWritable.timeToDate(TimestampUtils.millisToSeconds(
-              timestampColVector.asScratchTimestamp(elementNum).getTime()));
-      longColVector.vector[elementNum] = DateWritable.dateToDays(dateValue);
-    }
-
-    @Override
-    public void nextVector(ColumnVector previousVector,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      if (timestampColVector == null) {
-        // Allocate column vector for file; cast column vector for reader.
-        timestampColVector = new TimestampColumnVector();
-        longColVector = (LongColumnVector) previousVector;
-      }
-      // Read present/isNull stream
-      timestampTreeReader.nextVector(timestampColVector, isNull, batchSize);
-
-      convertVector(timestampColVector, longColVector, batchSize);
-    }
-  }
-
-  public static class BinaryFromStringGroupTreeReader extends ConvertTreeReader {
-
-    private TreeReader stringGroupTreeReader;
-
-    BinaryFromStringGroupTreeReader(int columnId, TypeDescription fileType)
-        throws IOException {
-      super(columnId);
-      stringGroupTreeReader = getStringGroupTreeReader(columnId, fileType);
-      setConvertTreeReader(stringGroupTreeReader);
-    }
-
-    @Override
-    public void nextVector(ColumnVector previousVector,
-                           boolean[] isNull,
-                           final int batchSize) throws IOException {
-      super.nextVector(previousVector, isNull, batchSize);
-    }
-  }
-
-  private static TreeReader createAnyIntegerConvertTreeReader(int columnId,
-                                                              TypeDescription fileType,
-                                                              TypeDescription readerType,
-                                                              SchemaEvolution evolution,
-                                                              boolean[] included,
-                                                              boolean skipCorrupt) throws IOException {
-
-    // CONVERT from (BOOLEAN, BYTE, SHORT, INT, LONG) to schema type.
-    //
-    switch (readerType.getCategory()) {
-
-    case BOOLEAN:
-    case BYTE:
-    case SHORT:
-    case INT:
-    case LONG:
-      if (fileType.getCategory() == readerType.getCategory()) {
-        throw new IllegalArgumentException("No conversion of type " +
-            readerType.getCategory() + " to self needed");
-      }
-      return new AnyIntegerFromAnyIntegerTreeReader(columnId, fileType, readerType,
-          skipCorrupt);
-
-    case FLOAT:
-      return new FloatFromAnyIntegerTreeReader(columnId, fileType,
-          skipCorrupt);
-
-    case DOUBLE:
-      return new DoubleFromAnyIntegerTreeReader(columnId, fileType,
-          skipCorrupt);
-
-    case DECIMAL:
-      return new DecimalFromAnyIntegerTreeReader(columnId, fileType, skipCorrupt);
-
-    case STRING:
-    case CHAR:
-    case VARCHAR:
-      return new StringGroupFromAnyIntegerTreeReader(columnId, fileType, readerType,
-          skipCorrupt);
-
-    case TIMESTAMP:
-      return new TimestampFromAnyIntegerTreeReader(columnId, fileType, skipCorrupt);
-
-    // Not currently supported conversion(s):
-    case BINARY:
-    case DATE:
-
-    case STRUCT:
-    case LIST:
-    case MAP:
-    case UNION:
-    default:
-      throw new IllegalArgumentException("Unsupported type " +
-          readerType.getCategory());
-    }
-  }
-
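Each createXxxConvertTreeReader factory below follows the same shape as the one above: switch on the reader type's category, reject identity conversions, and fall through to an IllegalArgumentException for unsupported targets. A compact sketch of how such a file/reader schema pair is expressed, using org.apache.orc.TypeDescription as elsewhere in this file:

    import org.apache.orc.TypeDescription;

    public class CategorySketch {
      public static void main(String[] args) {
        TypeDescription fileType = TypeDescription.fromString("int");
        TypeDescription readerType = TypeDescription.fromString("string");
        // For this pair, createAnyIntegerConvertTreeReader above would return
        // a StringGroupFromAnyIntegerTreeReader.
        System.out.println(fileType.getCategory() + " -> " + readerType.getCategory());
      }
    }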
-  private static TreeReader createFloatConvertTreeReader(int columnId,
-                                                         TypeDescription fileType,
-                                                         TypeDescription readerType,
-                                                         SchemaEvolution evolution,
-                                                         boolean[] included,
-                                                         boolean skipCorrupt) throws IOException {
-
-    // CONVERT from FLOAT to schema type.
-    switch (readerType.getCategory()) {
-
-    case BOOLEAN:
-    case BYTE:
-    case SHORT:
-    case INT:
-    case LONG:
-      return new AnyIntegerFromFloatTreeReader(columnId, readerType);
-
-    case FLOAT:
-      throw new IllegalArgumentException("No conversion of type " +
-        readerType.getCategory() + " to self needed");
-
-    case DOUBLE:
-      return new DoubleFromFloatTreeReader(columnId);
-
-    case DECIMAL:
-      return new DecimalFromFloatTreeReader(columnId, readerType);
-
-    case STRING:
-    case CHAR:
-    case VARCHAR:
-      return new StringGroupFromFloatTreeReader(columnId, readerType, skipCorrupt);
-
-    case TIMESTAMP:
-      return new TimestampFromFloatTreeReader(columnId, readerType, skipCorrupt);
-
-    // Not currently supported conversion(s):
-    case BINARY:
-    case DATE:
-
-    case STRUCT:
-    case LIST:
-    case MAP:
-    case UNION:
-    default:
-      throw new IllegalArgumentException("Unsupported type " +
-          readerType.getCategory());
-    }
-  }
-
-  private static TreeReader createDoubleConvertTreeReader(int columnId,
-                                                          TypeDescription fileType,
-                                                          TypeDescription readerType,
-                                                          SchemaEvolution evolution,
-                                                          boolean[] included,
-                                                          boolean skipCorrupt) throws IOException {
-
-    // CONVERT from DOUBLE to schema type.
-    switch (readerType.getCategory()) {
-
-    case BOOLEAN:
-    case BYTE:
-    case SHORT:
-    case INT:
-    case LONG:
-      return new AnyIntegerFromDoubleTreeReader(columnId, readerType);
-
-    case FLOAT:
-      return new FloatFromDoubleTreeReader(columnId);
-
-    case DOUBLE:
-      throw new IllegalArgumentException("No conversion of type " +
-        readerType.getCategory() + " to self needed");
-
-    case DECIMAL:
-      return new DecimalFromDoubleTreeReader(columnId, readerType);
-
-    case STRING:
-    case CHAR:
-    case VARCHAR:
-      return new StringGroupFromDoubleTreeReader(columnId, readerType, skipCorrupt);
-
-    case TIMESTAMP:
-      return new TimestampFromDoubleTreeReader(columnId, readerType, skipCorrupt);
-
-    // Not currently supported conversion(s):
-    case BINARY:
-    case DATE:
-
-    case STRUCT:
-    case LIST:
-    case MAP:
-    case UNION:
-    default:
-      throw new IllegalArgumentException("Unsupported type " +
-          readerType.getCategory());
-    }
-  }
-
-  private static TreeReader createDecimalConvertTreeReader(int columnId,
-                                                           TypeDescription fileType,
-                                                           TypeDescription readerType,
-                                                           SchemaEvolution evolution,
-                                                           boolean[] included,
-                                                           boolean skipCorrupt) throws IOException {
-
-    // CONVERT from DECIMAL to schema type.
-    switch (readerType.getCategory()) {
-
-    case BOOLEAN:
-    case BYTE:
-    case SHORT:
-    case INT:
-    case LONG:
-      return new AnyIntegerFromDecimalTreeReader(columnId, fileType, readerType);
-
-    case FLOAT:
-      return new FloatFromDecimalTreeReader(columnId, fileType, readerType);
-
-    case DOUBLE:
-      return new DoubleFromDecimalTreeReader(columnId, fileType);
-
-    case STRING:
-    case CHAR:
-    case VARCHAR:
-      return new StringGroupFromDecimalTreeReader(columnId, fileType, readerType, skipCorrupt);
-
-    case TIMESTAMP:
-      return new TimestampFromDecimalTreeReader(columnId, fileType, skipCorrupt);
-
-    case DECIMAL:
-      return new DecimalFromDecimalTreeReader(columnId, fileType, readerType);
-
-    // Not currently supported conversion(s):
-    case BINARY:
-    case DATE:
-
-    case STRUCT:
-    case LIST:
-    case MAP:
-    case UNION:
-    default:
-      throw new IllegalArgumentException("Unsupported type " +
-          readerType.getCategory());
-    }
-  }
-
-  private static TreeReader createStringConvertTreeReader(int columnId,
-                                                          TypeDescription fileType,
-                                                          TypeDescription readerType,
-                                                          SchemaEvolution evolution,
-                                                          boolean[] included,
-                                                          boolean skipCorrupt) throws IOException {
-
-    // CONVERT from STRING to schema type.
-    switch (readerType.getCategory()) {
-
-    case BOOLEAN:
-    case BYTE:
-    case SHORT:
-    case INT:
-    case LONG:
-      return new AnyIntegerFromStringGroupTreeReader(columnId, fileType, readerType);
-
-    case FLOAT:
-      return new FloatFromStringGroupTreeReader(columnId, fileType);
-
-    case DOUBLE:
-      return new DoubleFromStringGroupTreeReader(columnId, fileType);
-
-    case DECIMAL:
-      return new DecimalFromStringGroupTreeReader(columnId, fileType, readerType);
-
-    case CHAR:
-      return new StringGroupFromStringGroupTreeReader(columnId, fileType, readerType);
-
-    case VARCHAR:
-      return new StringGroupFromStringGroupTreeReader(columnId, fileType, readerType);
-
-    case STRING:
-      throw new IllegalArgumentException("No conversion of type " +
-          readerType.getCategory() + " to self needed");
-
-    case BINARY:
-      return new BinaryFromStringGroupTreeReader(columnId, fileType);
-
-    case TIMESTAMP:
-      return new TimestampFromStringGroupTreeReader(columnId, fileType);
-
-    case DATE:
-      return new DateFromStringGroupTreeReader(columnId, fileType);
-
-    // Not currently supported conversion(s):
-
-    case STRUCT:
-    case LIST:
-    case MAP:
-    case UNION:
-    default:
-      throw new IllegalArgumentException("Unsupported type " +
-          readerType.getCategory());
-    }
-  }
-
-  private static TreeReader createCharConvertTreeReader(int columnId,
-                                                        TypeDescription fileType,
-                                                        TypeDescription readerType,
-                                                        SchemaEvolution evolution,
-                                                        boolean[] included,
-                                                        boolean skipCorrupt) throws IOException {
-
-    // CONVERT from CHAR to schema type.
-    switch (readerType.getCategory()) {
-
-    case BOOLEAN:
-    case BYTE:
-    case SHORT:
-    case INT:
-    case LONG:
-      return new AnyIntegerFromStringGroupTreeReader(columnId, fileType, readerType);
-
-    case FLOAT:
-      return new FloatFromStringGroupTreeReader(columnId, fileType);
-
-    case DOUBLE:
-      return new DoubleFromStringGroupTreeReader(columnId, fileType);
-
-    case DECIMAL:
-      return new DecimalFromStringGroupTreeReader(columnId, fileType, readerType);
-
-    case STRING:
-      return new StringGroupFromStringGroupTreeReader(columnId, fileType, readerType);
-
-    case VARCHAR:
-      return new StringGroupFromStringGroupTreeReader(columnId, fileType, readerType);
-
-    case CHAR:
-      return new StringGroupFromStringGroupTreeReader(columnId, fileType, readerType);
-
-    case BINARY:
-      return new BinaryFromStringGroupTreeReader(columnId, fileType);
-
-    case TIMESTAMP:
-      return new TimestampFromStringGroupTreeReader(columnId, fileType);
-
-    case DATE:
-      return new DateFromStringGroupTreeReader(columnId, fileT

<TRUNCATED>

[19/37] hive git commit: HIVE-17118. Move the hive-orc source files to make the package names unique.

Posted by om...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/impl/PhysicalFsWriter.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/PhysicalFsWriter.java b/orc/src/java/org/apache/orc/impl/PhysicalFsWriter.java
deleted file mode 100644
index ba8c13f..0000000
--- a/orc/src/java/org/apache/orc/impl/PhysicalFsWriter.java
+++ /dev/null
@@ -1,529 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc.impl;
-
-import java.io.IOException;
-import java.io.OutputStream;
-import java.nio.ByteBuffer;
-import java.util.ArrayList;
-import java.util.EnumSet;
-import java.util.List;
-import java.util.Map;
-import java.util.TreeMap;
-
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.orc.CompressionCodec;
-import org.apache.orc.CompressionCodec.Modifier;
-import org.apache.orc.CompressionKind;
-import org.apache.orc.OrcFile;
-import org.apache.orc.OrcFile.CompressionStrategy;
-import org.apache.orc.OrcProto;
-import org.apache.orc.OrcProto.BloomFilterIndex;
-import org.apache.orc.OrcProto.Footer;
-import org.apache.orc.OrcProto.Metadata;
-import org.apache.orc.OrcProto.PostScript;
-import org.apache.orc.OrcProto.Stream.Kind;
-import org.apache.orc.OrcProto.StripeFooter;
-import org.apache.orc.OrcProto.StripeInformation;
-import org.apache.orc.OrcProto.RowIndex.Builder;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.google.common.annotations.VisibleForTesting;
-import com.google.protobuf.CodedOutputStream;
-
-public class PhysicalFsWriter implements PhysicalWriter {
-  private static final Logger LOG = LoggerFactory.getLogger(PhysicalFsWriter.class);
-
-  private static final int HDFS_BUFFER_SIZE = 256 * 1024;
-
-  private FSDataOutputStream rawWriter = null;
-  // the compressed metadata information outStream
-  private OutStream writer = null;
-  // a protobuf outStream around streamFactory
-  private CodedOutputStream protobufWriter = null;
-
-  private final FileSystem fs;
-  private final Path path;
-  private final long blockSize;
-  private final int bufferSize;
-  private final CompressionCodec codec;
-  private final double paddingTolerance;
-  private final long defaultStripeSize;
-  private final CompressionKind compress;
-  private final boolean addBlockPadding;
-  private final CompressionStrategy compressionStrategy;
-
-  // the streams that make up the current stripe
-  private final Map<StreamName, BufferedStream> streams =
-    new TreeMap<StreamName, BufferedStream>();
-
-  private long adjustedStripeSize;
-  private long headerLength;
-  private long stripeStart;
-  private int metadataLength;
-  private int footerLength;
-
-  public PhysicalFsWriter(FileSystem fs, Path path, int numColumns, OrcFile.WriterOptions opts) {
-    this.fs = fs;
-    this.path = path;
-    this.defaultStripeSize = this.adjustedStripeSize = opts.getStripeSize();
-    this.addBlockPadding = opts.getBlockPadding();
-    if (opts.isEnforceBufferSize()) {
-      this.bufferSize = opts.getBufferSize();
-    } else {
-      this.bufferSize = getEstimatedBufferSize(defaultStripeSize, numColumns, opts.getBufferSize());
-    }
-    this.compress = opts.getCompress();
-    this.compressionStrategy = opts.getCompressionStrategy();
-    codec = createCodec(compress);
-    this.paddingTolerance = opts.getPaddingTolerance();
-    this.blockSize = opts.getBlockSize();
-    LOG.info("ORC writer created for path: {} with stripeSize: {} blockSize: {}" +
-        " compression: {} bufferSize: {}", path, defaultStripeSize, blockSize,
-        compress, bufferSize);
-  }
-
-  @Override
-  public void initialize() throws IOException {
-    if (rawWriter != null) return;
-    rawWriter = fs.create(path, false, HDFS_BUFFER_SIZE,
-                          fs.getDefaultReplication(path), blockSize);
-    rawWriter.writeBytes(OrcFile.MAGIC);
-    headerLength = rawWriter.getPos();
-    writer = new OutStream("metadata", bufferSize, codec,
-                           new DirectStream(rawWriter));
-    protobufWriter = CodedOutputStream.newInstance(writer);
-  }
-
-  private void padStripe(long indexSize, long dataSize, int footerSize) throws IOException {
-    this.stripeStart = rawWriter.getPos();
-    final long currentStripeSize = indexSize + dataSize + footerSize;
-    final long available = blockSize - (stripeStart % blockSize);
-    final long overflow = currentStripeSize - adjustedStripeSize;
-    final float availRatio = (float) available / (float) defaultStripeSize;
-
-    if (availRatio > 0.0f && availRatio < 1.0f
-        && availRatio > paddingTolerance) {
-      // adjust default stripe size to fit into remaining space, also adjust
-      // the next stripe for correction based on the current stripe size
-      // and user specified padding tolerance. Since stripe size can overflow
-      // the default stripe size we should apply this correction to avoid
-      // writing portion of last stripe to next hdfs block.
-      double correction = overflow > 0 ? (double) overflow
-          / (double) adjustedStripeSize : 0.0;
-
-      // correction should not be greater than user specified padding
-      // tolerance
-      correction = correction > paddingTolerance ? paddingTolerance
-          : correction;
-
-      // adjust next stripe size based on current stripe estimate correction
-      adjustedStripeSize = (long) ((1.0f - correction) * (availRatio * defaultStripeSize));
-    } else if (availRatio >= 1.0) {
-      adjustedStripeSize = defaultStripeSize;
-    }
-
-    if (availRatio < paddingTolerance && addBlockPadding) {
-      long padding = blockSize - (stripeStart % blockSize);
-      byte[] pad = new byte[(int) Math.min(HDFS_BUFFER_SIZE, padding)];
-      LOG.info(String.format("Padding ORC by %d bytes (<=  %.2f * %d)", 
-          padding, availRatio, defaultStripeSize));
-      stripeStart += padding;
-      while (padding > 0) {
-        int writeLen = (int) Math.min(padding, pad.length);
-        rawWriter.write(pad, 0, writeLen);
-        padding -= writeLen;
-      }
-      adjustedStripeSize = defaultStripeSize;
-    } else if (currentStripeSize < blockSize
-        && (stripeStart % blockSize) + currentStripeSize > blockSize) {
-      // even if you don't pad, reset the default stripe size when crossing a
-      // block boundary
-      adjustedStripeSize = defaultStripeSize;
-    }
-  }
-
-  /**
-   * An output receiver that writes the ByteBuffers to the output stream
-   * as they are received.
-   */
-  private class DirectStream implements OutStream.OutputReceiver {
-    private final FSDataOutputStream output;
-
-    DirectStream(FSDataOutputStream output) {
-      this.output = output;
-    }
-
-    @Override
-    public void output(ByteBuffer buffer) throws IOException {
-      output.write(buffer.array(), buffer.arrayOffset() + buffer.position(), buffer.remaining());
-    }
-  }
-
-  @Override
-  public long getPhysicalStripeSize() {
-    return adjustedStripeSize;
-  }
-
-  @Override
-  public boolean isCompressed() {
-    return codec != null;
-  }
-
-
-  public static CompressionCodec createCodec(CompressionKind kind) {
-    switch (kind) {
-      case NONE:
-        return null;
-      case ZLIB:
-        return new ZlibCodec();
-      case SNAPPY:
-        return new SnappyCodec();
-      case LZO:
-        try {
-          ClassLoader loader = Thread.currentThread().getContextClassLoader();
-          if (loader == null) {
-            loader = WriterImpl.class.getClassLoader();
-          }
-          @SuppressWarnings("unchecked")
-          Class<? extends CompressionCodec> lzo =
-              (Class<? extends CompressionCodec>)
-              loader.loadClass("org.apache.hadoop.hive.ql.io.orc.LzoCodec");
-          return lzo.newInstance();
-        } catch (ClassNotFoundException e) {
-          throw new IllegalArgumentException("LZO is not available.", e);
-        } catch (InstantiationException e) {
-          throw new IllegalArgumentException("Problem initializing LZO", e);
-        } catch (IllegalAccessException e) {
-          throw new IllegalArgumentException("Insufficient access to LZO", e);
-        }
-      default:
-        throw new IllegalArgumentException("Unknown compression codec: " +
-            kind);
-    }
-  }
-
-  private void writeStripeFooter(StripeFooter footer, long dataSize, long indexSize,
-      StripeInformation.Builder dirEntry) throws IOException {
-    footer.writeTo(protobufWriter);
-    protobufWriter.flush();
-    writer.flush();
-    dirEntry.setOffset(stripeStart);
-    dirEntry.setFooterLength(rawWriter.getPos() - stripeStart - dataSize - indexSize);
-  }
-
-  @VisibleForTesting
-  public static int getEstimatedBufferSize(long stripeSize, int numColumns,
-                                           int bs) {
-    // The worst case is that there are 2 big streams per column and
-    // we want to guarantee that each stream gets ~10 buffers.
-    // This keeps buffers small enough that we don't get really small stripe
-    // sizes.
-    int estBufferSize = (int) (stripeSize / (20 * numColumns));
-    estBufferSize = getClosestBufferSize(estBufferSize);
-    return estBufferSize > bs ? bs : estBufferSize;
-  }
-
-  private static int getClosestBufferSize(int estBufferSize) {
-    final int kb4 = 4 * 1024;
-    final int kb8 = 8 * 1024;
-    final int kb16 = 16 * 1024;
-    final int kb32 = 32 * 1024;
-    final int kb64 = 64 * 1024;
-    final int kb128 = 128 * 1024;
-    final int kb256 = 256 * 1024;
-    if (estBufferSize <= kb4) {
-      return kb4;
-    } else if (estBufferSize > kb4 && estBufferSize <= kb8) {
-      return kb8;
-    } else if (estBufferSize > kb8 && estBufferSize <= kb16) {
-      return kb16;
-    } else if (estBufferSize > kb16 && estBufferSize <= kb32) {
-      return kb32;
-    } else if (estBufferSize > kb32 && estBufferSize <= kb64) {
-      return kb64;
-    } else if (estBufferSize > kb64 && estBufferSize <= kb128) {
-      return kb128;
-    } else {
-      return kb256;
-    }
-  }
-
-  @Override
-  public void writeFileMetadata(Metadata.Builder builder) throws IOException {
-    long startPosn = rawWriter.getPos();
-    Metadata metadata = builder.build();
-    metadata.writeTo(protobufWriter);
-    protobufWriter.flush();
-    writer.flush();
-    this.metadataLength = (int) (rawWriter.getPos() - startPosn);
-  }
-
-  @Override
-  public void writeFileFooter(Footer.Builder builder) throws IOException {
-    long bodyLength = rawWriter.getPos() - metadataLength;
-    builder.setContentLength(bodyLength);
-    builder.setHeaderLength(headerLength);
-    long startPosn = rawWriter.getPos();
-    Footer footer = builder.build();
-    footer.writeTo(protobufWriter);
-    protobufWriter.flush();
-    writer.flush();
-    this.footerLength = (int) (rawWriter.getPos() - startPosn);
-  }
-
-  @Override
-  public void writePostScript(PostScript.Builder builder) throws IOException {
-    builder.setCompression(writeCompressionKind(compress));
-    builder.setFooterLength(footerLength);
-    builder.setMetadataLength(metadataLength);
-    if (compress != CompressionKind.NONE) {
-      builder.setCompressionBlockSize(bufferSize);
-    }
-    PostScript ps = builder.build();
-    // need to write this uncompressed
-    long startPosn = rawWriter.getPos();
-    ps.writeTo(rawWriter);
-    long length = rawWriter.getPos() - startPosn;
-    if (length > 255) {
-      throw new IllegalArgumentException("PostScript too large at " + length);
-    }
-    rawWriter.writeByte((int)length);
-  }
-
-  @Override
-  public void close() throws IOException {
-    rawWriter.close();
-  }
-
-  private OrcProto.CompressionKind writeCompressionKind(CompressionKind kind) {
-    switch (kind) {
-      case NONE: return OrcProto.CompressionKind.NONE;
-      case ZLIB: return OrcProto.CompressionKind.ZLIB;
-      case SNAPPY: return OrcProto.CompressionKind.SNAPPY;
-      case LZO: return OrcProto.CompressionKind.LZO;
-      default:
-        throw new IllegalArgumentException("Unknown compression " + kind);
-    }
-  }
-
-  @Override
-  public void flush() throws IOException {
-    rawWriter.hflush();
-    // TODO: reset?
-  }
-
-  @Override
-  public long getRawWriterPosition() throws IOException {
-    return rawWriter.getPos();
-  }
-
-  @Override
-  public void appendRawStripe(byte[] stripe, int offset, int length,
-      StripeInformation.Builder dirEntry) throws IOException {
-    long start = rawWriter.getPos();
-    long availBlockSpace = blockSize - (start % blockSize);
-
-    // see if stripe can fit in the current hdfs block, else pad the remaining
-    // space in the block
-    if (length < blockSize && length > availBlockSpace &&
-        addBlockPadding) {
-      byte[] pad = new byte[(int) Math.min(HDFS_BUFFER_SIZE, availBlockSpace)];
-      LOG.info(String.format("Padding ORC by %d bytes while merging..",
-          availBlockSpace));
-      start += availBlockSpace;
-      while (availBlockSpace > 0) {
-        int writeLen = (int) Math.min(availBlockSpace, pad.length);
-        rawWriter.write(pad, 0, writeLen);
-        availBlockSpace -= writeLen;
-      }
-    }
-
-    rawWriter.write(stripe);
-    dirEntry.setOffset(start);
-  }
-
-
-  /**
-   * This class is used to hold the contents of streams as they are buffered.
-   * The TreeWriters write to the outStream and the codec compresses the
-   * data as buffers fill up and stores them in the output list. When the
-   * stripe is being written, the whole stream is written to the file.
-   */
-  private class BufferedStream implements OutStream.OutputReceiver {
-    private final OutStream outStream;
-    private final List<ByteBuffer> output = new ArrayList<ByteBuffer>();
-
-    BufferedStream(String name, int bufferSize,
-                   CompressionCodec codec) throws IOException {
-      outStream = new OutStream(name, bufferSize, codec, this);
-    }
-
-    /**
-     * Receive a buffer from the compression codec.
-     * @param buffer the buffer to save
-     */
-    @Override
-    public void output(ByteBuffer buffer) {
-      output.add(buffer);
-    }
-
-    /**
-     * @return the number of bytes in buffers that are allocated to this stream.
-     */
-    public long getBufferSize() {
-      long result = 0;
-      for (ByteBuffer buf: output) {
-        result += buf.capacity();
-      }
-      return outStream.getBufferSize() + result;
-    }
-
-    /**
-     * Write any saved buffers to the OutputStream if needed, and clears all the buffers.
-     */
-    public void spillToDiskAndClear() throws IOException {
-      if (!outStream.isSuppressed()) {
-        for (ByteBuffer buffer: output) {
-          rawWriter.write(buffer.array(), buffer.arrayOffset() + buffer.position(),
-            buffer.remaining());
-        }
-      }
-      outStream.clear();
-      output.clear();
-    }
-
-    /**
-     * @return The number of bytes that will be written to the output. Assumes the stream writing
-     *         into this receiver has already been flushed.
-     */
-    public long getOutputSize() {
-      long result = 0;
-      for (ByteBuffer buffer: output) {
-        result += buffer.remaining();
-      }
-      return result;
-    }
-
-    @Override
-    public String toString() {
-      return outStream.toString();
-    }
-  }
-
-  @Override
-  public OutStream getOrCreatePhysicalStream(StreamName name) throws IOException {
-    BufferedStream result = streams.get(name);
-    if (result == null) {
-      EnumSet<Modifier> modifiers = createCompressionModifiers(name.getKind());
-      result = new BufferedStream(name.toString(), bufferSize,
-          codec == null ? null : codec.modify(modifiers));
-      streams.put(name, result);
-    }
-    return result.outStream;
-  }
-
-  private EnumSet<Modifier> createCompressionModifiers(Kind kind) {
-    switch (kind) {
-      case BLOOM_FILTER:
-      case DATA:
-      case DICTIONARY_DATA:
-        return EnumSet.of(Modifier.TEXT,
-            compressionStrategy == CompressionStrategy.SPEED ? Modifier.FAST : Modifier.DEFAULT);
-      case LENGTH:
-      case DICTIONARY_COUNT:
-      case PRESENT:
-      case ROW_INDEX:
-      case SECONDARY:
-        // easily compressed using the fastest modes
-        return EnumSet.of(CompressionCodec.Modifier.FASTEST, CompressionCodec.Modifier.BINARY);
-      default:
-        LOG.warn("Missing ORC compression modifiers for " + kind);
-        return null;
-    }
-  }
-
-  @Override
-  public void finalizeStripe(StripeFooter.Builder footerBuilder,
-      StripeInformation.Builder dirEntry) throws IOException {
-    long indexSize = 0;
-    long dataSize = 0;
-    for (Map.Entry<StreamName, BufferedStream> pair: streams.entrySet()) {
-      BufferedStream receiver = pair.getValue();
-      OutStream outStream = receiver.outStream;
-      if (!outStream.isSuppressed()) {
-        outStream.flush();
-        long streamSize = receiver.getOutputSize();
-        StreamName name = pair.getKey();
-        footerBuilder.addStreams(OrcProto.Stream.newBuilder().setColumn(name.getColumn())
-            .setKind(name.getKind()).setLength(streamSize));
-        if (StreamName.Area.INDEX == name.getArea()) {
-          indexSize += streamSize;
-        } else {
-          dataSize += streamSize;
-        }
-      }
-    }
-    dirEntry.setIndexLength(indexSize).setDataLength(dataSize);
-
-    OrcProto.StripeFooter footer = footerBuilder.build();
-    // Do we need to pad the file so the stripe doesn't straddle a block boundary?
-    padStripe(indexSize, dataSize, footer.getSerializedSize());
-
-    // write out the data streams
-    for (Map.Entry<StreamName, BufferedStream> pair : streams.entrySet()) {
-      pair.getValue().spillToDiskAndClear();
-    }
-    // Write out the footer.
-    writeStripeFooter(footer, dataSize, indexSize, dirEntry);
-  }
-
-  @Override
-  public long estimateMemory() {
-    long result = 0;
-    for (BufferedStream stream: streams.values()) {
-      result += stream.getBufferSize();
-    }
-    return result;
-  }
-
-  @Override
-  public void writeIndexStream(StreamName name, Builder rowIndex) throws IOException {
-    OutStream stream = getOrCreatePhysicalStream(name);
-    rowIndex.build().writeTo(stream);
-    stream.flush();
-  }
-
-  @Override
-  public void writeBloomFilterStream(
-      StreamName name, BloomFilterIndex.Builder bloomFilterIndex) throws IOException {
-    OutStream stream = getOrCreatePhysicalStream(name);
-    bloomFilterIndex.build().writeTo(stream);
-    stream.flush();
-  }
-
-  @VisibleForTesting
-  public OutputStream getStream() throws IOException {
-    initialize();
-    return rawWriter;
-  }
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/impl/PhysicalWriter.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/PhysicalWriter.java b/orc/src/java/org/apache/orc/impl/PhysicalWriter.java
deleted file mode 100644
index 5ba1b9b..0000000
--- a/orc/src/java/org/apache/orc/impl/PhysicalWriter.java
+++ /dev/null
@@ -1,122 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc.impl;
-
-import java.io.IOException;
-
-import org.apache.orc.OrcProto.BloomFilterIndex;
-import org.apache.orc.OrcProto.Footer;
-import org.apache.orc.OrcProto.Metadata;
-import org.apache.orc.OrcProto.PostScript;
-import org.apache.orc.OrcProto.RowIndex;
-import org.apache.orc.OrcProto.StripeFooter;
-import org.apache.orc.OrcProto.StripeInformation;
-
-public interface PhysicalWriter {
-
-  /**
-   * Creates all the streams/connections/etc. necessary to write.
-   */
-  void initialize() throws IOException;
-
-  /**
-   * Writes out the file metadata.
-   * @param builder Metadata builder to finalize and write.
-   */
-  void writeFileMetadata(Metadata.Builder builder) throws IOException;
-
-  /**
-   * Writes out the file footer.
-   * @param builder Footer builder to finalize and write.
-   */
-  void writeFileFooter(Footer.Builder builder) throws IOException;
-
-  /**
-   * Writes out the postscript (including the size byte if needed).
-   * @param builder Postscript builder to finalize and write.
-   */
-  void writePostScript(PostScript.Builder builder) throws IOException;
-
-  /**
-   * Creates physical stream to write data to.
-   * @param name Stream name.
-   * @return The output stream.
-   */
-  OutStream getOrCreatePhysicalStream(StreamName name) throws IOException;
-
-  /**
-   * Flushes the data in all the streams, spills them to disk, write out stripe footer.
-   * @param footer Stripe footer to be updated with relevant data and written out.
-   * @param dirEntry File metadata entry for the stripe, to be updated with relevant data.
-   */
-  void finalizeStripe(StripeFooter.Builder footer,
-      StripeInformation.Builder dirEntry) throws IOException;
-
-  /**
-   * Writes out the index for the stripe column.
-   * @param name Stream name.
-   * @param rowIndex Row index entries to write.
-   */
-  void writeIndexStream(StreamName name, RowIndex.Builder rowIndex) throws IOException;
-
-  /**
-   * Writes out the bloom filter for the stripe column.
-   * @param streamName Stream name.
-   * @param bloomFilterIndex Bloom filter index to write.
-   */
-  void writeBloomFilterStream(StreamName streamName,
-      BloomFilterIndex.Builder bloomFilterIndex) throws IOException;
-
-  /**
-   * Closes the writer.
-   */
-  void close() throws IOException;
-
-  /**
-   * Force-flushes the writer.
-   */
-  void flush() throws IOException;
-
-  /**
-   * @return the physical writer position (e.g. for updater).
-   */
-  long getRawWriterPosition() throws IOException;
-
-  /** @return physical stripe size, taking padding into account. */
-  long getPhysicalStripeSize();
-
-  /** @return whether the writer is compressed. */
-  boolean isCompressed();
-
-  /**
-   * Appends raw stripe data (e.g. for file merger).
-   * @param stripe Stripe data buffer.
-   * @param offset Stripe data buffer offset.
-   * @param length Stripe data buffer length.
-   * @param dirEntry File metadata entry for the stripe, to be updated with relevant data.
-   * @throws IOException
-   */
-  void appendRawStripe(byte[] stripe, int offset, int length,
-      StripeInformation.Builder dirEntry) throws IOException;
-
-  /**
-   * @return the estimated memory usage for the stripe.
-   */
-  long estimateMemory();
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/impl/PositionProvider.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/PositionProvider.java b/orc/src/java/org/apache/orc/impl/PositionProvider.java
deleted file mode 100644
index 47cf481..0000000
--- a/orc/src/java/org/apache/orc/impl/PositionProvider.java
+++ /dev/null
@@ -1,26 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc.impl;
-
-/**
- * An interface used for seeking to a row index.
- */
-public interface PositionProvider {
-  long getNext();
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/impl/PositionRecorder.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/PositionRecorder.java b/orc/src/java/org/apache/orc/impl/PositionRecorder.java
deleted file mode 100644
index 1fff760..0000000
--- a/orc/src/java/org/apache/orc/impl/PositionRecorder.java
+++ /dev/null
@@ -1,25 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc.impl;
-
-/**
- * An interface for recording positions in a stream.
- */
-public interface PositionRecorder {
-  void addPosition(long offset);
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/impl/PositionedOutputStream.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/PositionedOutputStream.java b/orc/src/java/org/apache/orc/impl/PositionedOutputStream.java
deleted file mode 100644
index d412939..0000000
--- a/orc/src/java/org/apache/orc/impl/PositionedOutputStream.java
+++ /dev/null
@@ -1,39 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc.impl;
-
-import java.io.IOException;
-import java.io.OutputStream;
-
-public abstract class PositionedOutputStream extends OutputStream {
-
-  /**
-   * Record the current position to the recorder.
-   * @param recorder the object that receives the position
-   * @throws IOException
-   */
-  public abstract void getPosition(PositionRecorder recorder
-                                   ) throws IOException;
-
-  /**
-   * Get the memory size currently allocated as buffer associated with this
-   * stream.
-   * @return the number of bytes used by buffers.
-   */
-  public abstract long getBufferSize();
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/impl/ReaderImpl.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/ReaderImpl.java b/orc/src/java/org/apache/orc/impl/ReaderImpl.java
deleted file mode 100644
index 70fa628..0000000
--- a/orc/src/java/org/apache/orc/impl/ReaderImpl.java
+++ /dev/null
@@ -1,764 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc.impl;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
-
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.orc.CompressionKind;
-import org.apache.orc.FileMetadata;
-import org.apache.orc.OrcFile;
-import org.apache.orc.OrcUtils;
-import org.apache.orc.Reader;
-import org.apache.orc.RecordReader;
-import org.apache.orc.TypeDescription;
-import org.apache.orc.ColumnStatistics;
-import org.apache.orc.CompressionCodec;
-import org.apache.orc.FileFormatException;
-import org.apache.orc.StripeInformation;
-import org.apache.orc.StripeStatistics;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.common.io.DiskRange;
-import org.apache.hadoop.hive.ql.util.JavaDataModel;
-import org.apache.hadoop.io.Text;
-import org.apache.orc.OrcProto;
-
-import com.google.common.collect.Lists;
-import com.google.protobuf.CodedInputStream;
-
-public class ReaderImpl implements Reader {
-
-  private static final Logger LOG = LoggerFactory.getLogger(ReaderImpl.class);
-
-  private static final int DIRECTORY_SIZE_GUESS = 16 * 1024;
-
-  protected final FileSystem fileSystem;
-  private final long maxLength;
-  protected final Path path;
-  protected final org.apache.orc.CompressionKind compressionKind;
-  protected CompressionCodec codec;
-  protected int bufferSize;
-  protected OrcProto.Metadata metadata;
-  private List<OrcProto.StripeStatistics> stripeStats;
-  private final int metadataSize;
-  protected final List<OrcProto.Type> types;
-  private TypeDescription schema;
-  private final List<OrcProto.UserMetadataItem> userMetadata;
-  private final List<OrcProto.ColumnStatistics> fileStats;
-  private final List<StripeInformation> stripes;
-  protected final int rowIndexStride;
-  private final long contentLength, numberOfRows;
-
-  private long deserializedSize = -1;
-  protected final Configuration conf;
-  private final List<Integer> versionList;
-  private final OrcFile.WriterVersion writerVersion;
-
-  protected OrcTail tail;
-
-  public static class StripeInformationImpl
-      implements StripeInformation {
-    private final OrcProto.StripeInformation stripe;
-
-    public StripeInformationImpl(OrcProto.StripeInformation stripe) {
-      this.stripe = stripe;
-    }
-
-    @Override
-    public long getOffset() {
-      return stripe.getOffset();
-    }
-
-    @Override
-    public long getLength() {
-      return stripe.getDataLength() + getIndexLength() + getFooterLength();
-    }
-
-    @Override
-    public long getDataLength() {
-      return stripe.getDataLength();
-    }
-
-    @Override
-    public long getFooterLength() {
-      return stripe.getFooterLength();
-    }
-
-    @Override
-    public long getIndexLength() {
-      return stripe.getIndexLength();
-    }
-
-    @Override
-    public long getNumberOfRows() {
-      return stripe.getNumberOfRows();
-    }
-
-    @Override
-    public String toString() {
-      return "offset: " + getOffset() + " data: " + getDataLength() +
-        " rows: " + getNumberOfRows() + " tail: " + getFooterLength() +
-        " index: " + getIndexLength();
-    }
-  }
-
-  @Override
-  public long getNumberOfRows() {
-    return numberOfRows;
-  }
-
-  @Override
-  public List<String> getMetadataKeys() {
-    List<String> result = new ArrayList<String>();
-    for(OrcProto.UserMetadataItem item: userMetadata) {
-      result.add(item.getName());
-    }
-    return result;
-  }
-
-  @Override
-  public ByteBuffer getMetadataValue(String key) {
-    for(OrcProto.UserMetadataItem item: userMetadata) {
-      if (item.hasName() && item.getName().equals(key)) {
-        return item.getValue().asReadOnlyByteBuffer();
-      }
-    }
-    throw new IllegalArgumentException("Can't find user metadata " + key);
-  }
-
-  public boolean hasMetadataValue(String key) {
-    for(OrcProto.UserMetadataItem item: userMetadata) {
-      if (item.hasName() && item.getName().equals(key)) {
-        return true;
-      }
-    }
-    return false;
-  }
-
-  @Override
-  public org.apache.orc.CompressionKind getCompressionKind() {
-    return compressionKind;
-  }
-
-  @Override
-  public int getCompressionSize() {
-    return bufferSize;
-  }
-
-  @Override
-  public List<StripeInformation> getStripes() {
-    return stripes;
-  }
-
-  @Override
-  public long getContentLength() {
-    return contentLength;
-  }
-
-  @Override
-  public List<OrcProto.Type> getTypes() {
-    return types;
-  }
-
-  @Override
-  public OrcFile.Version getFileVersion() {
-    for (OrcFile.Version version: OrcFile.Version.values()) {
-      if ((versionList != null && !versionList.isEmpty()) &&
-          version.getMajor() == versionList.get(0) &&
-          version.getMinor() == versionList.get(1)) {
-        return version;
-      }
-    }
-    return OrcFile.Version.V_0_11;
-  }
-
-  @Override
-  public OrcFile.WriterVersion getWriterVersion() {
-    return writerVersion;
-  }
-
-  @Override
-  public OrcProto.FileTail getFileTail() {
-    return tail.getFileTail();
-  }
-
-  @Override
-  public int getRowIndexStride() {
-    return rowIndexStride;
-  }
-
-  @Override
-  public ColumnStatistics[] getStatistics() {
-    ColumnStatistics[] result = new ColumnStatistics[types.size()];
-    for(int i=0; i < result.length; ++i) {
-      result[i] = ColumnStatisticsImpl.deserialize(fileStats.get(i));
-    }
-    return result;
-  }
-
-  @Override
-  public TypeDescription getSchema() {
-    return schema;
-  }
-
-  /**
-   * Ensure this is an ORC file to prevent users from trying to read text
-   * files or RC files as ORC files.
-   * @param in the file being read
-   * @param path the filename for error messages
-   * @param psLen the postscript length
-   * @param buffer the tail of the file
-   * @throws IOException
-   */
-  protected static void ensureOrcFooter(FSDataInputStream in,
-                                        Path path,
-                                        int psLen,
-                                        ByteBuffer buffer) throws IOException {
-    int magicLength = OrcFile.MAGIC.length();
-    int fullLength = magicLength + 1;
-    if (psLen < fullLength || buffer.remaining() < fullLength) {
-      throw new FileFormatException("Malformed ORC file " + path +
-          ". Invalid postscript length " + psLen);
-    }
-    int offset = buffer.arrayOffset() + buffer.position() + buffer.limit() - fullLength;
-    byte[] array = buffer.array();
-    // now look for the magic string at the end of the postscript.
-    if (!Text.decode(array, offset, magicLength).equals(OrcFile.MAGIC)) {
-      // If it isn't there, this may be the 0.11.0 version of ORC.
-      // Read the first 3 bytes of the file to check for the header
-      byte[] header = new byte[magicLength];
-      in.readFully(0, header, 0, magicLength);
-      // if it isn't there, this isn't an ORC file
-      if (!Text.decode(header, 0 , magicLength).equals(OrcFile.MAGIC)) {
-        throw new FileFormatException("Malformed ORC file " + path +
-            ". Invalid postscript.");
-      }
-    }
-  }
-
-  /**
-   * Ensure this is an ORC file to prevent users from trying to read text
-   * files or RC files as ORC files.
-   * @param psLen the postscript length
-   * @param buffer the tail of the file
-   * @throws IOException
-   */
-  protected static void ensureOrcFooter(ByteBuffer buffer, int psLen) throws IOException {
-    int magicLength = OrcFile.MAGIC.length();
-    int fullLength = magicLength + 1;
-    if (psLen < fullLength || buffer.remaining() < fullLength) {
-      throw new FileFormatException("Malformed ORC file. Invalid postscript length " + psLen);
-    }
-
-    int offset = buffer.arrayOffset() + buffer.position() + buffer.limit() - fullLength;
-    byte[] array = buffer.array();
-    // now look for the magic string at the end of the postscript.
-    if (!Text.decode(array, offset, magicLength).equals(OrcFile.MAGIC)) {
-      // if it isn't there, this may be 0.11.0 version of the ORC file.
-      // Read the first 3 bytes from the buffer to check for the header
-      if (!Text.decode(buffer.array(), 0, magicLength).equals(OrcFile.MAGIC)) {
-        throw new FileFormatException("Malformed ORC file. Invalid postscript length " + psLen);
-      }
-    }
-  }
-
-  /**
-   * Build a version string out of an array.
-   * @param version the version number as a list
-   * @return the human readable form of the version string
-   */
-  private static String versionString(List<Integer> version) {
-    StringBuilder buffer = new StringBuilder();
-    for(int i=0; i < version.size(); ++i) {
-      if (i != 0) {
-        buffer.append('.');
-      }
-      buffer.append(version.get(i));
-    }
-    return buffer.toString();
-  }
-
-  /**
-   * Check to see if this ORC file is from a future version and if so,
-   * warn the user that we may not be able to read all of the column encodings.
-   * @param log the logger to write any error message to
-   * @param path the data source path for error messages
-   * @param version the version of hive that wrote the file.
-   */
-  protected static void checkOrcVersion(Logger log, Path path,
-                                        List<Integer> version) {
-    if (version.size() >= 1) {
-      int major = version.get(0);
-      int minor = 0;
-      if (version.size() >= 2) {
-        minor = version.get(1);
-      }
-      if (major > OrcFile.Version.CURRENT.getMajor() ||
-          (major == OrcFile.Version.CURRENT.getMajor() &&
-           minor > OrcFile.Version.CURRENT.getMinor())) {
-        log.warn(path + " was written by a future Hive version " +
-                 versionString(version) +
-                 ". This file may not be readable by this version of Hive.");
-      }
-    }
-  }
-
-  /**
-   * Constructor that lets the user specify additional options.
-   * @param path pathname for file
-   * @param options options for reading
-   * @throws IOException
-   */
-  public ReaderImpl(Path path, OrcFile.ReaderOptions options) throws IOException {
-    FileSystem fs = options.getFilesystem();
-    if (fs == null) {
-      fs = path.getFileSystem(options.getConfiguration());
-    }
-    this.fileSystem = fs;
-    this.path = path;
-    this.conf = options.getConfiguration();
-    this.maxLength = options.getMaxLength();
-    FileMetadata fileMetadata = options.getFileMetadata();
-    if (fileMetadata != null) {
-      this.compressionKind = fileMetadata.getCompressionKind();
-      this.bufferSize = fileMetadata.getCompressionBufferSize();
-      this.codec = PhysicalFsWriter.createCodec(compressionKind);
-      this.metadataSize = fileMetadata.getMetadataSize();
-      this.stripeStats = fileMetadata.getStripeStats();
-      this.versionList = fileMetadata.getVersionList();
-      this.writerVersion =
-          OrcFile.WriterVersion.from(fileMetadata.getWriterVersionNum());
-      this.types = fileMetadata.getTypes();
-      this.rowIndexStride = fileMetadata.getRowIndexStride();
-      this.contentLength = fileMetadata.getContentLength();
-      this.numberOfRows = fileMetadata.getNumberOfRows();
-      this.fileStats = fileMetadata.getFileStats();
-      this.stripes = fileMetadata.getStripes();
-      this.userMetadata = null; // not cached and not needed here
-    } else {
-      OrcTail orcTail = options.getOrcTail();
-      if (orcTail == null) {
-        tail = extractFileTail(fs, path, options.getMaxLength());
-        options.orcTail(tail);
-      } else {
-        tail = orcTail;
-      }
-      this.compressionKind = tail.getCompressionKind();
-      this.codec = tail.getCompressionCodec();
-      this.bufferSize = tail.getCompressionBufferSize();
-      this.metadataSize = tail.getMetadataSize();
-      this.versionList = tail.getPostScript().getVersionList();
-      this.types = tail.getFooter().getTypesList();
-      this.rowIndexStride = tail.getFooter().getRowIndexStride();
-      this.contentLength = tail.getFooter().getContentLength();
-      this.numberOfRows = tail.getFooter().getNumberOfRows();
-      this.userMetadata = tail.getFooter().getMetadataList();
-      this.fileStats = tail.getFooter().getStatisticsList();
-      this.writerVersion = tail.getWriterVersion();
-      this.stripes = tail.getStripes();
-      this.stripeStats = tail.getStripeStatisticsProto();
-    }
-    this.schema = OrcUtils.convertTypeFromProtobuf(this.types, 0);
-  }
-
-  /**
-   * Get the WriterVersion based on the ORC file postscript.
-   * @param writerVersion the integer writer version
-   * @return the version of the software that produced the file
-   */
-  public static OrcFile.WriterVersion getWriterVersion(int writerVersion) {
-    for(OrcFile.WriterVersion version: OrcFile.WriterVersion.values()) {
-      if (version.getId() == writerVersion) {
-        return version;
-      }
-    }
-    return OrcFile.WriterVersion.FUTURE;
-  }
-
-  private static OrcProto.Footer extractFooter(ByteBuffer bb, int footerAbsPos,
-      int footerSize, CompressionCodec codec, int bufferSize) throws IOException {
-    bb.position(footerAbsPos);
-    bb.limit(footerAbsPos + footerSize);
-    return OrcProto.Footer.parseFrom(InStream.createCodedInputStream("footer",
-        Lists.<DiskRange>newArrayList(new BufferChunk(bb, 0)), footerSize, codec, bufferSize));
-  }
-
-  public static OrcProto.Metadata extractMetadata(ByteBuffer bb, int metadataAbsPos,
-      int metadataSize, CompressionCodec codec, int bufferSize) throws IOException {
-    bb.position(metadataAbsPos);
-    bb.limit(metadataAbsPos + metadataSize);
-    return OrcProto.Metadata.parseFrom(InStream.createCodedInputStream("metadata",
-        Lists.<DiskRange>newArrayList(new BufferChunk(bb, 0)), metadataSize, codec, bufferSize));
-  }
-
-  private static OrcProto.PostScript extractPostScript(ByteBuffer bb, Path path,
-      int psLen, int psAbsOffset) throws IOException {
-    // TODO: when PB is upgraded to 2.6, newInstance(ByteBuffer) method should be used here.
-    assert bb.hasArray();
-    CodedInputStream in = CodedInputStream.newInstance(
-        bb.array(), bb.arrayOffset() + psAbsOffset, psLen);
-    OrcProto.PostScript ps = OrcProto.PostScript.parseFrom(in);
-    checkOrcVersion(LOG, path, ps.getVersionList());
-
-    // Check compression codec.
-    switch (ps.getCompression()) {
-      case NONE:
-        break;
-      case ZLIB:
-        break;
-      case SNAPPY:
-        break;
-      case LZO:
-        break;
-      default:
-        throw new IllegalArgumentException("Unknown compression");
-    }
-    return ps;
-  }
-
-  public static OrcTail extractFileTail(ByteBuffer buffer)
-      throws IOException {
-    return extractFileTail(buffer, -1, -1);
-  }
-
-  public static OrcTail extractFileTail(ByteBuffer buffer, long fileLength, long modificationTime)
-      throws IOException {
-    int readSize = buffer.limit();
-    int psLen = buffer.get(readSize - 1) & 0xff;
-    int psOffset = readSize - 1 - psLen;
-    ensureOrcFooter(buffer, psLen);
-    byte[] psBuffer = new byte[psLen];
-    System.arraycopy(buffer.array(), psOffset, psBuffer, 0, psLen);
-    OrcProto.PostScript ps = OrcProto.PostScript.parseFrom(psBuffer);
-    int footerSize = (int) ps.getFooterLength();
-    CompressionCodec codec = PhysicalFsWriter
-        .createCodec(CompressionKind.valueOf(ps.getCompression().name()));
-    OrcProto.Footer footer = extractFooter(buffer,
-        (int) (buffer.position() + ps.getMetadataLength()),
-        footerSize, codec, (int) ps.getCompressionBlockSize());
-    OrcProto.FileTail.Builder fileTailBuilder = OrcProto.FileTail.newBuilder()
-        .setPostscriptLength(psLen)
-        .setPostscript(ps)
-        .setFooter(footer)
-        .setFileLength(fileLength);
-    // clear does not clear the contents but sets position to 0 and limit = capacity
-    buffer.clear();
-    return new OrcTail(fileTailBuilder.build(), buffer.slice(), modificationTime);
-  }
-
-  protected OrcTail extractFileTail(FileSystem fs, Path path,
-      long maxFileLength) throws IOException {
-    FSDataInputStream file = fs.open(path);
-    ByteBuffer buffer;
-    OrcProto.PostScript ps;
-    OrcProto.FileTail.Builder fileTailBuilder = OrcProto.FileTail.newBuilder();
-    long modificationTime;
-    try {
-      // figure out the size of the file using the option or filesystem
-      long size;
-      if (maxFileLength == Long.MAX_VALUE) {
-        FileStatus fileStatus = fs.getFileStatus(path);
-        size = fileStatus.getLen();
-        modificationTime = fileStatus.getModificationTime();
-      } else {
-        size = maxFileLength;
-        modificationTime = -1;
-      }
-      fileTailBuilder.setFileLength(size);
-
-      //read last bytes into buffer to get PostScript
-      int readSize = (int) Math.min(size, DIRECTORY_SIZE_GUESS);
-      buffer = ByteBuffer.allocate(readSize);
-      assert buffer.position() == 0;
-      file.readFully((size - readSize),
-          buffer.array(), buffer.arrayOffset(), readSize);
-      buffer.position(0);
-
-      //read the PostScript
-      //get length of PostScript
-      int psLen = buffer.get(readSize - 1) & 0xff;
-      ensureOrcFooter(file, path, psLen, buffer);
-      int psOffset = readSize - 1 - psLen;
-      ps = extractPostScript(buffer, path, psLen, psOffset);
-      bufferSize = (int) ps.getCompressionBlockSize();
-      codec = PhysicalFsWriter.createCodec(CompressionKind.valueOf(ps.getCompression().name()));
-      fileTailBuilder.setPostscriptLength(psLen).setPostscript(ps);
-
-      int footerSize = (int) ps.getFooterLength();
-      int metadataSize = (int) ps.getMetadataLength();
-
-      //check if extra bytes need to be read
-      int extra = Math.max(0, psLen + 1 + footerSize + metadataSize - readSize);
-      int tailSize = 1 + psLen + footerSize + metadataSize;
-      if (extra > 0) {
-        //more bytes need to be read, seek back to the right place and read extra bytes
-        ByteBuffer extraBuf = ByteBuffer.allocate(extra + readSize);
-        file.readFully((size - readSize - extra), extraBuf.array(),
-            extraBuf.arrayOffset() + extraBuf.position(), extra);
-        extraBuf.position(extra);
-        //append with already read bytes
-        extraBuf.put(buffer);
-        buffer = extraBuf;
-        buffer.position(0);
-        buffer.limit(tailSize);
-        readSize += extra;
-        psOffset = readSize - 1 - psLen;
-      } else {
-        //footer is already in the bytes in buffer, just adjust position, length
-        buffer.position(psOffset - footerSize - metadataSize);
-        buffer.limit(buffer.position() + tailSize);
-      }
-
-      buffer.mark();
-      int footerOffset = psOffset - footerSize;
-      buffer.position(footerOffset);
-      ByteBuffer footerBuffer = buffer.slice();
-      buffer.reset();
-      OrcProto.Footer footer = extractFooter(footerBuffer, 0, footerSize,
-          codec, bufferSize);
-      fileTailBuilder.setFooter(footer);
-    } finally {
-      try {
-        file.close();
-      } catch (IOException ex) {
-        LOG.error("Failed to close the file after another error", ex);
-      }
-    }
-
-    ByteBuffer serializedTail = ByteBuffer.allocate(buffer.remaining());
-    serializedTail.put(buffer.slice());
-    serializedTail.rewind();
-    return new OrcTail(fileTailBuilder.build(), serializedTail, modificationTime);
-  }
-
-  @Override
-  public ByteBuffer getSerializedFileFooter() {
-    return tail.getSerializedTail();
-  }
-
-  @Override
-  public RecordReader rows() throws IOException {
-    return rows(new Options());
-  }
-
-  @Override
-  public RecordReader rows(Options options) throws IOException {
-    LOG.info("Reading ORC rows from " + path + " with " + options);
-    return new RecordReaderImpl(this, options);
-  }
-
-
-  @Override
-  public long getRawDataSize() {
-    // if the deserializedSize is not computed, then compute it, else
-    // return the already computed size. since we are reading from the footer
-    // we don't have to compute deserialized size repeatedly
-    if (deserializedSize == -1) {
-      List<Integer> indices = Lists.newArrayList();
-      for (int i = 0; i < fileStats.size(); ++i) {
-        indices.add(i);
-      }
-      deserializedSize = getRawDataSizeFromColIndices(indices);
-    }
-    return deserializedSize;
-  }
-
-  @Override
-  public long getRawDataSizeFromColIndices(List<Integer> colIndices) {
-    return getRawDataSizeFromColIndices(colIndices, types, fileStats);
-  }
-
-  public static long getRawDataSizeFromColIndices(
-      List<Integer> colIndices, List<OrcProto.Type> types,
-      List<OrcProto.ColumnStatistics> stats) {
-    long result = 0;
-    for (int colIdx : colIndices) {
-      result += getRawDataSizeOfColumn(colIdx, types, stats);
-    }
-    return result;
-  }
-
-  private static long getRawDataSizeOfColumn(int colIdx, List<OrcProto.Type> types,
-      List<OrcProto.ColumnStatistics> stats) {
-    OrcProto.ColumnStatistics colStat = stats.get(colIdx);
-    long numVals = colStat.getNumberOfValues();
-    OrcProto.Type type = types.get(colIdx);
-
-    switch (type.getKind()) {
-    case BINARY:
-      // old orc format doesn't support binary statistics. checking for binary
-      // statistics is not required as protocol buffers takes care of it.
-      return colStat.getBinaryStatistics().getSum();
-    case STRING:
-    case CHAR:
-    case VARCHAR:
-      // old orc format doesn't support sum for string statistics. checking for
-      // existence is not required as protocol buffers takes care of it.
-
-      // ORC strings are deserialized to java strings. so use java data model's
-      // string size
-      numVals = numVals == 0 ? 1 : numVals;
-      int avgStrLen = (int) (colStat.getStringStatistics().getSum() / numVals);
-      return numVals * JavaDataModel.get().lengthForStringOfLength(avgStrLen);
-    case TIMESTAMP:
-      return numVals * JavaDataModel.get().lengthOfTimestamp();
-    case DATE:
-      return numVals * JavaDataModel.get().lengthOfDate();
-    case DECIMAL:
-      return numVals * JavaDataModel.get().lengthOfDecimal();
-    case DOUBLE:
-    case LONG:
-      return numVals * JavaDataModel.get().primitive2();
-    case FLOAT:
-    case INT:
-    case SHORT:
-    case BOOLEAN:
-    case BYTE:
-      return numVals * JavaDataModel.get().primitive1();
-    default:
-      LOG.debug("Unknown primitive category: " + type.getKind());
-      break;
-    }
-
-    return 0;
-  }
-
-  @Override
-  public long getRawDataSizeOfColumns(List<String> colNames) {
-    List<Integer> colIndices = getColumnIndicesFromNames(colNames);
-    return getRawDataSizeFromColIndices(colIndices);
-  }
-
-  private List<Integer> getColumnIndicesFromNames(List<String> colNames) {
-    // top level struct
-    OrcProto.Type type = types.get(0);
-    List<Integer> colIndices = Lists.newArrayList();
-    List<String> fieldNames = type.getFieldNamesList();
-    int fieldIdx;
-    for (String colName : colNames) {
-      if (fieldNames.contains(colName)) {
-        fieldIdx = fieldNames.indexOf(colName);
-      } else {
-        String s = "Cannot find field for: " + colName + " in ";
-        for (String fn : fieldNames) {
-          s += fn + ", ";
-        }
-        LOG.warn(s);
-        continue;
-      }
-
-      // a single field may span multiple columns. find start and end column
-      // index for the requested field
-      int idxStart = type.getSubtypes(fieldIdx);
-
-      int idxEnd;
-
-      // if the specified field is the last field, the end index is the last
-      // column index
-      if (fieldIdx + 1 > fieldNames.size() - 1) {
-        idxEnd = getLastIdx() + 1;
-      } else {
-        idxEnd = type.getSubtypes(fieldIdx + 1);
-      }
-
-      // if the start and end indices are the same, the field is primitive;
-      // otherwise it is a complex field (map, list, struct or union)
-      if (idxStart == idxEnd) {
-        // simple field
-        colIndices.add(idxStart);
-      } else {
-        // a complex field spans multiple columns
-        for (int i = idxStart; i < idxEnd; i++) {
-          colIndices.add(i);
-        }
-      }
-    }
-    return colIndices;
-  }
-
-  private int getLastIdx() {
-    Set<Integer> indices = new HashSet<>();
-    for (OrcProto.Type type : types) {
-      indices.addAll(type.getSubtypesList());
-    }
-    return Collections.max(indices);
-  }
-
-  @Override
-  public List<OrcProto.StripeStatistics> getOrcProtoStripeStatistics() {
-    return stripeStats;
-  }
-
-  @Override
-  public List<OrcProto.ColumnStatistics> getOrcProtoFileStatistics() {
-    return fileStats;
-  }
-
-  @Override
-  public List<StripeStatistics> getStripeStatistics() throws IOException {
-    if (stripeStats == null && metadata == null) {
-      metadata = extractMetadata(tail.getSerializedTail(), 0, metadataSize, codec, bufferSize);
-      stripeStats = metadata.getStripeStatsList();
-    }
-    List<StripeStatistics> result = new ArrayList<>();
-    for (OrcProto.StripeStatistics ss : stripeStats) {
-      result.add(new StripeStatistics(ss.getColStatsList()));
-    }
-    return result;
-  }
-
-  public List<OrcProto.UserMetadataItem> getOrcProtoUserMetadata() {
-    return userMetadata;
-  }
-
-  @Override
-  public List<Integer> getVersionList() {
-    return versionList;
-  }
-
-  @Override
-  public int getMetadataSize() {
-    return metadataSize;
-  }
-
-  @Override
-  public String toString() {
-    StringBuilder buffer = new StringBuilder();
-    buffer.append("ORC Reader(");
-    buffer.append(path);
-    if (maxLength != -1) {
-      buffer.append(", ");
-      buffer.append(maxLength);
-    }
-    buffer.append(")");
-    return buffer.toString();
-  }
-}
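
The getRawDataSizeOfColumn logic above estimates a column's deserialized footprint purely from footer statistics; for strings it multiplies the value count by the Java memory cost of an average-length string. A minimal standalone sketch of that string estimate, using illustrative per-object overhead numbers rather than Hive's real JavaDataModel constants:

    // Rough sketch of the string-column raw-size estimate above.
    // The 64-byte object overhead and 2 bytes per char are assumptions,
    // not the actual JavaDataModel values.
    public class RawSizeEstimateDemo {
      static long lengthForStringOfLength(int strLen) {
        return 64 + 2L * strLen;
      }

      static long stringColumnRawSize(long numberOfValues, long sumOfLengths) {
        long numVals = numberOfValues == 0 ? 1 : numberOfValues;  // avoid divide by zero
        int avgStrLen = (int) (sumOfLengths / numVals);
        return numVals * lengthForStringOfLength(avgStrLen);
      }

      public static void main(String[] args) {
        // 1,000 string values whose lengths sum to 12,000 characters
        System.out.println(stringColumnRawSize(1000, 12000));     // 1000 * (64 + 2 * 12) = 88000
      }
    }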


[17/37] hive git commit: HIVE-17118. Move the hive-orc source files to make the package names unique.

Posted by om...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/impl/RunLengthByteWriter.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/RunLengthByteWriter.java b/orc/src/java/org/apache/orc/impl/RunLengthByteWriter.java
deleted file mode 100644
index 09108b2..0000000
--- a/orc/src/java/org/apache/orc/impl/RunLengthByteWriter.java
+++ /dev/null
@@ -1,106 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc.impl;
-
-import java.io.IOException;
-
-/**
- * A streamFactory that writes a sequence of bytes. A control byte is written before
- * each run, with positive values 0 to 127 meaning 3 to 130 repetitions. If the
- * control byte is -1 to -128, 1 to 128 literal byte values follow.
- */
-public class RunLengthByteWriter {
-  static final int MIN_REPEAT_SIZE = 3;
-  static final int MAX_LITERAL_SIZE = 128;
-  static final int MAX_REPEAT_SIZE= 127 + MIN_REPEAT_SIZE;
-  private final PositionedOutputStream output;
-  private final byte[] literals = new byte[MAX_LITERAL_SIZE];
-  private int numLiterals = 0;
-  private boolean repeat = false;
-  private int tailRunLength = 0;
-
-  public RunLengthByteWriter(PositionedOutputStream output) {
-    this.output = output;
-  }
-
-  private void writeValues() throws IOException {
-    if (numLiterals != 0) {
-      if (repeat) {
-        output.write(numLiterals - MIN_REPEAT_SIZE);
-        output.write(literals, 0, 1);
-      } else {
-        output.write(-numLiterals);
-        output.write(literals, 0, numLiterals);
-      }
-      repeat = false;
-      tailRunLength = 0;
-      numLiterals = 0;
-    }
-  }
-
-  public void flush() throws IOException {
-    writeValues();
-    output.flush();
-  }
-
-  public void write(byte value) throws IOException {
-    if (numLiterals == 0) {
-      literals[numLiterals++] = value;
-      tailRunLength = 1;
-    } else if (repeat) {
-      if (value == literals[0]) {
-        numLiterals += 1;
-        if (numLiterals == MAX_REPEAT_SIZE) {
-          writeValues();
-        }
-      } else {
-        writeValues();
-        literals[numLiterals++] = value;
-        tailRunLength = 1;
-      }
-    } else {
-      if (value == literals[numLiterals - 1]) {
-        tailRunLength += 1;
-      } else {
-        tailRunLength = 1;
-      }
-      if (tailRunLength == MIN_REPEAT_SIZE) {
-        if (numLiterals + 1 == MIN_REPEAT_SIZE) {
-          repeat = true;
-          numLiterals += 1;
-        } else {
-          numLiterals -= MIN_REPEAT_SIZE - 1;
-          writeValues();
-          literals[0] = value;
-          repeat = true;
-          numLiterals = MIN_REPEAT_SIZE;
-        }
-      } else {
-        literals[numLiterals++] = value;
-        if (numLiterals == MAX_LITERAL_SIZE) {
-          writeValues();
-        }
-      }
-    }
-  }
-
-  public void getPosition(PositionRecorder recorder) throws IOException {
-    output.getPosition(recorder);
-    recorder.addPosition(numLiterals);
-  }
-}
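
The class comment and writeValues() above define the byte run-length framing: a repeat run of n identical bytes (3 <= n <= 130) is emitted as the control byte n - 3 followed by the value, while k literal bytes (1 <= k <= 128) are emitted as the control byte -k followed by the bytes themselves. A small self-contained sketch of that framing (illustration only, not the writer's buffering logic):

    import java.io.ByteArrayOutputStream;

    public class ByteRleFramingDemo {
      static void writeRepeat(ByteArrayOutputStream out, int count, byte value) {
        out.write(count - 3);          // control byte 0..127 => 3..130 repetitions
        out.write(value);
      }

      static void writeLiterals(ByteArrayOutputStream out, byte[] values) {
        out.write(-values.length);     // control byte -1..-128 => 1..128 literals
        out.write(values, 0, values.length);
      }

      public static void main(String[] args) {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        writeRepeat(out, 5, (byte) 7);              // 7 repeated 5 times -> [2, 7]
        writeLiterals(out, new byte[]{1, 2, 3});    // no run             -> [-3, 1, 2, 3]
        System.out.println(java.util.Arrays.toString(out.toByteArray()));
        // prints [2, 7, -3, 1, 2, 3]
      }
    }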

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/impl/RunLengthIntegerReader.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/RunLengthIntegerReader.java b/orc/src/java/org/apache/orc/impl/RunLengthIntegerReader.java
deleted file mode 100644
index b91a263..0000000
--- a/orc/src/java/org/apache/orc/impl/RunLengthIntegerReader.java
+++ /dev/null
@@ -1,173 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc.impl;
-
-import java.io.EOFException;
-import java.io.IOException;
-
-import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
-
-/**
- * A reader that reads a sequence of integers.
- */
-public class RunLengthIntegerReader implements IntegerReader {
-  private InStream input;
-  private final boolean signed;
-  private final long[] literals =
-    new long[RunLengthIntegerWriter.MAX_LITERAL_SIZE];
-  private int numLiterals = 0;
-  private int delta = 0;
-  private int used = 0;
-  private boolean repeat = false;
-  private SerializationUtils utils;
-
-  public RunLengthIntegerReader(InStream input, boolean signed) throws IOException {
-    this.input = input;
-    this.signed = signed;
-    this.utils = new SerializationUtils();
-  }
-
-  private void readValues(boolean ignoreEof) throws IOException {
-    int control = input.read();
-    if (control == -1) {
-      if (!ignoreEof) {
-        throw new EOFException("Read past end of RLE integer from " + input);
-      }
-      used = numLiterals = 0;
-      return;
-    } else if (control < 0x80) {
-      numLiterals = control + RunLengthIntegerWriter.MIN_REPEAT_SIZE;
-      used = 0;
-      repeat = true;
-      delta = input.read();
-      if (delta == -1) {
-        throw new EOFException("End of stream in RLE Integer from " + input);
-      }
-      // convert from 0 to 255 to -128 to 127 by converting to a signed byte
-      delta = (byte) (0 + delta);
-      if (signed) {
-        literals[0] = utils.readVslong(input);
-      } else {
-        literals[0] = utils.readVulong(input);
-      }
-    } else {
-      repeat = false;
-      numLiterals = 0x100 - control;
-      used = 0;
-      for(int i=0; i < numLiterals; ++i) {
-        if (signed) {
-          literals[i] = utils.readVslong(input);
-        } else {
-          literals[i] = utils.readVulong(input);
-        }
-      }
-    }
-  }
-
-  @Override
-  public boolean hasNext() throws IOException {
-    return used != numLiterals || input.available() > 0;
-  }
-
-  @Override
-  public long next() throws IOException {
-    long result;
-    if (used == numLiterals) {
-      readValues(false);
-    }
-    if (repeat) {
-      result = literals[0] + (used++) * delta;
-    } else {
-      result = literals[used++];
-    }
-    return result;
-  }
-
-  @Override
-  public void nextVector(ColumnVector previous,
-                         long[] data,
-                         int previousLen) throws IOException {
-    previous.isRepeating = true;
-    for (int i = 0; i < previousLen; i++) {
-      if (!previous.isNull[i]) {
-        data[i] = next();
-      } else {
-        // The default value used for nulls of int types in vectorized
-        // processing is 1, so set that when the value is null
-        data[i] = 1;
-      }
-
-      // The default value for nulls of int types in vectorization is 1, and
-      // since a non-null value can also be 1, isNull must be checked as well
-      // when determining the isRepeating flag.
-      if (previous.isRepeating
-          && i > 0
-          && (data[0] != data[i] || previous.isNull[0] != previous.isNull[i])) {
-        previous.isRepeating = false;
-      }
-    }
-  }
-
-  @Override
-  public void nextVector(ColumnVector vector,
-                         int[] data,
-                         int size) throws IOException {
-    if (vector.noNulls) {
-      for(int r=0; r < data.length && r < size; ++r) {
-        data[r] = (int) next();
-      }
-    } else if (!(vector.isRepeating && vector.isNull[0])) {
-      for(int r=0; r < data.length && r < size; ++r) {
-        if (!vector.isNull[r]) {
-          data[r] = (int) next();
-        } else {
-          data[r] = 1;
-        }
-      }
-    }
-  }
-
-  @Override
-  public void seek(PositionProvider index) throws IOException {
-    input.seek(index);
-    int consumed = (int) index.getNext();
-    if (consumed != 0) {
-      // a loop is required for cases where we break the run into two parts
-      while (consumed > 0) {
-        readValues(false);
-        used = consumed;
-        consumed -= numLiterals;
-      }
-    } else {
-      used = 0;
-      numLiterals = 0;
-    }
-  }
-
-  @Override
-  public void skip(long numValues) throws IOException {
-    while (numValues > 0) {
-      if (used == numLiterals) {
-        readValues(false);
-      }
-      long consume = Math.min(numValues, numLiterals - used);
-      used += consume;
-      numValues -= consume;
-    }
-  }
-}
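
readValues() above drives everything from a single control byte: values below 0x80 announce a run of (control + 3) integers described by a signed-byte delta plus a varint base, while values 0x80 and above announce (0x100 - control) literal varints. A minimal sketch of just that header interpretation (the varint decoding itself is omitted):

    public class IntRleV1HeaderDemo {
      public static void main(String[] args) {
        int[] controls = {0x00, 0x61, 0xFF, 0x80};
        for (int control : controls) {
          if (control < 0x80) {
            // run: a signed-byte delta and then the base value (vint) follow
            int runLength = control + 3;            // MIN_REPEAT_SIZE is 3
            System.out.println("run of " + runLength + " values");
          } else {
            // literals: 1..128 vint values follow
            int literalCount = 0x100 - control;
            System.out.println(literalCount + " literal vint values");
          }
        }
        // prints: run of 3 values, run of 100 values, 1 literal..., 128 literal...
      }
    }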

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/impl/RunLengthIntegerReaderV2.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/RunLengthIntegerReaderV2.java b/orc/src/java/org/apache/orc/impl/RunLengthIntegerReaderV2.java
deleted file mode 100644
index 610d9b5..0000000
--- a/orc/src/java/org/apache/orc/impl/RunLengthIntegerReaderV2.java
+++ /dev/null
@@ -1,406 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc.impl;
-
-import java.io.EOFException;
-import java.io.IOException;
-import java.util.Arrays;
-
-import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * A reader that reads a sequence of lightweight compressed integers. Refer to
- * {@link RunLengthIntegerWriterV2} for a description of the lightweight
- * compression techniques.
- */
-public class RunLengthIntegerReaderV2 implements IntegerReader {
-  public static final Logger LOG = LoggerFactory.getLogger(RunLengthIntegerReaderV2.class);
-
-  private InStream input;
-  private final boolean signed;
-  private final long[] literals = new long[RunLengthIntegerWriterV2.MAX_SCOPE];
-  private boolean isRepeating = false;
-  private int numLiterals = 0;
-  private int used = 0;
-  private final boolean skipCorrupt;
-  private final SerializationUtils utils;
-  private RunLengthIntegerWriterV2.EncodingType currentEncoding;
-
-  public RunLengthIntegerReaderV2(InStream input, boolean signed,
-      boolean skipCorrupt) throws IOException {
-    this.input = input;
-    this.signed = signed;
-    this.skipCorrupt = skipCorrupt;
-    this.utils = new SerializationUtils();
-  }
-
-  private final static RunLengthIntegerWriterV2.EncodingType[] encodings = RunLengthIntegerWriterV2.EncodingType.values();
-  private void readValues(boolean ignoreEof) throws IOException {
-    // read the first 2 bits and determine the encoding type
-    isRepeating = false;
-    int firstByte = input.read();
-    if (firstByte < 0) {
-      if (!ignoreEof) {
-        throw new EOFException("Read past end of RLE integer from " + input);
-      }
-      used = numLiterals = 0;
-      return;
-    }
-    currentEncoding = encodings[(firstByte >>> 6) & 0x03];
-    switch (currentEncoding) {
-    case SHORT_REPEAT: readShortRepeatValues(firstByte); break;
-    case DIRECT: readDirectValues(firstByte); break;
-    case PATCHED_BASE: readPatchedBaseValues(firstByte); break;
-    case DELTA: readDeltaValues(firstByte); break;
-    default: throw new IOException("Unknown encoding " + currentEncoding);
-    }
-  }
-
-  private void readDeltaValues(int firstByte) throws IOException {
-
-    // extract the number of fixed bits
-    int fb = (firstByte >>> 1) & 0x1f;
-    if (fb != 0) {
-      fb = utils.decodeBitWidth(fb);
-    }
-
-    // extract the blob run length
-    int len = (firstByte & 0x01) << 8;
-    len |= input.read();
-
-    // read the first value stored as vint
-    long firstVal = 0;
-    if (signed) {
-      firstVal = utils.readVslong(input);
-    } else {
-      firstVal = utils.readVulong(input);
-    }
-
-    // store first value to result buffer
-    long prevVal = firstVal;
-    literals[numLiterals++] = firstVal;
-
-    // if fixed bits is 0 then all values have fixed delta
-    if (fb == 0) {
-      // read the fixed delta value stored as vint (deltas can be negative even
-      // if all numbers are positive)
-      long fd = utils.readVslong(input);
-      if (fd == 0) {
-        isRepeating = true;
-        assert numLiterals == 1;
-        Arrays.fill(literals, numLiterals, numLiterals + len, literals[0]);
-        numLiterals += len;
-      } else {
-        // add fixed deltas to adjacent values
-        for(int i = 0; i < len; i++) {
-          literals[numLiterals++] = literals[numLiterals - 2] + fd;
-        }
-      }
-    } else {
-      long deltaBase = utils.readVslong(input);
-      // add delta base and first value
-      literals[numLiterals++] = firstVal + deltaBase;
-      prevVal = literals[numLiterals - 1];
-      len -= 1;
-
-      // unpack the values, add each to the previous value and store the final
-      // value in the result buffer. If the delta base is negative the sequence
-      // is decreasing, otherwise it is increasing
-      utils.readInts(literals, numLiterals, len, fb, input);
-      while (len > 0) {
-        if (deltaBase < 0) {
-          literals[numLiterals] = prevVal - literals[numLiterals];
-        } else {
-          literals[numLiterals] = prevVal + literals[numLiterals];
-        }
-        prevVal = literals[numLiterals];
-        len--;
-        numLiterals++;
-      }
-    }
-  }
-
-  private void readPatchedBaseValues(int firstByte) throws IOException {
-
-    // extract the number of fixed bits
-    int fbo = (firstByte >>> 1) & 0x1f;
-    int fb = utils.decodeBitWidth(fbo);
-
-    // extract the run length of data blob
-    int len = (firstByte & 0x01) << 8;
-    len |= input.read();
-    // runs are always one off
-    len += 1;
-
-    // extract the number of bytes occupied by base
-    int thirdByte = input.read();
-    int bw = (thirdByte >>> 5) & 0x07;
-    // base width is one off
-    bw += 1;
-
-    // extract patch width
-    int pwo = thirdByte & 0x1f;
-    int pw = utils.decodeBitWidth(pwo);
-
-    // read fourth byte and extract patch gap width
-    int fourthByte = input.read();
-    int pgw = (fourthByte >>> 5) & 0x07;
-    // patch gap width is one off
-    pgw += 1;
-
-    // extract the length of the patch list
-    int pl = fourthByte & 0x1f;
-
-    // read the next base width number of bytes to extract base value
-    long base = utils.bytesToLongBE(input, bw);
-    long mask = (1L << ((bw * 8) - 1));
-    // if MSB of base value is 1 then base is negative value else positive
-    if ((base & mask) != 0) {
-      base = base & ~mask;
-      base = -base;
-    }
-
-    // unpack the data blob
-    long[] unpacked = new long[len];
-    utils.readInts(unpacked, 0, len, fb, input);
-
-    // unpack the patch blob
-    long[] unpackedPatch = new long[pl];
-
-    if ((pw + pgw) > 64 && !skipCorrupt) {
-      throw new IOException("Corruption in ORC data encountered. To skip" +
-          " reading corrupted data, set hive.exec.orc.skip.corrupt.data to" +
-          " true");
-    }
-    int bitSize = utils.getClosestFixedBits(pw + pgw);
-    utils.readInts(unpackedPatch, 0, pl, bitSize, input);
-
-    // apply the patch directly when decoding the packed data
-    int patchIdx = 0;
-    long currGap = 0;
-    long currPatch = 0;
-    long patchMask = ((1L << pw) - 1);
-    currGap = unpackedPatch[patchIdx] >>> pw;
-    currPatch = unpackedPatch[patchIdx] & patchMask;
-    long actualGap = 0;
-
-    // special case: if the gap is >255 the patch value will be 0;
-    // if the gap is <=255 the patch value cannot be 0
-    while (currGap == 255 && currPatch == 0) {
-      actualGap += 255;
-      patchIdx++;
-      currGap = unpackedPatch[patchIdx] >>> pw;
-      currPatch = unpackedPatch[patchIdx] & patchMask;
-    }
-    // add the left over gap
-    actualGap += currGap;
-
-    // unpack data blob, patch it (if required), add base to get final result
-    for(int i = 0; i < unpacked.length; i++) {
-      if (i == actualGap) {
-        // extract the patch value
-        long patchedVal = unpacked[i] | (currPatch << fb);
-
-        // add base to patched value
-        literals[numLiterals++] = base + patchedVal;
-
-        // increment the patch to point to next entry in patch list
-        patchIdx++;
-
-        if (patchIdx < pl) {
-          // read the next gap and patch
-          currGap = unpackedPatch[patchIdx] >>> pw;
-          currPatch = unpackedPatch[patchIdx] & patchMask;
-          actualGap = 0;
-
-          // special case: if the gap is >255 the patch will be 0; if the gap
-          // is <=255 the patch cannot be 0
-          while (currGap == 255 && currPatch == 0) {
-            actualGap += 255;
-            patchIdx++;
-            currGap = unpackedPatch[patchIdx] >>> pw;
-            currPatch = unpackedPatch[patchIdx] & patchMask;
-          }
-          // add the left over gap
-          actualGap += currGap;
-
-          // next gap is relative to the current gap
-          actualGap += i;
-        }
-      } else {
-        // no patching required. add base to unpacked value to get final value
-        literals[numLiterals++] = base + unpacked[i];
-      }
-    }
-
-  }
-
-  private void readDirectValues(int firstByte) throws IOException {
-
-    // extract the number of fixed bits
-    int fbo = (firstByte >>> 1) & 0x1f;
-    int fb = utils.decodeBitWidth(fbo);
-
-    // extract the run length
-    int len = (firstByte & 0x01) << 8;
-    len |= input.read();
-    // runs are one off
-    len += 1;
-
-    // write the unpacked values and zigzag decode to result buffer
-    utils.readInts(literals, numLiterals, len, fb, input);
-    if (signed) {
-      for(int i = 0; i < len; i++) {
-        literals[numLiterals] = utils.zigzagDecode(literals[numLiterals]);
-        numLiterals++;
-      }
-    } else {
-      numLiterals += len;
-    }
-  }
-
-  private void readShortRepeatValues(int firstByte) throws IOException {
-
-    // read the number of bytes occupied by the value
-    int size = (firstByte >>> 3) & 0x07;
-    // #bytes are one off
-    size += 1;
-
-    // read the run length
-    int len = firstByte & 0x07;
-    // run length values are stored only after MIN_REPEAT is met
-    len += RunLengthIntegerWriterV2.MIN_REPEAT;
-
-    // read the repeated value, which is stored using fixed bytes
-    long val = utils.bytesToLongBE(input, size);
-
-    if (signed) {
-      val = utils.zigzagDecode(val);
-    }
-
-    if (numLiterals != 0) {
-      // Currently this always holds, which makes peekNextAvailLength simpler.
-      // If this changes, peekNextAvailLength should be adjusted accordingly.
-      throw new AssertionError("readValues called with existing values present");
-    }
-    // repeat the value for length times
-    isRepeating = true;
-    // TODO: this is not so useful and V1 reader doesn't do that. Fix? Same if delta == 0
-    for(int i = 0; i < len; i++) {
-      literals[i] = val;
-    }
-    numLiterals = len;
-  }
-
-  @Override
-  public boolean hasNext() throws IOException {
-    return used != numLiterals || input.available() > 0;
-  }
-
-  @Override
-  public long next() throws IOException {
-    long result;
-    if (used == numLiterals) {
-      numLiterals = 0;
-      used = 0;
-      readValues(false);
-    }
-    result = literals[used++];
-    return result;
-  }
-
-  @Override
-  public void seek(PositionProvider index) throws IOException {
-    input.seek(index);
-    int consumed = (int) index.getNext();
-    if (consumed != 0) {
-      // a loop is required for cases where we break the run into two
-      // parts
-      while (consumed > 0) {
-        numLiterals = 0;
-        readValues(false);
-        used = consumed;
-        consumed -= numLiterals;
-      }
-    } else {
-      used = 0;
-      numLiterals = 0;
-    }
-  }
-
-  @Override
-  public void skip(long numValues) throws IOException {
-    while (numValues > 0) {
-      if (used == numLiterals) {
-        numLiterals = 0;
-        used = 0;
-        readValues(false);
-      }
-      long consume = Math.min(numValues, numLiterals - used);
-      used += consume;
-      numValues -= consume;
-    }
-  }
-
-  @Override
-  public void nextVector(ColumnVector previous,
-                         long[] data,
-                         int previousLen) throws IOException {
-    previous.isRepeating = true;
-    for (int i = 0; i < previousLen; i++) {
-      if (!previous.isNull[i]) {
-        data[i] = next();
-      } else {
-        // The default value used for nulls of int types in vectorized
-        // processing is 1, so set that when the value is null
-        data[i] = 1;
-      }
-
-      // The default value for nulls of int types in vectorization is 1, and
-      // since a non-null value can also be 1, isNull must be checked as well
-      // when determining the isRepeating flag.
-      if (previous.isRepeating
-          && i > 0
-          && (data[0] != data[i] ||
-          previous.isNull[0] != previous.isNull[i])) {
-        previous.isRepeating = false;
-      }
-    }
-  }
-
-  @Override
-  public void nextVector(ColumnVector vector,
-                         int[] data,
-                         int size) throws IOException {
-    if (vector.noNulls) {
-      for(int r=0; r < data.length && r < size; ++r) {
-        data[r] = (int) next();
-      }
-    } else if (!(vector.isRepeating && vector.isNull[0])) {
-      for(int r=0; r < data.length && r < size; ++r) {
-        if (!vector.isNull[r]) {
-          data[r] = (int) next();
-        } else {
-          data[r] = 1;
-        }
-      }
-    }
-  }
-}
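
readShortRepeatValues() above unpacks the densest of the four encodings: the top two header bits select SHORT_REPEAT, the next three give the number of value bytes minus one, and the low three give the repeat count minus MIN_REPEAT. A hand-built example of that decode (the input bytes are made up for illustration: value 10000 repeated 5 times, unsigned, so no zigzag step):

    public class ShortRepeatDecodeDemo {
      public static void main(String[] args) {
        int[] stream = {0x0A, 0x27, 0x10};             // header, then the value big-endian

        int firstByte = stream[0];
        int encoding = (firstByte >>> 6) & 0x03;       // 0 => SHORT_REPEAT
        int size = ((firstByte >>> 3) & 0x07) + 1;     // bytes holding the value
        int len = (firstByte & 0x07) + 3;              // MIN_REPEAT is 3

        long val = 0;
        for (int i = 1; i <= size; i++) {              // fixed bytes, big-endian
          val = (val << 8) | stream[i];
        }

        System.out.println("encoding=" + encoding + " repeat=" + len + " value=" + val);
        // prints encoding=0 repeat=5 value=10000
      }
    }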

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/impl/RunLengthIntegerWriter.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/RunLengthIntegerWriter.java b/orc/src/java/org/apache/orc/impl/RunLengthIntegerWriter.java
deleted file mode 100644
index 3e5f2e2..0000000
--- a/orc/src/java/org/apache/orc/impl/RunLengthIntegerWriter.java
+++ /dev/null
@@ -1,143 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc.impl;
-
-import java.io.IOException;
-
-/**
- * A streamFactory that writes a sequence of integers. A control byte is written before
- * each run, with positive values 0 to 127 meaning 3 to 130 repetitions, where each
- * repetition is offset by a delta. If the control byte is -1 to -128, 1 to 128
- * literal vint values follow.
- */
-public class RunLengthIntegerWriter implements IntegerWriter {
-  static final int MIN_REPEAT_SIZE = 3;
-  static final int MAX_DELTA = 127;
-  static final int MIN_DELTA = -128;
-  static final int MAX_LITERAL_SIZE = 128;
-  private static final int MAX_REPEAT_SIZE = 127 + MIN_REPEAT_SIZE;
-  private final PositionedOutputStream output;
-  private final boolean signed;
-  private final long[] literals = new long[MAX_LITERAL_SIZE];
-  private int numLiterals = 0;
-  private long delta = 0;
-  private boolean repeat = false;
-  private int tailRunLength = 0;
-  private SerializationUtils utils;
-
-  public RunLengthIntegerWriter(PositionedOutputStream output,
-                         boolean signed) {
-    this.output = output;
-    this.signed = signed;
-    this.utils = new SerializationUtils();
-  }
-
-  private void writeValues() throws IOException {
-    if (numLiterals != 0) {
-      if (repeat) {
-        output.write(numLiterals - MIN_REPEAT_SIZE);
-        output.write((byte) delta);
-        if (signed) {
-          utils.writeVslong(output, literals[0]);
-        } else {
-          utils.writeVulong(output, literals[0]);
-        }
-      } else {
-        output.write(-numLiterals);
-        for(int i=0; i < numLiterals; ++i) {
-          if (signed) {
-            utils.writeVslong(output, literals[i]);
-          } else {
-            utils.writeVulong(output, literals[i]);
-          }
-        }
-      }
-      repeat = false;
-      numLiterals = 0;
-      tailRunLength = 0;
-    }
-  }
-
-  @Override
-  public void flush() throws IOException {
-    writeValues();
-    output.flush();
-  }
-
-  @Override
-  public void write(long value) throws IOException {
-    if (numLiterals == 0) {
-      literals[numLiterals++] = value;
-      tailRunLength = 1;
-    } else if (repeat) {
-      if (value == literals[0] + delta * numLiterals) {
-        numLiterals += 1;
-        if (numLiterals == MAX_REPEAT_SIZE) {
-          writeValues();
-        }
-      } else {
-        writeValues();
-        literals[numLiterals++] = value;
-        tailRunLength = 1;
-      }
-    } else {
-      if (tailRunLength == 1) {
-        delta = value - literals[numLiterals - 1];
-        if (delta < MIN_DELTA || delta > MAX_DELTA) {
-          tailRunLength = 1;
-        } else {
-          tailRunLength = 2;
-        }
-      } else if (value == literals[numLiterals - 1] + delta) {
-        tailRunLength += 1;
-      } else {
-        delta = value - literals[numLiterals - 1];
-        if (delta < MIN_DELTA || delta > MAX_DELTA) {
-          tailRunLength = 1;
-        } else {
-          tailRunLength = 2;
-        }
-      }
-      if (tailRunLength == MIN_REPEAT_SIZE) {
-        if (numLiterals + 1 == MIN_REPEAT_SIZE) {
-          repeat = true;
-          numLiterals += 1;
-        } else {
-          numLiterals -= MIN_REPEAT_SIZE - 1;
-          long base = literals[numLiterals];
-          writeValues();
-          literals[0] = base;
-          repeat = true;
-          numLiterals = MIN_REPEAT_SIZE;
-        }
-      } else {
-        literals[numLiterals++] = value;
-        if (numLiterals == MAX_LITERAL_SIZE) {
-          writeValues();
-        }
-      }
-    }
-  }
-
-  @Override
-  public void getPosition(PositionRecorder recorder) throws IOException {
-    output.getPosition(recorder);
-    recorder.addPosition(numLiterals);
-  }
-
-}
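
writeValues() above emits a delta run as three pieces: the control byte (run length minus 3), the per-step delta as a signed byte, and the base value as a variable-length integer. A small sketch of that framing for the sequence 4, 6, 8, 10, 12, 14, 16 (the varint helper below handles only small non-negative values and is an illustrative stand-in for SerializationUtils.writeVulong; a signed column would zigzag-encode the base instead):

    import java.io.ByteArrayOutputStream;

    public class IntRleV1EncodeDemo {
      // base-128 varint for small non-negative values (illustrative stand-in)
      static void writeVulong(ByteArrayOutputStream out, long value) {
        while ((value & ~0x7fL) != 0) {
          out.write((int) (0x80 | (value & 0x7f)));
          value >>>= 7;
        }
        out.write((int) value);
      }

      public static void main(String[] args) {
        // sequence 4, 6, 8, 10, 12, 14, 16: run of 7 values, base 4, delta 2
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        out.write(7 - 3);        // control byte: run length 7
        out.write(2);            // per-step delta as a signed byte
        writeVulong(out, 4);     // base value as a varint
        System.out.println(java.util.Arrays.toString(out.toByteArray()));
        // prints [4, 2, 4]
      }
    }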

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/impl/RunLengthIntegerWriterV2.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/RunLengthIntegerWriterV2.java b/orc/src/java/org/apache/orc/impl/RunLengthIntegerWriterV2.java
deleted file mode 100644
index fab2801..0000000
--- a/orc/src/java/org/apache/orc/impl/RunLengthIntegerWriterV2.java
+++ /dev/null
@@ -1,831 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc.impl;
-
-import java.io.IOException;
-
-/**
- * A writer that performs lightweight compression over a sequence of integers.
- * <p>
- * There are four types of lightweight integer compression
- * <ul>
- * <li>SHORT_REPEAT</li>
- * <li>DIRECT</li>
- * <li>PATCHED_BASE</li>
- * <li>DELTA</li>
- * </ul>
- * </p>
- * The description and format for these types are as below:
- * <p>
- * <b>SHORT_REPEAT:</b> Used for short repeated integer sequences.
- * <ul>
- * <li>1 byte header
- * <ul>
- * <li>2 bits for encoding type</li>
- * <li>3 bits for bytes required for repeating value</li>
- * <li>3 bits for repeat count (MIN_REPEAT + run length)</li>
- * </ul>
- * </li>
- * <li>Blob - repeat value (fixed bytes)</li>
- * </ul>
- * </p>
- * <p>
- * <b>DIRECT:</b> Used for random integer sequences whose bit-width
- * requirement doesn't vary much.
- * <ul>
- * <li>2 bytes header
- * <ul>
- * 1st byte
- * <li>2 bits for encoding type</li>
- * <li>5 bits for fixed bit width of values in blob</li>
- * <li>1 bit for storing MSB of run length</li>
- * </ul>
- * <ul>
- * 2nd byte
- * <li>8 bits for lower run length bits</li>
- * </ul>
- * </li>
- * <li>Blob - stores the direct values using fixed bit width. The length of the
- * data blob is (fixed width * run length) bits long</li>
- * </ul>
- * </p>
- * <p>
- * <b>PATCHED_BASE:</b> Used for random integer sequences whose bit-width
- * requirement varies beyond a threshold.
- * <ul>
- * <li>4 bytes header
- * <ul>
- * 1st byte
- * <li>2 bits for encoding type</li>
- * <li>5 bits for fixed bit width of values in blob</li>
- * <li>1 bit for storing MSB of run length</li>
- * </ul>
- * <ul>
- * 2nd byte
- * <li>8 bits for lower run length bits</li>
- * </ul>
- * <ul>
- * 3rd byte
- * <li>3 bits for bytes required to encode base value</li>
- * <li>5 bits for patch width</li>
- * </ul>
- * <ul>
- * 4th byte
- * <li>3 bits for patch gap width</li>
- * <li>5 bits for patch length</li>
- * </ul>
- * </li>
- * <li>Base value - Stored using fixed number of bytes. If MSB is set, base
- * value is negative else positive. Length of base value is (base width * 8)
- * bits.</li>
- * <li>Data blob - Base-reduced values stored using fixed bit width. Length
- * of data blob is (fixed width * run length) bits.</li>
- * <li>Patch blob - a list of gap and patch values. Each entry in
- * the patch list is (patch width + patch gap width) bits long. The gap between
- * subsequent elements to be patched is stored in the upper part of the entry,
- * whereas the patch value is stored in the lower part. Length of patch blob is
- * ((patch width + patch gap width) * patch length) bits.</li>
- * </ul>
- * </p>
- * <p>
- * <b>DELTA</b> Used for monotonically increasing or decreasing sequences,
- * sequences with fixed delta values or long repeated sequences.
- * <ul>
- * <li>2 bytes header
- * <ul>
- * 1st byte
- * <li>2 bits for encoding type</li>
- * <li>5 bits for fixed bit width of values in blob</li>
- * <li>1 bit for storing MSB of run length</li>
- * </ul>
- * <ul>
- * 2nd byte
- * <li>8 bits for lower run length bits</li>
- * </ul>
- * </li>
- * <li>Base value - zigzag encoded value written as varint</li>
- * <li>Delta base - zigzag encoded value written as varint</li>
- * <li>Delta blob - only positive values. Monotonicity and ordering are decided
- * based on the sign of the base value and the delta base</li>
- * </ul>
- * </p>
- */
-public class RunLengthIntegerWriterV2 implements IntegerWriter {
-
-  public enum EncodingType {
-    SHORT_REPEAT, DIRECT, PATCHED_BASE, DELTA
-  }
-
-  static final int MAX_SCOPE = 512;
-  static final int MIN_REPEAT = 3;
-  private static final int MAX_SHORT_REPEAT_LENGTH = 10;
-  private long prevDelta = 0;
-  private int fixedRunLength = 0;
-  private int variableRunLength = 0;
-  private final long[] literals = new long[MAX_SCOPE];
-  private final PositionedOutputStream output;
-  private final boolean signed;
-  private EncodingType encoding;
-  private int numLiterals;
-  private final long[] zigzagLiterals = new long[MAX_SCOPE];
-  private final long[] baseRedLiterals = new long[MAX_SCOPE];
-  private final long[] adjDeltas = new long[MAX_SCOPE];
-  private long fixedDelta;
-  private int zzBits90p;
-  private int zzBits100p;
-  private int brBits95p;
-  private int brBits100p;
-  private int bitsDeltaMax;
-  private int patchWidth;
-  private int patchGapWidth;
-  private int patchLength;
-  private long[] gapVsPatchList;
-  private long min;
-  private boolean isFixedDelta;
-  private SerializationUtils utils;
-  private boolean alignedBitpacking;
-
-  RunLengthIntegerWriterV2(PositionedOutputStream output, boolean signed) {
-    this(output, signed, true);
-  }
-
-  public RunLengthIntegerWriterV2(PositionedOutputStream output, boolean signed,
-      boolean alignedBitpacking) {
-    this.output = output;
-    this.signed = signed;
-    this.alignedBitpacking = alignedBitpacking;
-    this.utils = new SerializationUtils();
-    clear();
-  }
-
-  private void writeValues() throws IOException {
-    if (numLiterals != 0) {
-
-      if (encoding.equals(EncodingType.SHORT_REPEAT)) {
-        writeShortRepeatValues();
-      } else if (encoding.equals(EncodingType.DIRECT)) {
-        writeDirectValues();
-      } else if (encoding.equals(EncodingType.PATCHED_BASE)) {
-        writePatchedBaseValues();
-      } else {
-        writeDeltaValues();
-      }
-
-      // clear all the variables
-      clear();
-    }
-  }
-
-  private void writeDeltaValues() throws IOException {
-    int len = 0;
-    int fb = bitsDeltaMax;
-    int efb = 0;
-
-    if (alignedBitpacking) {
-      fb = utils.getClosestAlignedFixedBits(fb);
-    }
-
-    if (isFixedDelta) {
-      // if the fixed run length is greater than the threshold, this is a
-      // fixed-delta sequence with delta value 0; otherwise it is a fixed-delta
-      // sequence with a non-zero delta value
-      if (fixedRunLength > MIN_REPEAT) {
-        // ex. sequence: 2 2 2 2 2 2 2 2
-        len = fixedRunLength - 1;
-        fixedRunLength = 0;
-      } else {
-        // ex. sequence: 4 6 8 10 12 14 16
-        len = variableRunLength - 1;
-        variableRunLength = 0;
-      }
-    } else {
-      // fixed width 0 is used for long repeating values.
-      // sequences that require only 1 bit to encode will have an additional bit
-      if (fb == 1) {
-        fb = 2;
-      }
-      efb = utils.encodeBitWidth(fb);
-      efb = efb << 1;
-      len = variableRunLength - 1;
-      variableRunLength = 0;
-    }
-
-    // extract the 9th bit of run length
-    final int tailBits = (len & 0x100) >>> 8;
-
-    // create first byte of the header
-    final int headerFirstByte = getOpcode() | efb | tailBits;
-
-    // second byte of the header stores the remaining 8 bits of runlength
-    final int headerSecondByte = len & 0xff;
-
-    // write header
-    output.write(headerFirstByte);
-    output.write(headerSecondByte);
-
-    // store the first value from zigzag literal array
-    if (signed) {
-      utils.writeVslong(output, literals[0]);
-    } else {
-      utils.writeVulong(output, literals[0]);
-    }
-
-    if (isFixedDelta) {
-      // if delta is fixed then we don't need to store delta blob
-      utils.writeVslong(output, fixedDelta);
-    } else {
-      // store the first value as delta value using zigzag encoding
-      utils.writeVslong(output, adjDeltas[0]);
-
-      // adjacent delta values are bit packed. The length of adjDeltas array is
-      // always one less than the number of literals (delta difference for n
-      // elements is n-1). We have already written one element, write the
-      // remaining numLiterals - 2 elements here
-      utils.writeInts(adjDeltas, 1, numLiterals - 2, fb, output);
-    }
-  }
-
-  private void writePatchedBaseValues() throws IOException {
-
-    // NOTE: Aligned bit packing cannot be applied for PATCHED_BASE encoding
-    // because patch is applied to MSB bits. For example: If fixed bit width of
-    // base value is 7 bits and if patch is 3 bits, the actual value is
-    // constructed by shifting the patch to left by 7 positions.
-    // actual_value = patch << 7 | base_value
-    // So, if we align base_value then actual_value can not be reconstructed.
-
-    // write the number of fixed bits required in next 5 bits
-    final int fb = brBits95p;
-    final int efb = utils.encodeBitWidth(fb) << 1;
-
-    // adjust variable run length, they are one off
-    variableRunLength -= 1;
-
-    // extract the 9th bit of run length
-    final int tailBits = (variableRunLength & 0x100) >>> 8;
-
-    // create first byte of the header
-    final int headerFirstByte = getOpcode() | efb | tailBits;
-
-    // second byte of the header stores the remaining 8 bits of runlength
-    final int headerSecondByte = variableRunLength & 0xff;
-
-    // if the min value is negative toggle the sign
-    final boolean isNegative = min < 0;
-    if (isNegative) {
-      min = -min;
-    }
-
-    // find the number of bytes required for base and shift it by 5 bits
-    // to accommodate patch width. The additional bit is used to store the sign
-    // of the base value.
-    final int baseWidth = utils.findClosestNumBits(min) + 1;
-    final int baseBytes = baseWidth % 8 == 0 ? baseWidth / 8 : (baseWidth / 8) + 1;
-    final int bb = (baseBytes - 1) << 5;
-
-    // if the base value is negative then set MSB to 1
-    if (isNegative) {
-      min |= (1L << ((baseBytes * 8) - 1));
-    }
-
-    // third byte contains 3 bits for number of bytes occupied by base
-    // and 5 bits for patchWidth
-    final int headerThirdByte = bb | utils.encodeBitWidth(patchWidth);
-
-    // fourth byte contains 3 bits for patch gap width and 5 bits for
-    // patch length
-    final int headerFourthByte = (patchGapWidth - 1) << 5 | patchLength;
-
-    // write header
-    output.write(headerFirstByte);
-    output.write(headerSecondByte);
-    output.write(headerThirdByte);
-    output.write(headerFourthByte);
-
-    // write the base value using fixed bytes in big endian order
-    for(int i = baseBytes - 1; i >= 0; i--) {
-      byte b = (byte) ((min >>> (i * 8)) & 0xff);
-      output.write(b);
-    }
-
-    // base reduced literals are bit packed
-    int closestFixedBits = utils.getClosestFixedBits(fb);
-
-    utils.writeInts(baseRedLiterals, 0, numLiterals, closestFixedBits,
-        output);
-
-    // write patch list
-    closestFixedBits = utils.getClosestFixedBits(patchGapWidth + patchWidth);
-
-    utils.writeInts(gapVsPatchList, 0, gapVsPatchList.length, closestFixedBits,
-        output);
-
-    // reset run length
-    variableRunLength = 0;
-  }
-
-  /**
-   * Store the opcode in 2 MSB bits
-   * @return opcode
-   */
-  private int getOpcode() {
-    return encoding.ordinal() << 6;
-  }
-
-  private void writeDirectValues() throws IOException {
-
-    // write the number of fixed bits required in next 5 bits
-    int fb = zzBits100p;
-
-    if (alignedBitpacking) {
-      fb = utils.getClosestAlignedFixedBits(fb);
-    }
-
-    final int efb = utils.encodeBitWidth(fb) << 1;
-
-    // adjust variable run length
-    variableRunLength -= 1;
-
-    // extract the 9th bit of run length
-    final int tailBits = (variableRunLength & 0x100) >>> 8;
-
-    // create first byte of the header
-    final int headerFirstByte = getOpcode() | efb | tailBits;
-
-    // second byte of the header stores the remaining 8 bits of runlength
-    final int headerSecondByte = variableRunLength & 0xff;
-
-    // write header
-    output.write(headerFirstByte);
-    output.write(headerSecondByte);
-
-    // bit packing the zigzag encoded literals
-    utils.writeInts(zigzagLiterals, 0, numLiterals, fb, output);
-
-    // reset run length
-    variableRunLength = 0;
-  }
-
-  private void writeShortRepeatValues() throws IOException {
-    // get the value that is repeating, compute the bits and bytes required
-    long repeatVal = 0;
-    if (signed) {
-      repeatVal = utils.zigzagEncode(literals[0]);
-    } else {
-      repeatVal = literals[0];
-    }
-
-    final int numBitsRepeatVal = utils.findClosestNumBits(repeatVal);
-    final int numBytesRepeatVal = numBitsRepeatVal % 8 == 0 ? numBitsRepeatVal >>> 3
-        : (numBitsRepeatVal >>> 3) + 1;
-
-    // write encoding type in top 2 bits
-    int header = getOpcode();
-
-    // write the number of bytes required for the value
-    header |= ((numBytesRepeatVal - 1) << 3);
-
-    // write the run length
-    fixedRunLength -= MIN_REPEAT;
-    header |= fixedRunLength;
-
-    // write the header
-    output.write(header);
-
-    // write the repeating value in big endian byte order
-    for(int i = numBytesRepeatVal - 1; i >= 0; i--) {
-      int b = (int) ((repeatVal >>> (i * 8)) & 0xff);
-      output.write(b);
-    }
-
-    fixedRunLength = 0;
-  }
-
-  private void determineEncoding() {
-
-    // we need to compute zigzag values for DIRECT encoding if we decide to
-    // break early for delta overflows or for shorter runs
-    computeZigZagLiterals();
-
-    zzBits100p = utils.percentileBits(zigzagLiterals, 0, numLiterals, 1.0);
-
-    // determining the encoding is not a big win for shorter runs
-    if (numLiterals <= MIN_REPEAT) {
-      encoding = EncodingType.DIRECT;
-      return;
-    }
-
-    // DELTA encoding check
-
-    // for identifying monotonic sequences
-    boolean isIncreasing = true;
-    boolean isDecreasing = true;
-    this.isFixedDelta = true;
-
-    this.min = literals[0];
-    long max = literals[0];
-    final long initialDelta = literals[1] - literals[0];
-    long currDelta = initialDelta;
-    long deltaMax = initialDelta;
-    this.adjDeltas[0] = initialDelta;
-
-    for (int i = 1; i < numLiterals; i++) {
-      final long l1 = literals[i];
-      final long l0 = literals[i - 1];
-      currDelta = l1 - l0;
-      min = Math.min(min, l1);
-      max = Math.max(max, l1);
-
-      isIncreasing &= (l0 <= l1);
-      isDecreasing &= (l0 >= l1);
-
-      isFixedDelta &= (currDelta == initialDelta);
-      if (i > 1) {
-        adjDeltas[i - 1] = Math.abs(currDelta);
-        deltaMax = Math.max(deltaMax, adjDeltas[i - 1]);
-      }
-    }
-
-    // it's faster to exit under the delta overflow condition without checking
-    // for the PATCHED_BASE condition, as encoding with DIRECT is faster and
-    // has less overhead than PATCHED_BASE
-    if (!utils.isSafeSubtract(max, min)) {
-      encoding = EncodingType.DIRECT;
-      return;
-    }
-
-    // invariant - subtracting any number from any other in the literals after
-    // this point won't overflow
-
-    // if min equals max then the delta is 0; this happens for runs of fixed
-    // values longer than 10, which cannot be encoded with SHORT_REPEAT
-    if (min == max) {
-      assert isFixedDelta : min + "==" + max +
-          ", isFixedDelta cannot be false";
-      assert currDelta == 0 : min + "==" + max + ", currDelta should be zero";
-      fixedDelta = 0;
-      encoding = EncodingType.DELTA;
-      return;
-    }
-
-    if (isFixedDelta) {
-      assert currDelta == initialDelta
-          : "currDelta should be equal to initialDelta for fixed delta encoding";
-      encoding = EncodingType.DELTA;
-      fixedDelta = currDelta;
-      return;
-    }
-
-    // if initialDelta is 0 then we cannot delta encode as we cannot identify
-    // the sign of deltas (increasing or decreasing)
-    if (initialDelta != 0) {
-      // stores the number of bits required for packing delta blob in
-      // delta encoding
-      bitsDeltaMax = utils.findClosestNumBits(deltaMax);
-
-      // monotonic condition
-      if (isIncreasing || isDecreasing) {
-        encoding = EncodingType.DELTA;
-        return;
-      }
-    }
-
-    // PATCHED_BASE encoding check
-
-    // percentile values are computed for the zigzag encoded values. if the
-    // number of bit requirement between 90th and 100th percentile varies
-    // beyond a threshold then we need to patch the values. if the variation
-    // is not significant then we can use direct encoding
-
-    zzBits90p = utils.percentileBits(zigzagLiterals, 0, numLiterals, 0.9);
-    int diffBitsLH = zzBits100p - zzBits90p;
-
-    // if the difference between the 90th percentile and 100th percentile fixed
-    // bits is > 1 then we need to patch the values
-    if (diffBitsLH > 1) {
-
-      // patching is done only on base reduced values.
-      // remove base from literals
-      for (int i = 0; i < numLiterals; i++) {
-        baseRedLiterals[i] = literals[i] - min;
-      }
-
-      // 95th percentile width is used to determine max allowed value
-      // after which patching will be done
-      brBits95p = utils.percentileBits(baseRedLiterals, 0, numLiterals, 0.95);
-
-      // 100th percentile is used to compute the max patch width
-      brBits100p = utils.percentileBits(baseRedLiterals, 0, numLiterals, 1.0);
-
-      // after base reducing the values, if the difference in bits between
-      // 95th percentile and 100th percentile value is zero then there
-      // is no point in patching the values, in which case we will
-      // fallback to DIRECT encoding.
-      // The decision to use patched base was based on zigzag values, but the
-      // actual patching is done on base reduced literals.
-      if ((brBits100p - brBits95p) != 0) {
-        encoding = EncodingType.PATCHED_BASE;
-        preparePatchedBlob();
-        return;
-      } else {
-        encoding = EncodingType.DIRECT;
-        return;
-      }
-    } else {
-      // if difference in bits between 95th percentile and 100th percentile is
-      // 0, then patch length will become 0. Hence we will fallback to direct
-      encoding = EncodingType.DIRECT;
-      return;
-    }
-  }
-
-  private void computeZigZagLiterals() {
-    // populate zigzag encoded literals
-    long zzEncVal = 0;
-    for (int i = 0; i < numLiterals; i++) {
-      if (signed) {
-        zzEncVal = utils.zigzagEncode(literals[i]);
-      } else {
-        zzEncVal = literals[i];
-      }
-      zigzagLiterals[i] = zzEncVal;
-    }
-  }
-
-  private void preparePatchedBlob() {
-    // mask will be max value beyond which patch will be generated
-    long mask = (1L << brBits95p) - 1;
-
-    // since we consider only the 95th percentile, the gap and patch arrays
-    // can contain at most 5% of the values
-    patchLength = (int) Math.ceil((numLiterals * 0.05));
-
-    int[] gapList = new int[patchLength];
-    long[] patchList = new long[patchLength];
-
-    // #bit for patch
-    patchWidth = brBits100p - brBits95p;
-    patchWidth = utils.getClosestFixedBits(patchWidth);
-
-    // if the patch bit requirement is 64 then it will not be possible to pack
-    // the gap and patch together in a long. To make sure they can be packed
-    // together, adjust the patch width
-    if (patchWidth == 64) {
-      patchWidth = 56;
-      brBits95p = 8;
-      mask = (1L << brBits95p) - 1;
-    }
-
-    int gapIdx = 0;
-    int patchIdx = 0;
-    int prev = 0;
-    int gap = 0;
-    int maxGap = 0;
-
-    for(int i = 0; i < numLiterals; i++) {
-      // if value is above mask then create the patch and record the gap
-      if (baseRedLiterals[i] > mask) {
-        gap = i - prev;
-        if (gap > maxGap) {
-          maxGap = gap;
-        }
-
-        // gaps are relative, so store the previous patched value index
-        prev = i;
-        gapList[gapIdx++] = gap;
-
-        // extract the most significant bits that are over mask bits
-        long patch = baseRedLiterals[i] >>> brBits95p;
-        patchList[patchIdx++] = patch;
-
-        // strip off the MSB to enable safe bit packing
-        baseRedLiterals[i] &= mask;
-      }
-    }
-
-    // adjust the patch length to number of entries in gap list
-    patchLength = gapIdx;
-
-    // if the element to be patched is the first and only element, the max gap
-    // will be 0, but storing a gap of 0 still needs at least 1 bit
-    if (maxGap == 0 && patchLength != 0) {
-      patchGapWidth = 1;
-    } else {
-      patchGapWidth = utils.findClosestNumBits(maxGap);
-    }
-
-    // special case: if the patch gap is greater than 256, then we need
-    // 9 bits to encode it. But we only have 3 bits in the
-    // header to record the gap width. To deal with this case, we will save
-    // two entries in patch list in the following way
-    // 256 gap width => 0 for patch value
-    // actual gap - 256 => actual patch value
-    // We will do the same for gap width = 511. If the element to be patched is
-    // the last element in the scope then gap width will be 511. In this case we
-    // will have 3 entries in the patch list in the following way
-    // 255 gap width => 0 for patch value
-    // 255 gap width => 0 for patch value
-    // 1 gap width => actual patch value
-    if (patchGapWidth > 8) {
-      patchGapWidth = 8;
-      // for gap = 511, we need two additional entries in patch list
-      if (maxGap == 511) {
-        patchLength += 2;
-      } else {
-        patchLength += 1;
-      }
-    }
-
-    // create gap vs patch list
-    gapIdx = 0;
-    patchIdx = 0;
-    gapVsPatchList = new long[patchLength];
-    for(int i = 0; i < patchLength; i++) {
-      long g = gapList[gapIdx++];
-      long p = patchList[patchIdx++];
-      while (g > 255) {
-        gapVsPatchList[i++] = (255L << patchWidth);
-        g -= 255;
-      }
-
-      // store patch value in LSBs and gap in MSBs
-      gapVsPatchList[i] = (g << patchWidth) | p;
-    }
-  }
-
-  /**
-   * clears all the variables
-   */
-  private void clear() {
-    numLiterals = 0;
-    encoding = null;
-    prevDelta = 0;
-    fixedDelta = 0;
-    zzBits90p = 0;
-    zzBits100p = 0;
-    brBits95p = 0;
-    brBits100p = 0;
-    bitsDeltaMax = 0;
-    patchGapWidth = 0;
-    patchLength = 0;
-    patchWidth = 0;
-    gapVsPatchList = null;
-    min = 0;
-    isFixedDelta = true;
-  }
-
-  @Override
-  public void flush() throws IOException {
-    if (numLiterals != 0) {
-      if (variableRunLength != 0) {
-        determineEncoding();
-        writeValues();
-      } else if (fixedRunLength != 0) {
-        if (fixedRunLength < MIN_REPEAT) {
-          variableRunLength = fixedRunLength;
-          fixedRunLength = 0;
-          determineEncoding();
-          writeValues();
-        } else if (fixedRunLength >= MIN_REPEAT
-            && fixedRunLength <= MAX_SHORT_REPEAT_LENGTH) {
-          encoding = EncodingType.SHORT_REPEAT;
-          writeValues();
-        } else {
-          encoding = EncodingType.DELTA;
-          isFixedDelta = true;
-          writeValues();
-        }
-      }
-    }
-    output.flush();
-  }
-
-  @Override
-  public void write(long val) throws IOException {
-    if (numLiterals == 0) {
-      initializeLiterals(val);
-    } else {
-      if (numLiterals == 1) {
-        prevDelta = val - literals[0];
-        literals[numLiterals++] = val;
-        // if both values are same count as fixed run else variable run
-        if (val == literals[0]) {
-          fixedRunLength = 2;
-          variableRunLength = 0;
-        } else {
-          fixedRunLength = 0;
-          variableRunLength = 2;
-        }
-      } else {
-        long currentDelta = val - literals[numLiterals - 1];
-        if (prevDelta == 0 && currentDelta == 0) {
-          // fixed delta run
-
-          literals[numLiterals++] = val;
-
-          // if variable run is non-zero then we are seeing repeating
-          // values at the end of variable run in which case keep
-          // updating variable and fixed runs
-          if (variableRunLength > 0) {
-            fixedRunLength = 2;
-          }
-          fixedRunLength += 1;
-
-          // if fixed run met the minimum condition and if variable
-          // run is non-zero then flush the variable run and shift the
-          // tail fixed runs to start of the buffer
-          if (fixedRunLength >= MIN_REPEAT && variableRunLength > 0) {
-            numLiterals -= MIN_REPEAT;
-            variableRunLength -= MIN_REPEAT - 1;
-            // copy the tail fixed runs
-            long[] tailVals = new long[MIN_REPEAT];
-            System.arraycopy(literals, numLiterals, tailVals, 0, MIN_REPEAT);
-
-            // determine variable encoding and flush values
-            determineEncoding();
-            writeValues();
-
-            // shift tail fixed runs to beginning of the buffer
-            for(long l : tailVals) {
-              literals[numLiterals++] = l;
-            }
-          }
-
-          // if fixed runs reached max repeat length then write values
-          if (fixedRunLength == MAX_SCOPE) {
-            determineEncoding();
-            writeValues();
-          }
-        } else {
-          // variable delta run
-
-          // if fixed run length is non-zero and if it satisfies the
-          // short repeat conditions then write the values as short repeats
-          // else use delta encoding
-          if (fixedRunLength >= MIN_REPEAT) {
-            if (fixedRunLength <= MAX_SHORT_REPEAT_LENGTH) {
-              encoding = EncodingType.SHORT_REPEAT;
-              writeValues();
-            } else {
-              encoding = EncodingType.DELTA;
-              isFixedDelta = true;
-              writeValues();
-            }
-          }
-
-          // if fixed run length is <MIN_REPEAT and current value is
-          // different from previous then treat it as variable run
-          if (fixedRunLength > 0 && fixedRunLength < MIN_REPEAT) {
-            if (val != literals[numLiterals - 1]) {
-              variableRunLength = fixedRunLength;
-              fixedRunLength = 0;
-            }
-          }
-
-          // after writing values re-initialize the variables
-          if (numLiterals == 0) {
-            initializeLiterals(val);
-          } else {
-            // keep updating variable run lengths
-            prevDelta = val - literals[numLiterals - 1];
-            literals[numLiterals++] = val;
-            variableRunLength += 1;
-
-            // if the variable run length reaches the max scope, write it
-            if (variableRunLength == MAX_SCOPE) {
-              determineEncoding();
-              writeValues();
-            }
-          }
-        }
-      }
-    }
-  }
-
-  private void initializeLiterals(long val) {
-    literals[numLiterals++] = val;
-    fixedRunLength = 1;
-    variableRunLength = 1;
-  }
-
-  @Override
-  public void getPosition(PositionRecorder recorder) throws IOException {
-    output.getPosition(recorder);
-    recorder.addPosition(numLiterals);
-  }
-}
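
The gap-vs-patch packing built by the removed writer above is easier to follow outside the diff. Below is a minimal standalone sketch of just that rule, assuming the gap and patch lists have already been computed: the gap is stored in the MSBs, the patch value in the low patchWidth bits, and any gap larger than 255 is split into filler entries of (255 << patchWidth) with a zero patch value. The class and method names are illustrative only, not part of the ORC API.

    import java.util.ArrayList;
    import java.util.List;

    public class GapPatchPackingSketch {

      /**
       * Packs each (gap, patch) pair into one long: gap in the MSBs,
       * patch value in the low patchWidth bits. Gaps larger than 255 are
       * split into filler entries with a zero patch value, mirroring the
       * splitting rule described in the comments of the removed writer.
       */
      static long[] packGapVsPatchList(long[] gaps, long[] patches, int patchWidth) {
        List<Long> packed = new ArrayList<>();
        for (int i = 0; i < gaps.length; i++) {
          long g = gaps[i];
          long p = patches[i];
          while (g > 255) {
            packed.add(255L << patchWidth);   // filler: gap of 255, patch value 0
            g -= 255;
          }
          packed.add((g << patchWidth) | p);  // gap in MSBs, patch value in LSBs
        }
        long[] result = new long[packed.size()];
        for (int i = 0; i < result.length; i++) {
          result[i] = packed.get(i);
        }
        return result;
      }

      public static void main(String[] args) {
        // a gap of 300 with a 6-bit patch value of 9 becomes two entries:
        // (255 << 6) and ((45 << 6) | 9)
        long[] packed = packGapVsPatchList(new long[]{300}, new long[]{9}, 6);
        for (long entry : packed) {
          System.out.printf("gap=%d patch=%d%n", entry >>> 6, entry & 0x3f);
        }
      }
    }

This also makes the 511-gap case from the comments concrete: it decomposes into 255 + 255 + 1, i.e. two filler entries followed by the entry that carries the actual patch value.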

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/impl/SchemaEvolution.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/SchemaEvolution.java b/orc/src/java/org/apache/orc/impl/SchemaEvolution.java
deleted file mode 100644
index c1bd2b7..0000000
--- a/orc/src/java/org/apache/orc/impl/SchemaEvolution.java
+++ /dev/null
@@ -1,399 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc.impl;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-
-import org.apache.orc.TypeDescription;
-
-/**
- * Take the file types and the (optional) configuration column names/types and see if there
- * has been schema evolution.
- */
-public class SchemaEvolution {
-  // indexed by reader column id
-  private final TypeDescription[] readerFileTypes;
-  // indexed by reader column id
-  private final boolean[] readerIncluded;
-  // the offset to the first column id ignoring any ACID columns
-  private final int readerColumnOffset;
-  // indexed by file column id
-  private final boolean[] fileIncluded;
-  private final TypeDescription fileSchema;
-  private final TypeDescription readerSchema;
-  private boolean hasConversion;
-  // indexed by reader column id
-  private final boolean[] ppdSafeConversion;
-
-  public SchemaEvolution(TypeDescription fileSchema, boolean[] includedCols) {
-    this(fileSchema, null, includedCols);
-  }
-
-  public SchemaEvolution(TypeDescription fileSchema,
-                         TypeDescription readerSchema,
-                         boolean[] includeCols) {
-    this.readerIncluded = includeCols == null ? null : Arrays.copyOf(includeCols, includeCols.length);
-    this.fileIncluded = new boolean[fileSchema.getMaximumId() + 1];
-    this.hasConversion = false;
-    this.fileSchema = fileSchema;
-    boolean isAcid = checkAcidSchema(fileSchema);
-    this.readerColumnOffset = isAcid ? acidEventFieldNames.size() : 0;
-    if (readerSchema != null) {
-      if (isAcid) {
-        this.readerSchema = createEventSchema(readerSchema);
-      } else {
-        this.readerSchema = readerSchema;
-      }
-      if (readerIncluded != null &&
-          readerIncluded.length + readerColumnOffset != this.readerSchema.getMaximumId() + 1) {
-        throw new IllegalArgumentException("Include vector the wrong length: " +
-            this.readerSchema.toJson() + " with include length " +
-            readerIncluded.length);
-      }
-      this.readerFileTypes = new TypeDescription[this.readerSchema.getMaximumId() + 1];
-      buildConversionFileTypesArray(fileSchema, this.readerSchema);
-    } else {
-      this.readerSchema = fileSchema;
-      this.readerFileTypes = new TypeDescription[this.readerSchema.getMaximumId() + 1];
-      if (readerIncluded != null &&
-          readerIncluded.length + readerColumnOffset != this.readerSchema.getMaximumId() + 1) {
-        throw new IllegalArgumentException("Include vector the wrong length: " +
-            this.readerSchema.toJson() + " with include length " +
-            readerIncluded.length);
-      }
-      buildSameSchemaFileTypesArray();
-    }
-    this.ppdSafeConversion = populatePpdSafeConversion();
-  }
-
-  public TypeDescription getReaderSchema() {
-    return readerSchema;
-  }
-
-  /**
-   * Returns the non-ACID (aka base) reader type description.
-   *
-   * @return the reader type ignoring the ACID rowid columns, if any
-   */
-  public TypeDescription getReaderBaseSchema() {
-    return readerSchema.findSubtype(readerColumnOffset);
-  }
-
-  /**
-   * Is there Schema Evolution data type conversion?
-   * @return
-   */
-  public boolean hasConversion() {
-    return hasConversion;
-  }
-
-  public TypeDescription getFileType(TypeDescription readerType) {
-    return getFileType(readerType.getId());
-  }
-
-  /**
-   * Get whether each column is included from the reader's point of view.
-   * @return a boolean array indexed by reader column id
-   */
-  public boolean[] getReaderIncluded() {
-    return readerIncluded;
-  }
-
-  /**
-   * Get whether each column is included from the file's point of view.
-   * @return a boolean array indexed by file column id
-   */
-  public boolean[] getFileIncluded() {
-    return fileIncluded;
-  }
-
-  /**
-   * Get the file type by reader type id.
-   * @param id reader column id
-   * @return
-   */
-  public TypeDescription getFileType(int id) {
-    return readerFileTypes[id];
-  }
-
-  /**
-   * Check if column is safe for ppd evaluation
-   * @param colId reader column id
-   * @return true if the specified column is safe for ppd evaluation else false
-   */
-  public boolean isPPDSafeConversion(final int colId) {
-    if (hasConversion()) {
-      if (colId < 0 || colId >= ppdSafeConversion.length) {
-        return false;
-      }
-      return ppdSafeConversion[colId];
-    }
-
-    // when there is no schema evolution PPD is safe
-    return true;
-  }
-
-  private boolean[] populatePpdSafeConversion() {
-    if (fileSchema == null || readerSchema == null || readerFileTypes == null) {
-      return null;
-    }
-
-    boolean[] result = new boolean[readerSchema.getMaximumId() + 1];
-    boolean safePpd = validatePPDConversion(fileSchema, readerSchema);
-    result[readerSchema.getId()] = safePpd;
-    List<TypeDescription> children = readerSchema.getChildren();
-    if (children != null) {
-      for (TypeDescription child : children) {
-        TypeDescription fileType = getFileType(child.getId());
-        safePpd = validatePPDConversion(fileType, child);
-        result[child.getId()] = safePpd;
-      }
-    }
-    return result;
-  }
-
-  private boolean validatePPDConversion(final TypeDescription fileType,
-      final TypeDescription readerType) {
-    if (fileType == null) {
-      return false;
-    }
-    if (fileType.getCategory().isPrimitive()) {
-      if (fileType.getCategory().equals(readerType.getCategory())) {
-        // for decimals alone do equality check to not mess up with precision change
-        if (fileType.getCategory().equals(TypeDescription.Category.DECIMAL) &&
-            !fileType.equals(readerType)) {
-          return false;
-        }
-        return true;
-      }
-
-      // only integer and string evolutions are safe
-      // byte -> short -> int -> long
-      // string <-> char <-> varchar
-      // NOTE: Float to double evolution is not safe as floats are stored as doubles in ORC's
-      // internal index, but when doing predicate evaluation for queries like "select * from
-      // orc_float where f = 74.72" the constant on the filter is converted from string -> double
-      // so the precisions will be different and the comparison will fail.
-      // Soon, we should convert all sargs that compare equality between floats or
-      // doubles to range predicates.
-
-      // Similarly string -> char and varchar -> char and vice versa is not possible, as ORC stores
-      // char with padded spaces in its internal index.
-      switch (fileType.getCategory()) {
-        case BYTE:
-          if (readerType.getCategory().equals(TypeDescription.Category.SHORT) ||
-              readerType.getCategory().equals(TypeDescription.Category.INT) ||
-              readerType.getCategory().equals(TypeDescription.Category.LONG)) {
-            return true;
-          }
-          break;
-        case SHORT:
-          if (readerType.getCategory().equals(TypeDescription.Category.INT) ||
-              readerType.getCategory().equals(TypeDescription.Category.LONG)) {
-            return true;
-          }
-          break;
-        case INT:
-          if (readerType.getCategory().equals(TypeDescription.Category.LONG)) {
-            return true;
-          }
-          break;
-        case STRING:
-          if (readerType.getCategory().equals(TypeDescription.Category.VARCHAR)) {
-            return true;
-          }
-          break;
-        case VARCHAR:
-          if (readerType.getCategory().equals(TypeDescription.Category.STRING)) {
-            return true;
-          }
-          break;
-        default:
-          break;
-      }
-    }
-    return false;
-  }
-
-  /**
-   * Should we read the given reader column?
-   * @param readerId the id of column in the extended reader schema
-   * @return true if the column should be read
-   */
-  public boolean includeReaderColumn(int readerId) {
-    return readerIncluded == null ||
-        readerId <= readerColumnOffset ||
-        readerIncluded[readerId - readerColumnOffset];
-  }
-
-  void buildConversionFileTypesArray(TypeDescription fileType,
-                                     TypeDescription readerType) {
-    // if the column isn't included, don't map it
-    int readerId = readerType.getId();
-    if (!includeReaderColumn(readerId)) {
-      return;
-    }
-    boolean isOk = true;
-    // check the easy case first
-    if (fileType.getCategory() == readerType.getCategory()) {
-      switch (readerType.getCategory()) {
-        case BOOLEAN:
-        case BYTE:
-        case SHORT:
-        case INT:
-        case LONG:
-        case DOUBLE:
-        case FLOAT:
-        case STRING:
-        case TIMESTAMP:
-        case BINARY:
-        case DATE:
-          // these are always a match
-          break;
-        case CHAR:
-        case VARCHAR:
-          // We do conversion when same CHAR/VARCHAR type but different maxLength.
-          if (fileType.getMaxLength() != readerType.getMaxLength()) {
-            hasConversion = true;
-          }
-          break;
-        case DECIMAL:
-          // We do conversion when same DECIMAL type but different precision/scale.
-          if (fileType.getPrecision() != readerType.getPrecision() ||
-              fileType.getScale() != readerType.getScale()) {
-            hasConversion = true;
-          }
-          break;
-        case UNION:
-        case MAP:
-        case LIST: {
-          // these must be an exact match
-          List<TypeDescription> fileChildren = fileType.getChildren();
-          List<TypeDescription> readerChildren = readerType.getChildren();
-          if (fileChildren.size() == readerChildren.size()) {
-            for(int i=0; i < fileChildren.size(); ++i) {
-              buildConversionFileTypesArray(fileChildren.get(i), readerChildren.get(i));
-            }
-          } else {
-            isOk = false;
-          }
-          break;
-        }
-        case STRUCT: {
-          // allow either side to have fewer fields than the other
-          List<TypeDescription> fileChildren = fileType.getChildren();
-          List<TypeDescription> readerChildren = readerType.getChildren();
-          if (fileChildren.size() != readerChildren.size()) {
-            hasConversion = true;
-          }
-          int jointSize = Math.min(fileChildren.size(), readerChildren.size());
-          for(int i=0; i < jointSize; ++i) {
-            buildConversionFileTypesArray(fileChildren.get(i), readerChildren.get(i));
-          }
-          break;
-        }
-        default:
-          throw new IllegalArgumentException("Unknown type " + readerType);
-      }
-    } else {
-      /*
-       * Check for the few cases where we will not convert.
-       */
-
-      isOk = ConvertTreeReaderFactory.canConvert(fileType, readerType);
-      hasConversion = true;
-    }
-    if (isOk) {
-      if (readerFileTypes[readerId] != null) {
-        throw new RuntimeException("reader to file type entry already assigned");
-      }
-      readerFileTypes[readerId] = fileType;
-      fileIncluded[fileType.getId()] = true;
-    } else {
-      throw new IllegalArgumentException(
-          String.format(
-              "ORC does not support type conversion from file type %s (%d) to reader type %s (%d)",
-              fileType.toString(), fileType.getId(),
-              readerType.toString(), readerId));
-    }
-  }
-
-  /**
-   * Use to make a reader to file type array when the schema is the same.
-   * @return
-   */
-  private void buildSameSchemaFileTypesArray() {
-    buildSameSchemaFileTypesArrayRecurse(readerSchema);
-  }
-
-  void buildSameSchemaFileTypesArrayRecurse(TypeDescription readerType) {
-    int id = readerType.getId();
-    if (!includeReaderColumn(id)) {
-      return;
-    }
-    if (readerFileTypes[id] != null) {
-      throw new RuntimeException("reader to file type entry already assigned");
-    }
-    readerFileTypes[id] = readerType;
-    fileIncluded[id] = true;
-    List<TypeDescription> children = readerType.getChildren();
-    if (children != null) {
-      for (TypeDescription child : children) {
-        buildSameSchemaFileTypesArrayRecurse(child);
-      }
-    }
-  }
-
-  private static boolean checkAcidSchema(TypeDescription type) {
-    if (type.getCategory().equals(TypeDescription.Category.STRUCT)) {
-      List<String> rootFields = type.getFieldNames();
-      if (acidEventFieldNames.equals(rootFields)) {
-        return true;
-      }
-    }
-    return false;
-  }
-
-  /**
-   * @param typeDescr
-   * @return ORC types for the ACID event based on the row's type description
-   */
-  public static TypeDescription createEventSchema(TypeDescription typeDescr) {
-    TypeDescription result = TypeDescription.createStruct()
-        .addField("operation", TypeDescription.createInt())
-        .addField("originalTransaction", TypeDescription.createLong())
-        .addField("bucket", TypeDescription.createInt())
-        .addField("rowId", TypeDescription.createLong())
-        .addField("currentTransaction", TypeDescription.createLong())
-        .addField("row", typeDescr.clone());
-    return result;
-  }
-
-  public static final List<String> acidEventFieldNames= new ArrayList<String>();
-  static {
-    acidEventFieldNames.add("operation");
-    acidEventFieldNames.add("originalTransaction");
-    acidEventFieldNames.add("bucket");
-    acidEventFieldNames.add("rowId");
-    acidEventFieldNames.add("currentTransaction");
-    acidEventFieldNames.add("row");
-  }
-}
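
The primitive-type rules that validatePPDConversion applies above are worth restating on their own: identical categories are safe (DECIMAL only when precision and scale also match), and the only cross-type promotions considered safe for predicate push-down are the integer widenings byte -> short -> int -> long plus string <-> varchar; float -> double and anything involving char are rejected because of how ORC stores values in its internal index. The sketch below condenses just that branch; PpdSafetySketch and isPpdSafe are illustrative names, not part of the class's API, and it assumes org.apache.orc.TypeDescription from this module is on the classpath.

    import org.apache.orc.TypeDescription;
    import org.apache.orc.TypeDescription.Category;

    public class PpdSafetySketch {

      /**
       * Mirrors the primitive branch of SchemaEvolution.validatePPDConversion:
       * same category is safe (DECIMAL only on an exact match), plus the
       * widenings byte -> short -> int -> long and string <-> varchar.
       */
      static boolean isPpdSafe(TypeDescription fileType, TypeDescription readerType) {
        Category file = fileType.getCategory();
        Category reader = readerType.getCategory();
        if (!file.isPrimitive()) {
          return false;                       // non-primitive categories are not PPD-safe here
        }
        if (file == reader) {
          // a precision/scale change on DECIMAL is not PPD-safe
          return file != Category.DECIMAL || fileType.equals(readerType);
        }
        switch (file) {
          case BYTE:
            return reader == Category.SHORT || reader == Category.INT
                || reader == Category.LONG;
          case SHORT:
            return reader == Category.INT || reader == Category.LONG;
          case INT:
            return reader == Category.LONG;
          case STRING:
            return reader == Category.VARCHAR;
          case VARCHAR:
            return reader == Category.STRING;
          default:
            return false;                     // float -> double, char, etc. are unsafe
        }
      }

      public static void main(String[] args) {
        System.out.println(isPpdSafe(TypeDescription.createInt(),
            TypeDescription.createLong()));     // true
        System.out.println(isPpdSafe(TypeDescription.createFloat(),
            TypeDescription.createDouble()));   // false
      }
    }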


[16/37] hive git commit: HIVE-17118. Move the hive-orc source files to make the package names unique.

Posted by om...@apache.org.
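
The SerializationUtils diff that follows leans on two small encodings worth keeping in mind while reading it: zigzag encoding, which interleaves negatives and positives so small magnitudes map to small unsigned values (0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...), and the base-128 varint written by writeVulong, least-significant 7-bit group first with the high bit as a continuation flag; writeVslong is simply the composition of the two. The sketch below restates that scheme under illustrative names (ZigZagVarintSketch is not part of the ORC API):

    import java.io.ByteArrayOutputStream;

    public class ZigZagVarintSketch {

      // zigzag: interleave negatives and positives so small magnitudes
      // encode to small unsigned values (0, -1, 1, -2, 2, ... -> 0, 1, 2, 3, 4, ...)
      static long zigzagEncode(long val) {
        return (val << 1) ^ (val >> 63);
      }

      static long zigzagDecode(long val) {
        return (val >>> 1) ^ -(val & 1);
      }

      // base-128 varint: low 7 bits per byte, least-significant group first,
      // high bit set on every byte except the last
      static void writeVulong(ByteArrayOutputStream out, long value) {
        while (true) {
          if ((value & ~0x7fL) == 0) {
            out.write((int) value);
            return;
          }
          out.write((int) (0x80 | (value & 0x7f)));
          value >>>= 7;
        }
      }

      public static void main(String[] args) {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        writeVulong(out, zigzagEncode(-2));   // -2 zigzags to 3: one byte, 0x03
        writeVulong(out, zigzagEncode(300));  // 300 zigzags to 600: 0xd8 0x04
        for (byte b : out.toByteArray()) {
          System.out.printf("0x%02x ", b & 0xff);
        }
        System.out.println();
        System.out.println(zigzagDecode(3));  // prints -2
      }
    }
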
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/impl/SerializationUtils.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/SerializationUtils.java b/orc/src/java/org/apache/orc/impl/SerializationUtils.java
deleted file mode 100644
index 2e5a59b..0000000
--- a/orc/src/java/org/apache/orc/impl/SerializationUtils.java
+++ /dev/null
@@ -1,1311 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc.impl;
-
-import java.io.EOFException;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.math.BigInteger;
-
-public final class SerializationUtils {
-
-  private final static int BUFFER_SIZE = 64;
-  private final byte[] readBuffer;
-  private final byte[] writeBuffer;
-
-  public SerializationUtils() {
-    this.readBuffer = new byte[BUFFER_SIZE];
-    this.writeBuffer = new byte[BUFFER_SIZE];
-  }
-
-  public void writeVulong(OutputStream output,
-                          long value) throws IOException {
-    while (true) {
-      if ((value & ~0x7f) == 0) {
-        output.write((byte) value);
-        return;
-      } else {
-        output.write((byte) (0x80 | (value & 0x7f)));
-        value >>>= 7;
-      }
-    }
-  }
-
-  public void writeVslong(OutputStream output,
-                          long value) throws IOException {
-    writeVulong(output, (value << 1) ^ (value >> 63));
-  }
-
-
-  public long readVulong(InputStream in) throws IOException {
-    long result = 0;
-    long b;
-    int offset = 0;
-    do {
-      b = in.read();
-      if (b == -1) {
-        throw new EOFException("Reading Vulong past EOF");
-      }
-      result |= (0x7f & b) << offset;
-      offset += 7;
-    } while (b >= 0x80);
-    return result;
-  }
-
-  public long readVslong(InputStream in) throws IOException {
-    long result = readVulong(in);
-    return (result >>> 1) ^ -(result & 1);
-  }
-
-  public float readFloat(InputStream in) throws IOException {
-    readFully(in, readBuffer, 0, 4);
-    int val = (((readBuffer[0] & 0xff) << 0)
-        + ((readBuffer[1] & 0xff) << 8)
-        + ((readBuffer[2] & 0xff) << 16)
-        + ((readBuffer[3] & 0xff) << 24));
-    return Float.intBitsToFloat(val);
-  }
-
-  public void writeFloat(OutputStream output,
-                         float value) throws IOException {
-    int ser = Float.floatToIntBits(value);
-    writeBuffer[0] = (byte) ((ser >> 0)  & 0xff);
-    writeBuffer[1] = (byte) ((ser >> 8)  & 0xff);
-    writeBuffer[2] = (byte) ((ser >> 16) & 0xff);
-    writeBuffer[3] = (byte) ((ser >> 24) & 0xff);
-    output.write(writeBuffer, 0, 4);
-  }
-
-  public double readDouble(InputStream in) throws IOException {
-    return Double.longBitsToDouble(readLongLE(in));
-  }
-
-  public long readLongLE(InputStream in) throws IOException {
-    readFully(in, readBuffer, 0, 8);
-    return (((readBuffer[0] & 0xff) << 0)
-        + ((readBuffer[1] & 0xff) << 8)
-        + ((readBuffer[2] & 0xff) << 16)
-        + ((long) (readBuffer[3] & 0xff) << 24)
-        + ((long) (readBuffer[4] & 0xff) << 32)
-        + ((long) (readBuffer[5] & 0xff) << 40)
-        + ((long) (readBuffer[6] & 0xff) << 48)
-        + ((long) (readBuffer[7] & 0xff) << 56));
-  }
-
-  private void readFully(final InputStream in, final byte[] buffer, final int off, final int len)
-      throws IOException {
-    int n = 0;
-    while (n < len) {
-      int count = in.read(buffer, off + n, len - n);
-      if (count < 0) {
-        throw new EOFException("Read past EOF for " + in);
-      }
-      n += count;
-    }
-  }
-
-  public void writeDouble(OutputStream output,
-                          double value) throws IOException {
-    writeLongLE(output, Double.doubleToLongBits(value));
-  }
-
-  private void writeLongLE(OutputStream output, long value) throws IOException {
-    writeBuffer[0] = (byte) ((value >> 0)  & 0xff);
-    writeBuffer[1] = (byte) ((value >> 8)  & 0xff);
-    writeBuffer[2] = (byte) ((value >> 16) & 0xff);
-    writeBuffer[3] = (byte) ((value >> 24) & 0xff);
-    writeBuffer[4] = (byte) ((value >> 32) & 0xff);
-    writeBuffer[5] = (byte) ((value >> 40) & 0xff);
-    writeBuffer[6] = (byte) ((value >> 48) & 0xff);
-    writeBuffer[7] = (byte) ((value >> 56) & 0xff);
-    output.write(writeBuffer, 0, 8);
-  }
-
-  /**
-   * Write the arbitrarily sized signed BigInteger in vint format.
-   *
-   * Signed integers are encoded using the low bit as the sign bit using zigzag
-   * encoding.
-   *
-   * Each byte uses the low 7 bits for data and the high bit for stop/continue.
-   *
-   * Bytes are stored LSB first.
-   * @param output the stream to write to
-   * @param value the value to output
-   * @throws IOException
-   */
-  public static void writeBigInteger(OutputStream output,
-                                     BigInteger value) throws IOException {
-    // encode the signed number as a positive integer
-    value = value.shiftLeft(1);
-    int sign = value.signum();
-    if (sign < 0) {
-      value = value.negate();
-      value = value.subtract(BigInteger.ONE);
-    }
-    int length = value.bitLength();
-    while (true) {
-      long lowBits = value.longValue() & 0x7fffffffffffffffL;
-      length -= 63;
-      // write out the next 63 bits worth of data
-      for(int i=0; i < 9; ++i) {
-        // if this is the last byte, leave the high bit off
-        if (length <= 0 && (lowBits & ~0x7f) == 0) {
-          output.write((byte) lowBits);
-          return;
-        } else {
-          output.write((byte) (0x80 | (lowBits & 0x7f)));
-          lowBits >>>= 7;
-        }
-      }
-      value = value.shiftRight(63);
-    }
-  }
-
-  /**
-   * Read the signed arbitrarily sized BigInteger in vint format
-   * @param input the stream to read from
-   * @return the read BigInteger
-   * @throws IOException
-   */
-  public static BigInteger readBigInteger(InputStream input) throws IOException {
-    BigInteger result = BigInteger.ZERO;
-    long work = 0;
-    int offset = 0;
-    long b;
-    do {
-      b = input.read();
-      if (b == -1) {
-        throw new EOFException("Reading BigInteger past EOF from " + input);
-      }
-      work |= (0x7f & b) << (offset % 63);
-      offset += 7;
-      // if we've read 63 bits, roll them into the result
-      if (offset == 63) {
-        result = BigInteger.valueOf(work);
-        work = 0;
-      } else if (offset % 63 == 0) {
-        result = result.or(BigInteger.valueOf(work).shiftLeft(offset-63));
-        work = 0;
-      }
-    } while (b >= 0x80);
-    if (work != 0) {
-      result = result.or(BigInteger.valueOf(work).shiftLeft((offset/63)*63));
-    }
-    // convert back to a signed number
-    boolean isNegative = result.testBit(0);
-    if (isNegative) {
-      result = result.add(BigInteger.ONE);
-      result = result.negate();
-    }
-    result = result.shiftRight(1);
-    return result;
-  }
-
-  public enum FixedBitSizes {
-    ONE, TWO, THREE, FOUR, FIVE, SIX, SEVEN, EIGHT, NINE, TEN, ELEVEN, TWELVE,
-    THIRTEEN, FOURTEEN, FIFTEEN, SIXTEEN, SEVENTEEN, EIGHTEEN, NINETEEN,
-    TWENTY, TWENTYONE, TWENTYTWO, TWENTYTHREE, TWENTYFOUR, TWENTYSIX,
-    TWENTYEIGHT, THIRTY, THIRTYTWO, FORTY, FORTYEIGHT, FIFTYSIX, SIXTYFOUR;
-  }
-
-  /**
-   * Count the number of bits required to encode the given value
-   * @param value
-   * @return bits required to store value
-   */
-  public int findClosestNumBits(long value) {
-    int count = 0;
-    while (value != 0) {
-      count++;
-      value = value >>> 1;
-    }
-    return getClosestFixedBits(count);
-  }
-
-  /**
-   * zigzag encode the given value
-   * @param val
-   * @return zigzag encoded value
-   */
-  public long zigzagEncode(long val) {
-    return (val << 1) ^ (val >> 63);
-  }
-
-  /**
-   * zigzag decode the given value
-   * @param val
-   * @return zigzag decoded value
-   */
-  public long zigzagDecode(long val) {
-    return (val >>> 1) ^ -(val & 1);
-  }
-
-  /**
-   * Compute the bits required to represent pth percentile value
-   * @param data - array
-   * @param p - percentile value (>=0.0 to <=1.0)
-   * @return pth percentile bits
-   */
-  public int percentileBits(long[] data, int offset, int length,
-                            double p) {
-    if ((p > 1.0) || (p <= 0.0)) {
-      return -1;
-    }
-
-    // histogram that stores the encoded bit requirement for each value.
-    // maximum number of bits that can be encoded is 32 (refer FixedBitSizes)
-    int[] hist = new int[32];
-
-    // compute the histogram
-    for(int i = offset; i < (offset + length); i++) {
-      int idx = encodeBitWidth(findClosestNumBits(data[i]));
-      hist[idx] += 1;
-    }
-
-    int perLen = (int) (length * (1.0 - p));
-
-    // return the bits required by pth percentile length
-    for(int i = hist.length - 1; i >= 0; i--) {
-      perLen -= hist[i];
-      if (perLen < 0) {
-        return decodeBitWidth(i);
-      }
-    }
-
-    return 0;
-  }
-
-  /**
-   * Read n bytes in big endian order and convert to long
-   * @return long value
-   */
-  public long bytesToLongBE(InStream input, int n) throws IOException {
-    long out = 0;
-    long val = 0;
-    while (n > 0) {
-      n--;
-      // store it in a long and then shift else integer overflow will occur
-      val = input.read();
-      out |= (val << (n * 8));
-    }
-    return out;
-  }
-
-  /**
-   * Calculate the number of bytes required
-   * @param n - number of values
-   * @param numBits - bit width
-   * @return number of bytes required
-   */
-  int getTotalBytesRequired(int n, int numBits) {
-    return (n * numBits + 7) / 8;
-  }
-
-  /**
-   * For a given fixed bit this function will return the closest available fixed
-   * bit
-   * @param n
-   * @return closest valid fixed bit
-   */
-  public int getClosestFixedBits(int n) {
-    if (n == 0) {
-      return 1;
-    }
-
-    if (n >= 1 && n <= 24) {
-      return n;
-    } else if (n > 24 && n <= 26) {
-      return 26;
-    } else if (n > 26 && n <= 28) {
-      return 28;
-    } else if (n > 28 && n <= 30) {
-      return 30;
-    } else if (n > 30 && n <= 32) {
-      return 32;
-    } else if (n > 32 && n <= 40) {
-      return 40;
-    } else if (n > 40 && n <= 48) {
-      return 48;
-    } else if (n > 48 && n <= 56) {
-      return 56;
-    } else {
-      return 64;
-    }
-  }
-
-  public int getClosestAlignedFixedBits(int n) {
-    if (n == 0 ||  n == 1) {
-      return 1;
-    } else if (n > 1 && n <= 2) {
-      return 2;
-    } else if (n > 2 && n <= 4) {
-      return 4;
-    } else if (n > 4 && n <= 8) {
-      return 8;
-    } else if (n > 8 && n <= 16) {
-      return 16;
-    } else if (n > 16 && n <= 24) {
-      return 24;
-    } else if (n > 24 && n <= 32) {
-      return 32;
-    } else if (n > 32 && n <= 40) {
-      return 40;
-    } else if (n > 40 && n <= 48) {
-      return 48;
-    } else if (n > 48 && n <= 56) {
-      return 56;
-    } else {
-      return 64;
-    }
-  }
-
-  /**
-   * Finds the closest available fixed bit width match and returns its encoded
-   * value (ordinal)
-   * @param n - fixed bit width to encode
-   * @return encoded fixed bit width
-   */
-  public int encodeBitWidth(int n) {
-    n = getClosestFixedBits(n);
-
-    if (n >= 1 && n <= 24) {
-      return n - 1;
-    } else if (n > 24 && n <= 26) {
-      return FixedBitSizes.TWENTYSIX.ordinal();
-    } else if (n > 26 && n <= 28) {
-      return FixedBitSizes.TWENTYEIGHT.ordinal();
-    } else if (n > 28 && n <= 30) {
-      return FixedBitSizes.THIRTY.ordinal();
-    } else if (n > 30 && n <= 32) {
-      return FixedBitSizes.THIRTYTWO.ordinal();
-    } else if (n > 32 && n <= 40) {
-      return FixedBitSizes.FORTY.ordinal();
-    } else if (n > 40 && n <= 48) {
-      return FixedBitSizes.FORTYEIGHT.ordinal();
-    } else if (n > 48 && n <= 56) {
-      return FixedBitSizes.FIFTYSIX.ordinal();
-    } else {
-      return FixedBitSizes.SIXTYFOUR.ordinal();
-    }
-  }
-
-  /**
-   * Decodes the ordinal fixed bit value to actual fixed bit width value
-   * @param n - encoded fixed bit width
-   * @return decoded fixed bit width
-   */
-  public int decodeBitWidth(int n) {
-    if (n >= FixedBitSizes.ONE.ordinal()
-        && n <= FixedBitSizes.TWENTYFOUR.ordinal()) {
-      return n + 1;
-    } else if (n == FixedBitSizes.TWENTYSIX.ordinal()) {
-      return 26;
-    } else if (n == FixedBitSizes.TWENTYEIGHT.ordinal()) {
-      return 28;
-    } else if (n == FixedBitSizes.THIRTY.ordinal()) {
-      return 30;
-    } else if (n == FixedBitSizes.THIRTYTWO.ordinal()) {
-      return 32;
-    } else if (n == FixedBitSizes.FORTY.ordinal()) {
-      return 40;
-    } else if (n == FixedBitSizes.FORTYEIGHT.ordinal()) {
-      return 48;
-    } else if (n == FixedBitSizes.FIFTYSIX.ordinal()) {
-      return 56;
-    } else {
-      return 64;
-    }
-  }
-
-  /**
-   * Bitpack and write the input values to underlying output stream
-   * @param input - values to write
-   * @param offset - offset
-   * @param len - length
-   * @param bitSize - bit width
-   * @param output - output stream
-   * @throws IOException
-   */
-  public void writeInts(long[] input, int offset, int len, int bitSize,
-                        OutputStream output) throws IOException {
-    if (input == null || input.length < 1 || offset < 0 || len < 1
-        || bitSize < 1) {
-      return;
-    }
-
-    switch (bitSize) {
-    case 1:
-      unrolledBitPack1(input, offset, len, output);
-      return;
-    case 2:
-      unrolledBitPack2(input, offset, len, output);
-      return;
-    case 4:
-      unrolledBitPack4(input, offset, len, output);
-      return;
-    case 8:
-      unrolledBitPack8(input, offset, len, output);
-      return;
-    case 16:
-      unrolledBitPack16(input, offset, len, output);
-      return;
-    case 24:
-      unrolledBitPack24(input, offset, len, output);
-      return;
-    case 32:
-      unrolledBitPack32(input, offset, len, output);
-      return;
-    case 40:
-      unrolledBitPack40(input, offset, len, output);
-      return;
-    case 48:
-      unrolledBitPack48(input, offset, len, output);
-      return;
-    case 56:
-      unrolledBitPack56(input, offset, len, output);
-      return;
-    case 64:
-      unrolledBitPack64(input, offset, len, output);
-      return;
-    default:
-      break;
-    }
-
-    int bitsLeft = 8;
-    byte current = 0;
-    for(int i = offset; i < (offset + len); i++) {
-      long value = input[i];
-      int bitsToWrite = bitSize;
-      while (bitsToWrite > bitsLeft) {
-        // add the bits to the bottom of the current word
-        current |= value >>> (bitsToWrite - bitsLeft);
-        // subtract out the bits we just added
-        bitsToWrite -= bitsLeft;
-        // zero out the bits above bitsToWrite
-        value &= (1L << bitsToWrite) - 1;
-        output.write(current);
-        current = 0;
-        bitsLeft = 8;
-      }
-      bitsLeft -= bitsToWrite;
-      current |= value << bitsLeft;
-      if (bitsLeft == 0) {
-        output.write(current);
-        current = 0;
-        bitsLeft = 8;
-      }
-    }
-
-    // flush
-    if (bitsLeft != 8) {
-      output.write(current);
-      current = 0;
-      bitsLeft = 8;
-    }
-  }
-
-  private void unrolledBitPack1(long[] input, int offset, int len,
-      OutputStream output) throws IOException {
-    final int numHops = 8;
-    final int remainder = len % numHops;
-    final int endOffset = offset + len;
-    final int endUnroll = endOffset - remainder;
-    int val = 0;
-    for (int i = offset; i < endUnroll; i = i + numHops) {
-      val = (int) (val | ((input[i] & 1) << 7)
-          | ((input[i + 1] & 1) << 6)
-          | ((input[i + 2] & 1) << 5)
-          | ((input[i + 3] & 1) << 4)
-          | ((input[i + 4] & 1) << 3)
-          | ((input[i + 5] & 1) << 2)
-          | ((input[i + 6] & 1) << 1)
-          | (input[i + 7]) & 1);
-      output.write(val);
-      val = 0;
-    }
-
-    if (remainder > 0) {
-      int startShift = 7;
-      for (int i = endUnroll; i < endOffset; i++) {
-        val = (int) (val | (input[i] & 1) << startShift);
-        startShift -= 1;
-      }
-      output.write(val);
-    }
-  }
-
-  private void unrolledBitPack2(long[] input, int offset, int len,
-      OutputStream output) throws IOException {
-    final int numHops = 4;
-    final int remainder = len % numHops;
-    final int endOffset = offset + len;
-    final int endUnroll = endOffset - remainder;
-    int val = 0;
-    for (int i = offset; i < endUnroll; i = i + numHops) {
-      val = (int) (val | ((input[i] & 3) << 6)
-          | ((input[i + 1] & 3) << 4)
-          | ((input[i + 2] & 3) << 2)
-          | (input[i + 3]) & 3);
-      output.write(val);
-      val = 0;
-    }
-
-    if (remainder > 0) {
-      int startShift = 6;
-      for (int i = endUnroll; i < endOffset; i++) {
-        val = (int) (val | (input[i] & 3) << startShift);
-        startShift -= 2;
-      }
-      output.write(val);
-    }
-  }
-
-  private void unrolledBitPack4(long[] input, int offset, int len,
-      OutputStream output) throws IOException {
-    final int numHops = 2;
-    final int remainder = len % numHops;
-    final int endOffset = offset + len;
-    final int endUnroll = endOffset - remainder;
-    int val = 0;
-    for (int i = offset; i < endUnroll; i = i + numHops) {
-      val = (int) (val | ((input[i] & 15) << 4) | (input[i + 1]) & 15);
-      output.write(val);
-      val = 0;
-    }
-
-    if (remainder > 0) {
-      int startShift = 4;
-      for (int i = endUnroll; i < endOffset; i++) {
-        val = (int) (val | (input[i] & 15) << startShift);
-        startShift -= 4;
-      }
-      output.write(val);
-    }
-  }
-
-  private void unrolledBitPack8(long[] input, int offset, int len,
-      OutputStream output) throws IOException {
-    unrolledBitPackBytes(input, offset, len, output, 1);
-  }
-
-  private void unrolledBitPack16(long[] input, int offset, int len,
-      OutputStream output) throws IOException {
-    unrolledBitPackBytes(input, offset, len, output, 2);
-  }
-
-  private void unrolledBitPack24(long[] input, int offset, int len,
-      OutputStream output) throws IOException {
-    unrolledBitPackBytes(input, offset, len, output, 3);
-  }
-
-  private void unrolledBitPack32(long[] input, int offset, int len,
-      OutputStream output) throws IOException {
-    unrolledBitPackBytes(input, offset, len, output, 4);
-  }
-
-  private void unrolledBitPack40(long[] input, int offset, int len,
-      OutputStream output) throws IOException {
-    unrolledBitPackBytes(input, offset, len, output, 5);
-  }
-
-  private void unrolledBitPack48(long[] input, int offset, int len,
-      OutputStream output) throws IOException {
-    unrolledBitPackBytes(input, offset, len, output, 6);
-  }
-
-  private void unrolledBitPack56(long[] input, int offset, int len,
-      OutputStream output) throws IOException {
-    unrolledBitPackBytes(input, offset, len, output, 7);
-  }
-
-  private void unrolledBitPack64(long[] input, int offset, int len,
-      OutputStream output) throws IOException {
-    unrolledBitPackBytes(input, offset, len, output, 8);
-  }
-
-  private void unrolledBitPackBytes(long[] input, int offset, int len, OutputStream output, int numBytes) throws IOException {
-    final int numHops = 8;
-    final int remainder = len % numHops;
-    final int endOffset = offset + len;
-    final int endUnroll = endOffset - remainder;
-    int i = offset;
-    for (; i < endUnroll; i = i + numHops) {
-      writeLongBE(output, input, i, numHops, numBytes);
-    }
-
-    if (remainder > 0) {
-      writeRemainingLongs(output, i, input, remainder, numBytes);
-    }
-  }
-
-  private void writeRemainingLongs(OutputStream output, int offset, long[] input, int remainder,
-      int numBytes) throws IOException {
-    final int numHops = remainder;
-
-    int idx = 0;
-    switch (numBytes) {
-    case 1:
-      while (remainder > 0) {
-        writeBuffer[idx] = (byte) (input[offset + idx] & 255);
-        remainder--;
-        idx++;
-      }
-      break;
-    case 2:
-      while (remainder > 0) {
-        writeLongBE2(output, input[offset + idx], idx * 2);
-        remainder--;
-        idx++;
-      }
-      break;
-    case 3:
-      while (remainder > 0) {
-        writeLongBE3(output, input[offset + idx], idx * 3);
-        remainder--;
-        idx++;
-      }
-      break;
-    case 4:
-      while (remainder > 0) {
-        writeLongBE4(output, input[offset + idx], idx * 4);
-        remainder--;
-        idx++;
-      }
-      break;
-    case 5:
-      while (remainder > 0) {
-        writeLongBE5(output, input[offset + idx], idx * 5);
-        remainder--;
-        idx++;
-      }
-      break;
-    case 6:
-      while (remainder > 0) {
-        writeLongBE6(output, input[offset + idx], idx * 6);
-        remainder--;
-        idx++;
-      }
-      break;
-    case 7:
-      while (remainder > 0) {
-        writeLongBE7(output, input[offset + idx], idx * 7);
-        remainder--;
-        idx++;
-      }
-      break;
-    case 8:
-      while (remainder > 0) {
-        writeLongBE8(output, input[offset + idx], idx * 8);
-        remainder--;
-        idx++;
-      }
-      break;
-    default:
-      break;
-    }
-
-    final int toWrite = numHops * numBytes;
-    output.write(writeBuffer, 0, toWrite);
-  }
-
-  private void writeLongBE(OutputStream output, long[] input, int offset, int numHops, int numBytes) throws IOException {
-
-    switch (numBytes) {
-    case 1:
-      writeBuffer[0] = (byte) (input[offset + 0] & 255);
-      writeBuffer[1] = (byte) (input[offset + 1] & 255);
-      writeBuffer[2] = (byte) (input[offset + 2] & 255);
-      writeBuffer[3] = (byte) (input[offset + 3] & 255);
-      writeBuffer[4] = (byte) (input[offset + 4] & 255);
-      writeBuffer[5] = (byte) (input[offset + 5] & 255);
-      writeBuffer[6] = (byte) (input[offset + 6] & 255);
-      writeBuffer[7] = (byte) (input[offset + 7] & 255);
-      break;
-    case 2:
-      writeLongBE2(output, input[offset + 0], 0);
-      writeLongBE2(output, input[offset + 1], 2);
-      writeLongBE2(output, input[offset + 2], 4);
-      writeLongBE2(output, input[offset + 3], 6);
-      writeLongBE2(output, input[offset + 4], 8);
-      writeLongBE2(output, input[offset + 5], 10);
-      writeLongBE2(output, input[offset + 6], 12);
-      writeLongBE2(output, input[offset + 7], 14);
-      break;
-    case 3:
-      writeLongBE3(output, input[offset + 0], 0);
-      writeLongBE3(output, input[offset + 1], 3);
-      writeLongBE3(output, input[offset + 2], 6);
-      writeLongBE3(output, input[offset + 3], 9);
-      writeLongBE3(output, input[offset + 4], 12);
-      writeLongBE3(output, input[offset + 5], 15);
-      writeLongBE3(output, input[offset + 6], 18);
-      writeLongBE3(output, input[offset + 7], 21);
-      break;
-    case 4:
-      writeLongBE4(output, input[offset + 0], 0);
-      writeLongBE4(output, input[offset + 1], 4);
-      writeLongBE4(output, input[offset + 2], 8);
-      writeLongBE4(output, input[offset + 3], 12);
-      writeLongBE4(output, input[offset + 4], 16);
-      writeLongBE4(output, input[offset + 5], 20);
-      writeLongBE4(output, input[offset + 6], 24);
-      writeLongBE4(output, input[offset + 7], 28);
-      break;
-    case 5:
-      writeLongBE5(output, input[offset + 0], 0);
-      writeLongBE5(output, input[offset + 1], 5);
-      writeLongBE5(output, input[offset + 2], 10);
-      writeLongBE5(output, input[offset + 3], 15);
-      writeLongBE5(output, input[offset + 4], 20);
-      writeLongBE5(output, input[offset + 5], 25);
-      writeLongBE5(output, input[offset + 6], 30);
-      writeLongBE5(output, input[offset + 7], 35);
-      break;
-    case 6:
-      writeLongBE6(output, input[offset + 0], 0);
-      writeLongBE6(output, input[offset + 1], 6);
-      writeLongBE6(output, input[offset + 2], 12);
-      writeLongBE6(output, input[offset + 3], 18);
-      writeLongBE6(output, input[offset + 4], 24);
-      writeLongBE6(output, input[offset + 5], 30);
-      writeLongBE6(output, input[offset + 6], 36);
-      writeLongBE6(output, input[offset + 7], 42);
-      break;
-    case 7:
-      writeLongBE7(output, input[offset + 0], 0);
-      writeLongBE7(output, input[offset + 1], 7);
-      writeLongBE7(output, input[offset + 2], 14);
-      writeLongBE7(output, input[offset + 3], 21);
-      writeLongBE7(output, input[offset + 4], 28);
-      writeLongBE7(output, input[offset + 5], 35);
-      writeLongBE7(output, input[offset + 6], 42);
-      writeLongBE7(output, input[offset + 7], 49);
-      break;
-    case 8:
-      writeLongBE8(output, input[offset + 0], 0);
-      writeLongBE8(output, input[offset + 1], 8);
-      writeLongBE8(output, input[offset + 2], 16);
-      writeLongBE8(output, input[offset + 3], 24);
-      writeLongBE8(output, input[offset + 4], 32);
-      writeLongBE8(output, input[offset + 5], 40);
-      writeLongBE8(output, input[offset + 6], 48);
-      writeLongBE8(output, input[offset + 7], 56);
-      break;
-      default:
-        break;
-    }
-
-    final int toWrite = numHops * numBytes;
-    output.write(writeBuffer, 0, toWrite);
-  }
-
-  private void writeLongBE2(OutputStream output, long val, int wbOffset) {
-    writeBuffer[wbOffset + 0] =  (byte) (val >>> 8);
-    writeBuffer[wbOffset + 1] =  (byte) (val >>> 0);
-  }
-
-  private void writeLongBE3(OutputStream output, long val, int wbOffset) {
-    writeBuffer[wbOffset + 0] =  (byte) (val >>> 16);
-    writeBuffer[wbOffset + 1] =  (byte) (val >>> 8);
-    writeBuffer[wbOffset + 2] =  (byte) (val >>> 0);
-  }
-
-  private void writeLongBE4(OutputStream output, long val, int wbOffset) {
-    writeBuffer[wbOffset + 0] =  (byte) (val >>> 24);
-    writeBuffer[wbOffset + 1] =  (byte) (val >>> 16);
-    writeBuffer[wbOffset + 2] =  (byte) (val >>> 8);
-    writeBuffer[wbOffset + 3] =  (byte) (val >>> 0);
-  }
-
-  private void writeLongBE5(OutputStream output, long val, int wbOffset) {
-    writeBuffer[wbOffset + 0] =  (byte) (val >>> 32);
-    writeBuffer[wbOffset + 1] =  (byte) (val >>> 24);
-    writeBuffer[wbOffset + 2] =  (byte) (val >>> 16);
-    writeBuffer[wbOffset + 3] =  (byte) (val >>> 8);
-    writeBuffer[wbOffset + 4] =  (byte) (val >>> 0);
-  }
-
-  private void writeLongBE6(OutputStream output, long val, int wbOffset) {
-    writeBuffer[wbOffset + 0] =  (byte) (val >>> 40);
-    writeBuffer[wbOffset + 1] =  (byte) (val >>> 32);
-    writeBuffer[wbOffset + 2] =  (byte) (val >>> 24);
-    writeBuffer[wbOffset + 3] =  (byte) (val >>> 16);
-    writeBuffer[wbOffset + 4] =  (byte) (val >>> 8);
-    writeBuffer[wbOffset + 5] =  (byte) (val >>> 0);
-  }
-
-  private void writeLongBE7(OutputStream output, long val, int wbOffset) {
-    writeBuffer[wbOffset + 0] =  (byte) (val >>> 48);
-    writeBuffer[wbOffset + 1] =  (byte) (val >>> 40);
-    writeBuffer[wbOffset + 2] =  (byte) (val >>> 32);
-    writeBuffer[wbOffset + 3] =  (byte) (val >>> 24);
-    writeBuffer[wbOffset + 4] =  (byte) (val >>> 16);
-    writeBuffer[wbOffset + 5] =  (byte) (val >>> 8);
-    writeBuffer[wbOffset + 6] =  (byte) (val >>> 0);
-  }
-
-  private void writeLongBE8(OutputStream output, long val, int wbOffset) {
-    writeBuffer[wbOffset + 0] =  (byte) (val >>> 56);
-    writeBuffer[wbOffset + 1] =  (byte) (val >>> 48);
-    writeBuffer[wbOffset + 2] =  (byte) (val >>> 40);
-    writeBuffer[wbOffset + 3] =  (byte) (val >>> 32);
-    writeBuffer[wbOffset + 4] =  (byte) (val >>> 24);
-    writeBuffer[wbOffset + 5] =  (byte) (val >>> 16);
-    writeBuffer[wbOffset + 6] =  (byte) (val >>> 8);
-    writeBuffer[wbOffset + 7] =  (byte) (val >>> 0);
-  }
-
-  /**
-   * Read bitpacked integers from input stream
-   * @param buffer - input buffer
-   * @param offset - offset
-   * @param len - length
-   * @param bitSize - bit width
-   * @param input - input stream
-   * @throws IOException
-   */
-  public void readInts(long[] buffer, int offset, int len, int bitSize,
-                       InStream input) throws IOException {
-    int bitsLeft = 0;
-    int current = 0;
-
-    switch (bitSize) {
-    case 1:
-      unrolledUnPack1(buffer, offset, len, input);
-      return;
-    case 2:
-      unrolledUnPack2(buffer, offset, len, input);
-      return;
-    case 4:
-      unrolledUnPack4(buffer, offset, len, input);
-      return;
-    case 8:
-      unrolledUnPack8(buffer, offset, len, input);
-      return;
-    case 16:
-      unrolledUnPack16(buffer, offset, len, input);
-      return;
-    case 24:
-      unrolledUnPack24(buffer, offset, len, input);
-      return;
-    case 32:
-      unrolledUnPack32(buffer, offset, len, input);
-      return;
-    case 40:
-      unrolledUnPack40(buffer, offset, len, input);
-      return;
-    case 48:
-      unrolledUnPack48(buffer, offset, len, input);
-      return;
-    case 56:
-      unrolledUnPack56(buffer, offset, len, input);
-      return;
-    case 64:
-      unrolledUnPack64(buffer, offset, len, input);
-      return;
-    default:
-      break;
-    }
-
-    for(int i = offset; i < (offset + len); i++) {
-      long result = 0;
-      int bitsLeftToRead = bitSize;
-      while (bitsLeftToRead > bitsLeft) {
-        result <<= bitsLeft;
-        result |= current & ((1 << bitsLeft) - 1);
-        bitsLeftToRead -= bitsLeft;
-        current = input.read();
-        bitsLeft = 8;
-      }
-
-      // handle the left over bits
-      if (bitsLeftToRead > 0) {
-        result <<= bitsLeftToRead;
-        bitsLeft -= bitsLeftToRead;
-        result |= (current >> bitsLeft) & ((1 << bitsLeftToRead) - 1);
-      }
-      buffer[i] = result;
-    }
-  }
-
-
-  private void unrolledUnPack1(long[] buffer, int offset, int len,
-      InStream input) throws IOException {
-    final int numHops = 8;
-    final int remainder = len % numHops;
-    final int endOffset = offset + len;
-    final int endUnroll = endOffset - remainder;
-    int val = 0;
-    for (int i = offset; i < endUnroll; i = i + numHops) {
-      val = input.read();
-      buffer[i] = (val >>> 7) & 1;
-      buffer[i + 1] = (val >>> 6) & 1;
-      buffer[i + 2] = (val >>> 5) & 1;
-      buffer[i + 3] = (val >>> 4) & 1;
-      buffer[i + 4] = (val >>> 3) & 1;
-      buffer[i + 5] = (val >>> 2) & 1;
-      buffer[i + 6] = (val >>> 1) & 1;
-      buffer[i + 7] = val & 1;
-    }
-
-    if (remainder > 0) {
-      int startShift = 7;
-      val = input.read();
-      for (int i = endUnroll; i < endOffset; i++) {
-        buffer[i] = (val >>> startShift) & 1;
-        startShift -= 1;
-      }
-    }
-  }
-
-  private void unrolledUnPack2(long[] buffer, int offset, int len,
-      InStream input) throws IOException {
-    final int numHops = 4;
-    final int remainder = len % numHops;
-    final int endOffset = offset + len;
-    final int endUnroll = endOffset - remainder;
-    int val = 0;
-    for (int i = offset; i < endUnroll; i = i + numHops) {
-      val = input.read();
-      buffer[i] = (val >>> 6) & 3;
-      buffer[i + 1] = (val >>> 4) & 3;
-      buffer[i + 2] = (val >>> 2) & 3;
-      buffer[i + 3] = val & 3;
-    }
-
-    if (remainder > 0) {
-      int startShift = 6;
-      val = input.read();
-      for (int i = endUnroll; i < endOffset; i++) {
-        buffer[i] = (val >>> startShift) & 3;
-        startShift -= 2;
-      }
-    }
-  }
-
-  private void unrolledUnPack4(long[] buffer, int offset, int len,
-      InStream input) throws IOException {
-    final int numHops = 2;
-    final int remainder = len % numHops;
-    final int endOffset = offset + len;
-    final int endUnroll = endOffset - remainder;
-    int val = 0;
-    for (int i = offset; i < endUnroll; i = i + numHops) {
-      val = input.read();
-      buffer[i] = (val >>> 4) & 15;
-      buffer[i + 1] = val & 15;
-    }
-
-    if (remainder > 0) {
-      int startShift = 4;
-      val = input.read();
-      for (int i = endUnroll; i < endOffset; i++) {
-        buffer[i] = (val >>> startShift) & 15;
-        startShift -= 4;
-      }
-    }
-  }
-
-  private void unrolledUnPack8(long[] buffer, int offset, int len,
-      InStream input) throws IOException {
-    unrolledUnPackBytes(buffer, offset, len, input, 1);
-  }
-
-  private void unrolledUnPack16(long[] buffer, int offset, int len,
-      InStream input) throws IOException {
-    unrolledUnPackBytes(buffer, offset, len, input, 2);
-  }
-
-  private void unrolledUnPack24(long[] buffer, int offset, int len,
-      InStream input) throws IOException {
-    unrolledUnPackBytes(buffer, offset, len, input, 3);
-  }
-
-  private void unrolledUnPack32(long[] buffer, int offset, int len,
-      InStream input) throws IOException {
-    unrolledUnPackBytes(buffer, offset, len, input, 4);
-  }
-
-  private void unrolledUnPack40(long[] buffer, int offset, int len,
-      InStream input) throws IOException {
-    unrolledUnPackBytes(buffer, offset, len, input, 5);
-  }
-
-  private void unrolledUnPack48(long[] buffer, int offset, int len,
-      InStream input) throws IOException {
-    unrolledUnPackBytes(buffer, offset, len, input, 6);
-  }
-
-  private void unrolledUnPack56(long[] buffer, int offset, int len,
-      InStream input) throws IOException {
-    unrolledUnPackBytes(buffer, offset, len, input, 7);
-  }
-
-  private void unrolledUnPack64(long[] buffer, int offset, int len,
-      InStream input) throws IOException {
-    unrolledUnPackBytes(buffer, offset, len, input, 8);
-  }
-
-  private void unrolledUnPackBytes(long[] buffer, int offset, int len, InStream input, int numBytes)
-      throws IOException {
-    final int numHops = 8;
-    final int remainder = len % numHops;
-    final int endOffset = offset + len;
-    final int endUnroll = endOffset - remainder;
-    int i = offset;
-    for (; i < endUnroll; i = i + numHops) {
-      readLongBE(input, buffer, i, numHops, numBytes);
-    }
-
-    if (remainder > 0) {
-      readRemainingLongs(buffer, i, input, remainder, numBytes);
-    }
-  }
-
-  private void readRemainingLongs(long[] buffer, int offset, InStream input, int remainder,
-      int numBytes) throws IOException {
-    final int toRead = remainder * numBytes;
-    // bulk read to buffer
-    int bytesRead = input.read(readBuffer, 0, toRead);
-    while (bytesRead != toRead) {
-      bytesRead += input.read(readBuffer, bytesRead, toRead - bytesRead);
-    }
-
-    int idx = 0;
-    switch (numBytes) {
-    case 1:
-      while (remainder > 0) {
-        buffer[offset++] = readBuffer[idx] & 255;
-        remainder--;
-        idx++;
-      }
-      break;
-    case 2:
-      while (remainder > 0) {
-        buffer[offset++] = readLongBE2(input, idx * 2);
-        remainder--;
-        idx++;
-      }
-      break;
-    case 3:
-      while (remainder > 0) {
-        buffer[offset++] = readLongBE3(input, idx * 3);
-        remainder--;
-        idx++;
-      }
-      break;
-    case 4:
-      while (remainder > 0) {
-        buffer[offset++] = readLongBE4(input, idx * 4);
-        remainder--;
-        idx++;
-      }
-      break;
-    case 5:
-      while (remainder > 0) {
-        buffer[offset++] = readLongBE5(input, idx * 5);
-        remainder--;
-        idx++;
-      }
-      break;
-    case 6:
-      while (remainder > 0) {
-        buffer[offset++] = readLongBE6(input, idx * 6);
-        remainder--;
-        idx++;
-      }
-      break;
-    case 7:
-      while (remainder > 0) {
-        buffer[offset++] = readLongBE7(input, idx * 7);
-        remainder--;
-        idx++;
-      }
-      break;
-    case 8:
-      while (remainder > 0) {
-        buffer[offset++] = readLongBE8(input, idx * 8);
-        remainder--;
-        idx++;
-      }
-      break;
-    default:
-      break;
-    }
-  }
-
-  private void readLongBE(InStream in, long[] buffer, int start, int numHops, int numBytes)
-      throws IOException {
-    final int toRead = numHops * numBytes;
-    // bulk read to buffer
-    int bytesRead = in.read(readBuffer, 0, toRead);
-    while (bytesRead != toRead) {
-      bytesRead += in.read(readBuffer, bytesRead, toRead - bytesRead);
-    }
-
-    switch (numBytes) {
-    case 1:
-      buffer[start + 0] = readBuffer[0] & 255;
-      buffer[start + 1] = readBuffer[1] & 255;
-      buffer[start + 2] = readBuffer[2] & 255;
-      buffer[start + 3] = readBuffer[3] & 255;
-      buffer[start + 4] = readBuffer[4] & 255;
-      buffer[start + 5] = readBuffer[5] & 255;
-      buffer[start + 6] = readBuffer[6] & 255;
-      buffer[start + 7] = readBuffer[7] & 255;
-      break;
-    case 2:
-      buffer[start + 0] = readLongBE2(in, 0);
-      buffer[start + 1] = readLongBE2(in, 2);
-      buffer[start + 2] = readLongBE2(in, 4);
-      buffer[start + 3] = readLongBE2(in, 6);
-      buffer[start + 4] = readLongBE2(in, 8);
-      buffer[start + 5] = readLongBE2(in, 10);
-      buffer[start + 6] = readLongBE2(in, 12);
-      buffer[start + 7] = readLongBE2(in, 14);
-      break;
-    case 3:
-      buffer[start + 0] = readLongBE3(in, 0);
-      buffer[start + 1] = readLongBE3(in, 3);
-      buffer[start + 2] = readLongBE3(in, 6);
-      buffer[start + 3] = readLongBE3(in, 9);
-      buffer[start + 4] = readLongBE3(in, 12);
-      buffer[start + 5] = readLongBE3(in, 15);
-      buffer[start + 6] = readLongBE3(in, 18);
-      buffer[start + 7] = readLongBE3(in, 21);
-      break;
-    case 4:
-      buffer[start + 0] = readLongBE4(in, 0);
-      buffer[start + 1] = readLongBE4(in, 4);
-      buffer[start + 2] = readLongBE4(in, 8);
-      buffer[start + 3] = readLongBE4(in, 12);
-      buffer[start + 4] = readLongBE4(in, 16);
-      buffer[start + 5] = readLongBE4(in, 20);
-      buffer[start + 6] = readLongBE4(in, 24);
-      buffer[start + 7] = readLongBE4(in, 28);
-      break;
-    case 5:
-      buffer[start + 0] = readLongBE5(in, 0);
-      buffer[start + 1] = readLongBE5(in, 5);
-      buffer[start + 2] = readLongBE5(in, 10);
-      buffer[start + 3] = readLongBE5(in, 15);
-      buffer[start + 4] = readLongBE5(in, 20);
-      buffer[start + 5] = readLongBE5(in, 25);
-      buffer[start + 6] = readLongBE5(in, 30);
-      buffer[start + 7] = readLongBE5(in, 35);
-      break;
-    case 6:
-      buffer[start + 0] = readLongBE6(in, 0);
-      buffer[start + 1] = readLongBE6(in, 6);
-      buffer[start + 2] = readLongBE6(in, 12);
-      buffer[start + 3] = readLongBE6(in, 18);
-      buffer[start + 4] = readLongBE6(in, 24);
-      buffer[start + 5] = readLongBE6(in, 30);
-      buffer[start + 6] = readLongBE6(in, 36);
-      buffer[start + 7] = readLongBE6(in, 42);
-      break;
-    case 7:
-      buffer[start + 0] = readLongBE7(in, 0);
-      buffer[start + 1] = readLongBE7(in, 7);
-      buffer[start + 2] = readLongBE7(in, 14);
-      buffer[start + 3] = readLongBE7(in, 21);
-      buffer[start + 4] = readLongBE7(in, 28);
-      buffer[start + 5] = readLongBE7(in, 35);
-      buffer[start + 6] = readLongBE7(in, 42);
-      buffer[start + 7] = readLongBE7(in, 49);
-      break;
-    case 8:
-      buffer[start + 0] = readLongBE8(in, 0);
-      buffer[start + 1] = readLongBE8(in, 8);
-      buffer[start + 2] = readLongBE8(in, 16);
-      buffer[start + 3] = readLongBE8(in, 24);
-      buffer[start + 4] = readLongBE8(in, 32);
-      buffer[start + 5] = readLongBE8(in, 40);
-      buffer[start + 6] = readLongBE8(in, 48);
-      buffer[start + 7] = readLongBE8(in, 56);
-      break;
-    default:
-      break;
-    }
-  }
-
-  private long readLongBE2(InStream in, int rbOffset) {
-    return (((readBuffer[rbOffset] & 255) << 8)
-        + ((readBuffer[rbOffset + 1] & 255) << 0));
-  }
-
-  private long readLongBE3(InStream in, int rbOffset) {
-    return (((readBuffer[rbOffset] & 255) << 16)
-        + ((readBuffer[rbOffset + 1] & 255) << 8)
-        + ((readBuffer[rbOffset + 2] & 255) << 0));
-  }
-
-  private long readLongBE4(InStream in, int rbOffset) {
-    return (((long) (readBuffer[rbOffset] & 255) << 24)
-        + ((readBuffer[rbOffset + 1] & 255) << 16)
-        + ((readBuffer[rbOffset + 2] & 255) << 8)
-        + ((readBuffer[rbOffset + 3] & 255) << 0));
-  }
-
-  private long readLongBE5(InStream in, int rbOffset) {
-    return (((long) (readBuffer[rbOffset] & 255) << 32)
-        + ((long) (readBuffer[rbOffset + 1] & 255) << 24)
-        + ((readBuffer[rbOffset + 2] & 255) << 16)
-        + ((readBuffer[rbOffset + 3] & 255) << 8)
-        + ((readBuffer[rbOffset + 4] & 255) << 0));
-  }
-
-  private long readLongBE6(InStream in, int rbOffset) {
-    return (((long) (readBuffer[rbOffset] & 255) << 40)
-        + ((long) (readBuffer[rbOffset + 1] & 255) << 32)
-        + ((long) (readBuffer[rbOffset + 2] & 255) << 24)
-        + ((readBuffer[rbOffset + 3] & 255) << 16)
-        + ((readBuffer[rbOffset + 4] & 255) << 8)
-        + ((readBuffer[rbOffset + 5] & 255) << 0));
-  }
-
-  private long readLongBE7(InStream in, int rbOffset) {
-    return (((long) (readBuffer[rbOffset] & 255) << 48)
-        + ((long) (readBuffer[rbOffset + 1] & 255) << 40)
-        + ((long) (readBuffer[rbOffset + 2] & 255) << 32)
-        + ((long) (readBuffer[rbOffset + 3] & 255) << 24)
-        + ((readBuffer[rbOffset + 4] & 255) << 16)
-        + ((readBuffer[rbOffset + 5] & 255) << 8)
-        + ((readBuffer[rbOffset + 6] & 255) << 0));
-  }
-
-  private long readLongBE8(InStream in, int rbOffset) {
-    return (((long) (readBuffer[rbOffset] & 255) << 56)
-        + ((long) (readBuffer[rbOffset + 1] & 255) << 48)
-        + ((long) (readBuffer[rbOffset + 2] & 255) << 40)
-        + ((long) (readBuffer[rbOffset + 3] & 255) << 32)
-        + ((long) (readBuffer[rbOffset + 4] & 255) << 24)
-        + ((readBuffer[rbOffset + 5] & 255) << 16)
-        + ((readBuffer[rbOffset + 6] & 255) << 8)
-        + ((readBuffer[rbOffset + 7] & 255) << 0));
-  }
-
-  // Do not want to use Guava LongMath.checkedSubtract() here as it will throw
-  // ArithmeticException in case of overflow
-  public boolean isSafeSubtract(long left, long right) {
-    return (left ^ right) >= 0 | (left ^ (left - right)) >= 0;
-  }
-}
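
For reference, the two helpers at the end of the removed file are pure bit manipulation, so they can be exercised outside the reader. The following standalone sketch (not part of this patch; the class name is made up, plain JDK only) replays the same big-endian byte assembly and the same sign-bit overflow test:

    // Standalone sketch, plain JDK only: replays the byte assembly used by
    // readLongBE4 and the overflow check used by isSafeSubtract.
    public class SerializationUtilsSketch {

      // Same expression as readLongBE4 above, reading from a local byte[].
      static long readLongBE4(byte[] buf, int off) {
        return (((long) (buf[off] & 255) << 24)
            + ((buf[off + 1] & 255) << 16)
            + ((buf[off + 2] & 255) << 8)
            + ((buf[off + 3] & 255) << 0));
      }

      // Same sign-bit trick as isSafeSubtract above: left - right overflows
      // only when the operands have different signs and the result's sign
      // also differs from left's sign.
      static boolean isSafeSubtract(long left, long right) {
        return (left ^ right) >= 0 | (left ^ (left - right)) >= 0;
      }

      public static void main(String[] args) {
        byte[] buf = {0x01, 0x02, 0x03, 0x04};
        System.out.println(Long.toHexString(readLongBE4(buf, 0)));  // 1020304

        System.out.println(isSafeSubtract(10L, 3L));             // true
        System.out.println(isSafeSubtract(Long.MIN_VALUE, 1L));   // false, wraps to Long.MAX_VALUE
        System.out.println(isSafeSubtract(Long.MAX_VALUE, -1L));  // false, wraps to Long.MIN_VALUE
      }
    }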

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/impl/SettableUncompressedStream.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/SettableUncompressedStream.java b/orc/src/java/org/apache/orc/impl/SettableUncompressedStream.java
deleted file mode 100644
index f9e29eb..0000000
--- a/orc/src/java/org/apache/orc/impl/SettableUncompressedStream.java
+++ /dev/null
@@ -1,44 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc.impl;
-
-import java.util.List;
-
-import org.apache.hadoop.hive.common.DiskRangeInfo;
-import org.apache.hadoop.hive.common.io.DiskRange;
-import org.apache.orc.impl.InStream;
-
-/**
- * An uncompressed stream whose underlying byte buffer can be set.
- */
-public class SettableUncompressedStream extends InStream.UncompressedStream {
-
-  public SettableUncompressedStream(String name, List<DiskRange> input, long length) {
-    super(name, input, length);
-    setOffset(input);
-  }
-
-  public void setBuffers(DiskRangeInfo diskRangeInfo) {
-    reset(diskRangeInfo.getDiskRanges(), diskRangeInfo.getTotalLength());
-    setOffset(diskRangeInfo.getDiskRanges());
-  }
-
-  private void setOffset(List<DiskRange> list) {
-    currentOffset = list.isEmpty() ? 0 : list.get(0).getOffset();
-  }
-}
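
A minimal usage sketch for the class above (not part of this patch): it wraps a single in-memory buffer as a DiskRange and reads it back through the stream. It assumes org.apache.orc.impl.BufferChunk still offers a (ByteBuffer, offset) constructor in this module; that class is not shown here, so treat the wiring as an assumption.

    // Hedged sketch, not part of this patch: wraps one in-memory buffer as a
    // DiskRange and reads it back through a SettableUncompressedStream.
    // Assumes org.apache.orc.impl.BufferChunk(ByteBuffer, long offset) is
    // available; substitute whatever DiskRange implementation is in use.
    import java.nio.ByteBuffer;
    import java.util.Collections;
    import org.apache.hadoop.hive.common.io.DiskRange;
    import org.apache.orc.impl.BufferChunk;
    import org.apache.orc.impl.SettableUncompressedStream;

    public class SettableStreamSketch {
      public static void main(String[] args) throws Exception {
        byte[] data = {10, 20, 30};
        DiskRange range = new BufferChunk(ByteBuffer.wrap(data), 0);
        SettableUncompressedStream stream = new SettableUncompressedStream(
            "sketch", Collections.<DiskRange>singletonList(range), data.length);
        int b;
        while ((b = stream.read()) != -1) {
          System.out.println(b);   // 10, 20, 30
        }
      }
    }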

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/impl/SnappyCodec.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/SnappyCodec.java b/orc/src/java/org/apache/orc/impl/SnappyCodec.java
deleted file mode 100644
index dd4f30c..0000000
--- a/orc/src/java/org/apache/orc/impl/SnappyCodec.java
+++ /dev/null
@@ -1,108 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc.impl;
-
-import org.apache.orc.CompressionCodec;
-import org.iq80.snappy.Snappy;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.EnumSet;
-
-public class SnappyCodec implements CompressionCodec, DirectDecompressionCodec {
-  private static final HadoopShims SHIMS = HadoopShims.Factory.get();
-
-  Boolean direct = null;
-
-  @Override
-  public boolean compress(ByteBuffer in, ByteBuffer out,
-                          ByteBuffer overflow) throws IOException {
-    int inBytes = in.remaining();
-    // I should work on a patch for Snappy to support an overflow buffer
-    // to prevent the extra buffer copy.
-    byte[] compressed = new byte[Snappy.maxCompressedLength(inBytes)];
-    int outBytes =
-        Snappy.compress(in.array(), in.arrayOffset() + in.position(), inBytes,
-            compressed, 0);
-    if (outBytes < inBytes) {
-      int remaining = out.remaining();
-      if (remaining >= outBytes) {
-        System.arraycopy(compressed, 0, out.array(), out.arrayOffset() +
-            out.position(), outBytes);
-        out.position(out.position() + outBytes);
-      } else {
-        System.arraycopy(compressed, 0, out.array(), out.arrayOffset() +
-            out.position(), remaining);
-        out.position(out.limit());
-        System.arraycopy(compressed, remaining, overflow.array(),
-            overflow.arrayOffset(), outBytes - remaining);
-        overflow.position(outBytes - remaining);
-      }
-      return true;
-    } else {
-      return false;
-    }
-  }
-
-  @Override
-  public void decompress(ByteBuffer in, ByteBuffer out) throws IOException {
-    if(in.isDirect() && out.isDirect()) {
-      directDecompress(in, out);
-      return;
-    }
-    int inOffset = in.position();
-    int uncompressLen =
-        Snappy.uncompress(in.array(), in.arrayOffset() + inOffset,
-        in.limit() - inOffset, out.array(), out.arrayOffset() + out.position());
-    out.position(uncompressLen + out.position());
-    out.flip();
-  }
-
-  @Override
-  public boolean isAvailable() {
-    if (direct == null) {
-      try {
-        if (SHIMS.getDirectDecompressor(
-            HadoopShims.DirectCompressionType.SNAPPY) != null) {
-          direct = Boolean.valueOf(true);
-        } else {
-          direct = Boolean.valueOf(false);
-        }
-      } catch (UnsatisfiedLinkError ule) {
-        direct = Boolean.valueOf(false);
-      }
-    }
-    return direct.booleanValue();
-  }
-
-  @Override
-  public void directDecompress(ByteBuffer in, ByteBuffer out)
-      throws IOException {
-    HadoopShims.DirectDecompressor decompressShim =
-        SHIMS.getDirectDecompressor(HadoopShims.DirectCompressionType.SNAPPY);
-    decompressShim.decompress(in, out);
-    out.flip(); // flip for read
-  }
-
-  @Override
-  public CompressionCodec modify(EnumSet<Modifier> modifiers) {
-    // snappy allows no modifications
-    return this;
-  }
-}
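
A small round-trip sketch against the codec above (not part of this patch): it keeps everything on heap buffers so the non-direct path is taken, and it uses a highly repetitive input so that compress() actually shrinks the data.

    // Hedged round-trip sketch, not part of this patch: compresses a highly
    // repetitive buffer with SnappyCodec and decompresses it again. An
    // incompressible input would make compress() return false and leave
    // `out` untouched.
    import java.nio.ByteBuffer;
    import java.util.Arrays;
    import org.apache.orc.impl.SnappyCodec;

    public class SnappyCodecSketch {
      public static void main(String[] args) throws Exception {
        SnappyCodec codec = new SnappyCodec();

        byte[] raw = new byte[1000];
        Arrays.fill(raw, (byte) 'a');
        ByteBuffer in = ByteBuffer.wrap(raw);
        ByteBuffer out = ByteBuffer.allocate(1000);
        ByteBuffer overflow = ByteBuffer.allocate(1000);

        boolean compressed = codec.compress(in, out, overflow);
        System.out.println("compressed: " + compressed
            + ", bytes: " + out.position());

        if (compressed && overflow.position() == 0) {
          out.flip();                                  // expose the compressed bytes
          ByteBuffer restored = ByteBuffer.allocate(raw.length);
          codec.decompress(out, restored);             // decompress() flips `restored`
          System.out.println("restored bytes: " + restored.remaining()); // 1000
        }
      }
    }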

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/impl/StreamName.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/StreamName.java b/orc/src/java/org/apache/orc/impl/StreamName.java
deleted file mode 100644
index b3fd145..0000000
--- a/orc/src/java/org/apache/orc/impl/StreamName.java
+++ /dev/null
@@ -1,97 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc.impl;
-
-import org.apache.orc.OrcProto;
-
-/**
- * The name of a stream within a stripe.
- */
-public class StreamName implements Comparable<StreamName> {
-  private final int column;
-  private final OrcProto.Stream.Kind kind;
-
-  public static enum Area {
-    DATA, INDEX
-  }
-
-  public StreamName(int column, OrcProto.Stream.Kind kind) {
-    this.column = column;
-    this.kind = kind;
-  }
-
-  public boolean equals(Object obj) {
-    if (obj != null && obj instanceof  StreamName) {
-      StreamName other = (StreamName) obj;
-      return other.column == column && other.kind == kind;
-    } else {
-      return false;
-    }
-  }
-
-  @Override
-  public int compareTo(StreamName streamName) {
-    if (streamName == null) {
-      return -1;
-    }
-    Area area = getArea(kind);
-    Area otherArea = streamName.getArea(streamName.kind);
-    if (area != otherArea) {
-      return -area.compareTo(otherArea);
-    }
-    if (column != streamName.column) {
-      return column < streamName.column ? -1 : 1;
-    }
-    return kind.compareTo(streamName.kind);
-  }
-
-  public int getColumn() {
-    return column;
-  }
-
-  public OrcProto.Stream.Kind getKind() {
-    return kind;
-  }
-
-  public Area getArea() {
-    return getArea(kind);
-  }
-
-  public static Area getArea(OrcProto.Stream.Kind kind) {
-    switch (kind) {
-      case ROW_INDEX:
-      case DICTIONARY_COUNT:
-      case BLOOM_FILTER:
-        return Area.INDEX;
-      default:
-        return Area.DATA;
-    }
-  }
-
-  @Override
-  public String toString() {
-    return "Stream for column " + column + " kind " + kind;
-  }
-
-  @Override
-  public int hashCode() {
-    return column * 101 + kind.getNumber();
-  }
-}
-
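
The ordering defined by compareTo above puts index-area streams ahead of data-area streams, then sorts by column and finally by stream kind. A short sketch (not part of this patch) makes that visible:

    // Hedged sketch, not part of this patch: shows the sort order implemented
    // by StreamName.compareTo -- index-area streams first, then data-area
    // streams, each ordered by column and then by stream kind.
    import java.util.TreeSet;
    import org.apache.orc.OrcProto;
    import org.apache.orc.impl.StreamName;

    public class StreamNameSketch {
      public static void main(String[] args) {
        TreeSet<StreamName> names = new TreeSet<>();
        names.add(new StreamName(1, OrcProto.Stream.Kind.DATA));
        names.add(new StreamName(2, OrcProto.Stream.Kind.ROW_INDEX));
        names.add(new StreamName(1, OrcProto.Stream.Kind.ROW_INDEX));
        names.add(new StreamName(2, OrcProto.Stream.Kind.DATA));

        // Prints the two ROW_INDEX streams (columns 1, 2) before the two
        // DATA streams (columns 1, 2).
        for (StreamName name : names) {
          System.out.println(name);
        }
      }
    }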

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/orc/impl/StringRedBlackTree.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/StringRedBlackTree.java b/orc/src/java/org/apache/orc/impl/StringRedBlackTree.java
deleted file mode 100644
index c353ab0..0000000
--- a/orc/src/java/org/apache/orc/impl/StringRedBlackTree.java
+++ /dev/null
@@ -1,210 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc.impl;
-
-import java.io.IOException;
-import java.io.OutputStream;
-
-import org.apache.hadoop.io.Text;
-import org.apache.orc.impl.DynamicByteArray;
-import org.apache.orc.impl.DynamicIntArray;
-import org.apache.orc.impl.RedBlackTree;
-
-/**
- * A red-black tree that stores strings. The strings are stored as UTF-8 bytes
- * and an offset for each entry.
- */
-public class StringRedBlackTree extends RedBlackTree {
-  private final DynamicByteArray byteArray = new DynamicByteArray();
-  private final DynamicIntArray keyOffsets;
-  private final Text newKey = new Text();
-
-  public StringRedBlackTree(int initialCapacity) {
-    super(initialCapacity);
-    keyOffsets = new DynamicIntArray(initialCapacity);
-  }
-
-  public int add(String value) {
-    newKey.set(value);
-    return addNewKey();
-  }
-
-  private int addNewKey() {
-    // if the newKey is actually new, add it to our byteArray and store the offset & length
-    if (add()) {
-      int len = newKey.getLength();
-      keyOffsets.add(byteArray.add(newKey.getBytes(), 0, len));
-    }
-    return lastAdd;
-  }
-
-  public int add(Text value) {
-    newKey.set(value);
-    return addNewKey();
-  }
-
-  public int add(byte[] bytes, int offset, int length) {
-    newKey.set(bytes, offset, length);
-    return addNewKey();
-  }
-
-  @Override
-  protected int compareValue(int position) {
-    int start = keyOffsets.get(position);
-    int end;
-    if (position + 1 == keyOffsets.size()) {
-      end = byteArray.size();
-    } else {
-      end = keyOffsets.get(position+1);
-    }
-    return byteArray.compare(newKey.getBytes(), 0, newKey.getLength(),
-                             start, end - start);
-  }
-
-  /**
-   * The information about each node.
-   */
-  public interface VisitorContext {
-    /**
-     * Get the position where the key was originally added.
-     * @return the number returned by add.
-     */
-    int getOriginalPosition();
-
-    /**
-     * Write the bytes for the string to the given output stream.
-     * @param out the stream to write to.
-     * @throws IOException
-     */
-    void writeBytes(OutputStream out) throws IOException;
-
-    /**
-     * Get the original string.
-     * @return the string
-     */
-    Text getText();
-
-    /**
-     * Get the number of bytes.
-     * @return the string's length in bytes
-     */
-    int getLength();
-  }
-
-  /**
-   * The interface for visitors.
-   */
-  public interface Visitor {
-    /**
-     * Called once for each node of the tree in sort order.
-     * @param context the information about each node
-     * @throws IOException
-     */
-    void visit(VisitorContext context) throws IOException;
-  }
-
-  private class VisitorContextImpl implements VisitorContext {
-    private int originalPosition;
-    private int start;
-    private int end;
-    private final Text text = new Text();
-
-    public int getOriginalPosition() {
-      return originalPosition;
-    }
-
-    public Text getText() {
-      byteArray.setText(text, start, end - start);
-      return text;
-    }
-
-    public void writeBytes(OutputStream out) throws IOException {
-      byteArray.write(out, start, end - start);
-    }
-
-    public int getLength() {
-      return end - start;
-    }
-
-    void setPosition(int position) {
-      originalPosition = position;
-      start = keyOffsets.get(originalPosition);
-      if (position + 1 == keyOffsets.size()) {
-        end = byteArray.size();
-      } else {
-        end = keyOffsets.get(originalPosition + 1);
-      }
-    }
-  }
-
-  private void recurse(int node, Visitor visitor, VisitorContextImpl context
-                      ) throws IOException {
-    if (node != NULL) {
-      recurse(getLeft(node), visitor, context);
-      context.setPosition(node);
-      visitor.visit(context);
-      recurse(getRight(node), visitor, context);
-    }
-  }
-
-  /**
-   * Visit all of the nodes in the tree in sorted order.
-   * @param visitor the action to be applied to each node
-   * @throws IOException
-   */
-  public void visit(Visitor visitor) throws IOException {
-    recurse(root, visitor, new VisitorContextImpl());
-  }
-
-  /**
-   * Reset the table to empty.
-   */
-  public void clear() {
-    super.clear();
-    byteArray.clear();
-    keyOffsets.clear();
-  }
-
-  public void getText(Text result, int originalPosition) {
-    int offset = keyOffsets.get(originalPosition);
-    int length;
-    if (originalPosition + 1 == keyOffsets.size()) {
-      length = byteArray.size() - offset;
-    } else {
-      length = keyOffsets.get(originalPosition + 1) - offset;
-    }
-    byteArray.setText(result, offset, length);
-  }
-
-  /**
-   * Get the size of the character data in the table.
-   * @return the bytes used by the table
-   */
-  public int getCharacterSize() {
-    return byteArray.size();
-  }
-
-  /**
-   * Calculate the approximate size in memory.
-   * @return the number of bytes used in storing the tree.
-   */
-  public long getSizeInBytes() {
-    return byteArray.getSizeInBytes() + keyOffsets.getSizeInBytes() +
-      super.getSizeInBytes();
-  }
-}
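
A short usage sketch against the dictionary above (not part of this patch): positions are handed out in insertion order, a duplicate key returns the position of its first insertion, and visit() walks the entries in sorted order.

    // Hedged sketch, not part of this patch: inserts a few strings into a
    // StringRedBlackTree, shows that duplicates return the same position,
    // and visits the entries in sorted order.
    import java.io.IOException;
    import org.apache.orc.impl.StringRedBlackTree;

    public class StringRedBlackTreeSketch {
      public static void main(String[] args) throws IOException {
        StringRedBlackTree dict = new StringRedBlackTree(16);
        System.out.println(dict.add("banana"));   // 0
        System.out.println(dict.add("apple"));    // 1
        System.out.println(dict.add("banana"));   // 0 again: duplicate key

        // Walk the dictionary in sorted order; getOriginalPosition() recovers
        // the id that add() handed out for each key.
        dict.visit(new StringRedBlackTree.Visitor() {
          @Override
          public void visit(StringRedBlackTree.VisitorContext context)
              throws IOException {
            System.out.println(context.getOriginalPosition() + " "
                + context.getText());             // "1 apple" then "0 banana"
          }
        });
      }
    }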


[06/37] hive git commit: HIVE-17118. Move the hive-orc source files to make the package names unique.

Posted by om...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/orc/TestColumnStatistics.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/orc/TestColumnStatistics.java b/orc/src/test/org/apache/orc/TestColumnStatistics.java
deleted file mode 100644
index 93d4bdb..0000000
--- a/orc/src/test/org/apache/orc/TestColumnStatistics.java
+++ /dev/null
@@ -1,365 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.orc;
-
-import static junit.framework.Assert.assertEquals;
-import static org.junit.Assume.assumeTrue;
-
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.PrintStream;
-import java.sql.Timestamp;
-import java.util.List;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
-import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.hadoop.hive.serde2.io.DateWritable;
-import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.orc.impl.ColumnStatisticsImpl;
-import org.apache.orc.tools.FileDump;
-import org.apache.orc.tools.TestFileDump;
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TestName;
-
-/**
- * Test ColumnStatisticsImpl for ORC.
- */
-public class TestColumnStatistics {
-
-  @Test
-  public void testLongMerge() throws Exception {
-    TypeDescription schema = TypeDescription.createInt();
-
-    ColumnStatisticsImpl stats1 = ColumnStatisticsImpl.create(schema);
-    ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(schema);
-    stats1.updateInteger(10, 2);
-    stats2.updateInteger(1, 1);
-    stats2.updateInteger(1000, 1);
-    stats1.merge(stats2);
-    IntegerColumnStatistics typed = (IntegerColumnStatistics) stats1;
-    assertEquals(1, typed.getMinimum());
-    assertEquals(1000, typed.getMaximum());
-    stats1.reset();
-    stats1.updateInteger(-10, 1);
-    stats1.updateInteger(10000, 1);
-    stats1.merge(stats2);
-    assertEquals(-10, typed.getMinimum());
-    assertEquals(10000, typed.getMaximum());
-  }
-
-  @Test
-  public void testDoubleMerge() throws Exception {
-    TypeDescription schema = TypeDescription.createDouble();
-
-    ColumnStatisticsImpl stats1 = ColumnStatisticsImpl.create(schema);
-    ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(schema);
-    stats1.updateDouble(10.0);
-    stats1.updateDouble(100.0);
-    stats2.updateDouble(1.0);
-    stats2.updateDouble(1000.0);
-    stats1.merge(stats2);
-    DoubleColumnStatistics typed = (DoubleColumnStatistics) stats1;
-    assertEquals(1.0, typed.getMinimum(), 0.001);
-    assertEquals(1000.0, typed.getMaximum(), 0.001);
-    stats1.reset();
-    stats1.updateDouble(-10);
-    stats1.updateDouble(10000);
-    stats1.merge(stats2);
-    assertEquals(-10, typed.getMinimum(), 0.001);
-    assertEquals(10000, typed.getMaximum(), 0.001);
-  }
-
-
-  @Test
-  public void testStringMerge() throws Exception {
-    TypeDescription schema = TypeDescription.createString();
-
-    ColumnStatisticsImpl stats1 = ColumnStatisticsImpl.create(schema);
-    ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(schema);
-    stats1.updateString(new Text("bob"));
-    stats1.updateString(new Text("david"));
-    stats1.updateString(new Text("charles"));
-    stats2.updateString(new Text("anne"));
-    byte[] erin = new byte[]{0, 1, 2, 3, 4, 5, 101, 114, 105, 110};
-    stats2.updateString(erin, 6, 4, 5);
-    assertEquals(24, ((StringColumnStatistics)stats2).getSum());
-    stats1.merge(stats2);
-    StringColumnStatistics typed = (StringColumnStatistics) stats1;
-    assertEquals("anne", typed.getMinimum());
-    assertEquals("erin", typed.getMaximum());
-    assertEquals(39, typed.getSum());
-    stats1.reset();
-    stats1.updateString(new Text("aaa"));
-    stats1.updateString(new Text("zzz"));
-    stats1.merge(stats2);
-    assertEquals("aaa", typed.getMinimum());
-    assertEquals("zzz", typed.getMaximum());
-  }
-
-  @Test
-  public void testDateMerge() throws Exception {
-    TypeDescription schema = TypeDescription.createDate();
-
-    ColumnStatisticsImpl stats1 = ColumnStatisticsImpl.create(schema);
-    ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(schema);
-    stats1.updateDate(new DateWritable(1000));
-    stats1.updateDate(new DateWritable(100));
-    stats2.updateDate(new DateWritable(10));
-    stats2.updateDate(new DateWritable(2000));
-    stats1.merge(stats2);
-    DateColumnStatistics typed = (DateColumnStatistics) stats1;
-    assertEquals(new DateWritable(10).get(), typed.getMinimum());
-    assertEquals(new DateWritable(2000).get(), typed.getMaximum());
-    stats1.reset();
-    stats1.updateDate(new DateWritable(-10));
-    stats1.updateDate(new DateWritable(10000));
-    stats1.merge(stats2);
-    assertEquals(new DateWritable(-10).get(), typed.getMinimum());
-    assertEquals(new DateWritable(10000).get(), typed.getMaximum());
-  }
-
-  @Test
-  public void testTimestampMerge() throws Exception {
-    TypeDescription schema = TypeDescription.createTimestamp();
-
-    ColumnStatisticsImpl stats1 = ColumnStatisticsImpl.create(schema);
-    ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(schema);
-    stats1.updateTimestamp(new Timestamp(10));
-    stats1.updateTimestamp(new Timestamp(100));
-    stats2.updateTimestamp(new Timestamp(1));
-    stats2.updateTimestamp(new Timestamp(1000));
-    stats1.merge(stats2);
-    TimestampColumnStatistics typed = (TimestampColumnStatistics) stats1;
-    assertEquals(1, typed.getMinimum().getTime());
-    assertEquals(1000, typed.getMaximum().getTime());
-    stats1.reset();
-    stats1.updateTimestamp(new Timestamp(-10));
-    stats1.updateTimestamp(new Timestamp(10000));
-    stats1.merge(stats2);
-    assertEquals(-10, typed.getMinimum().getTime());
-    assertEquals(10000, typed.getMaximum().getTime());
-  }
-
-  @Test
-  public void testDecimalMerge() throws Exception {
-    TypeDescription schema = TypeDescription.createDecimal()
-        .withPrecision(38).withScale(16);
-
-    ColumnStatisticsImpl stats1 = ColumnStatisticsImpl.create(schema);
-    ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(schema);
-    stats1.updateDecimal(new HiveDecimalWritable(10));
-    stats1.updateDecimal(new HiveDecimalWritable(100));
-    stats2.updateDecimal(new HiveDecimalWritable(1));
-    stats2.updateDecimal(new HiveDecimalWritable(1000));
-    stats1.merge(stats2);
-    DecimalColumnStatistics typed = (DecimalColumnStatistics) stats1;
-    assertEquals(1, typed.getMinimum().longValue());
-    assertEquals(1000, typed.getMaximum().longValue());
-    stats1.reset();
-    stats1.updateDecimal(new HiveDecimalWritable(-10));
-    stats1.updateDecimal(new HiveDecimalWritable(10000));
-    stats1.merge(stats2);
-    assertEquals(-10, typed.getMinimum().longValue());
-    assertEquals(10000, typed.getMaximum().longValue());
-  }
-
-
-  Path workDir = new Path(System.getProperty("test.tmp.dir",
-      "target" + File.separator + "test" + File.separator + "tmp"));
-
-  Configuration conf;
-  FileSystem fs;
-  Path testFilePath;
-
-  @Rule
-  public TestName testCaseName = new TestName();
-
-  @Before
-  public void openFileSystem() throws Exception {
-    conf = new Configuration();
-    fs = FileSystem.getLocal(conf);
-    fs.setWorkingDirectory(workDir);
-    testFilePath = new Path("TestOrcFile." + testCaseName.getMethodName() + ".orc");
-    fs.delete(testFilePath, false);
-  }
-
-  private static BytesWritable bytes(int... items) {
-    BytesWritable result = new BytesWritable();
-    result.setSize(items.length);
-    for (int i = 0; i < items.length; ++i) {
-      result.getBytes()[i] = (byte) items[i];
-    }
-    return result;
-  }
-
-  void appendRow(VectorizedRowBatch batch, BytesWritable bytes,
-                 String str) {
-    int row = batch.size++;
-    if (bytes == null) {
-      batch.cols[0].noNulls = false;
-      batch.cols[0].isNull[row] = true;
-    } else {
-      ((BytesColumnVector) batch.cols[0]).setVal(row, bytes.getBytes(),
-          0, bytes.getLength());
-    }
-    if (str == null) {
-      batch.cols[1].noNulls = false;
-      batch.cols[1].isNull[row] = true;
-    } else {
-      ((BytesColumnVector) batch.cols[1]).setVal(row, str.getBytes());
-    }
-  }
-
-  @Test
-  public void testHasNull() throws Exception {
-    TypeDescription schema =
-        TypeDescription.createStruct()
-        .addField("bytes1", TypeDescription.createBinary())
-        .addField("string1", TypeDescription.createString());
-    Writer writer = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf)
-            .setSchema(schema)
-            .rowIndexStride(1000)
-            .stripeSize(10000)
-            .bufferSize(10000));
-    VectorizedRowBatch batch = schema.createRowBatch(5000);
-    // STRIPE 1
-    // RG1
-    for(int i=0; i<1000; i++) {
-      appendRow(batch, bytes(1, 2, 3), "RG1");
-    }
-    writer.addRowBatch(batch);
-    batch.reset();
-    // RG2
-    for(int i=0; i<1000; i++) {
-      appendRow(batch, bytes(1, 2, 3), null);
-    }
-    writer.addRowBatch(batch);
-    batch.reset();
-    // RG3
-    for(int i=0; i<1000; i++) {
-      appendRow(batch, bytes(1, 2, 3), "RG3");
-    }
-    writer.addRowBatch(batch);
-    batch.reset();
-    // RG4
-    for (int i = 0; i < 1000; i++) {
-      appendRow(batch, bytes(1,2,3), null);
-    }
-    writer.addRowBatch(batch);
-    batch.reset();
-    // RG5
-    for(int i=0; i<1000; i++) {
-      appendRow(batch, bytes(1, 2, 3), null);
-    }
-    writer.addRowBatch(batch);
-    batch.reset();
-    // STRIPE 2
-    for (int i = 0; i < 5000; i++) {
-      appendRow(batch, bytes(1,2,3), null);
-    }
-    writer.addRowBatch(batch);
-    batch.reset();
-    // STRIPE 3
-    for (int i = 0; i < 5000; i++) {
-      appendRow(batch, bytes(1,2,3), "STRIPE-3");
-    }
-    writer.addRowBatch(batch);
-    batch.reset();
-    // STRIPE 4
-    for (int i = 0; i < 5000; i++) {
-      appendRow(batch, bytes(1,2,3), null);
-    }
-    writer.addRowBatch(batch);
-    batch.reset();
-    writer.close();
-    Reader reader = OrcFile.createReader(testFilePath,
-        OrcFile.readerOptions(conf).filesystem(fs));
-
-    // check the file level stats
-    ColumnStatistics[] stats = reader.getStatistics();
-    assertEquals(20000, stats[0].getNumberOfValues());
-    assertEquals(20000, stats[1].getNumberOfValues());
-    assertEquals(7000, stats[2].getNumberOfValues());
-    assertEquals(false, stats[0].hasNull());
-    assertEquals(false, stats[1].hasNull());
-    assertEquals(true, stats[2].hasNull());
-
-    // check the stripe level stats
-    List<StripeStatistics> stripeStats = reader.getStripeStatistics();
-    // stripe 1 stats
-    StripeStatistics ss1 = stripeStats.get(0);
-    ColumnStatistics ss1_cs1 = ss1.getColumnStatistics()[0];
-    ColumnStatistics ss1_cs2 = ss1.getColumnStatistics()[1];
-    ColumnStatistics ss1_cs3 = ss1.getColumnStatistics()[2];
-    assertEquals(false, ss1_cs1.hasNull());
-    assertEquals(false, ss1_cs2.hasNull());
-    assertEquals(true, ss1_cs3.hasNull());
-
-    // stripe 2 stats
-    StripeStatistics ss2 = stripeStats.get(1);
-    ColumnStatistics ss2_cs1 = ss2.getColumnStatistics()[0];
-    ColumnStatistics ss2_cs2 = ss2.getColumnStatistics()[1];
-    ColumnStatistics ss2_cs3 = ss2.getColumnStatistics()[2];
-    assertEquals(false, ss2_cs1.hasNull());
-    assertEquals(false, ss2_cs2.hasNull());
-    assertEquals(true, ss2_cs3.hasNull());
-
-    // stripe 3 stats
-    StripeStatistics ss3 = stripeStats.get(2);
-    ColumnStatistics ss3_cs1 = ss3.getColumnStatistics()[0];
-    ColumnStatistics ss3_cs2 = ss3.getColumnStatistics()[1];
-    ColumnStatistics ss3_cs3 = ss3.getColumnStatistics()[2];
-    assertEquals(false, ss3_cs1.hasNull());
-    assertEquals(false, ss3_cs2.hasNull());
-    assertEquals(false, ss3_cs3.hasNull());
-
-    // stripe 4 stats
-    StripeStatistics ss4 = stripeStats.get(3);
-    ColumnStatistics ss4_cs1 = ss4.getColumnStatistics()[0];
-    ColumnStatistics ss4_cs2 = ss4.getColumnStatistics()[1];
-    ColumnStatistics ss4_cs3 = ss4.getColumnStatistics()[2];
-    assertEquals(false, ss4_cs1.hasNull());
-    assertEquals(false, ss4_cs2.hasNull());
-    assertEquals(true, ss4_cs3.hasNull());
-
-    // Test file dump
-    PrintStream origOut = System.out;
-    String outputFilename = "orc-file-has-null.out";
-    FileOutputStream myOut = new FileOutputStream(workDir + File.separator + outputFilename);
-
-    // replace stdout and run command
-    System.setOut(new PrintStream(myOut));
-    FileDump.main(new String[]{testFilePath.toString(), "--rowindex=2"});
-    System.out.flush();
-    System.setOut(origOut);
-    // If called with an expression evaluating to false, the test will halt
-    // and be ignored.
-    assumeTrue(!System.getProperty("os.name").startsWith("Windows"));
-    TestFileDump.checkOutput(outputFilename, workDir + File.separator + outputFilename);
-  }
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/orc/TestNewIntegerEncoding.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/orc/TestNewIntegerEncoding.java b/orc/src/test/org/apache/orc/TestNewIntegerEncoding.java
deleted file mode 100644
index 526dd81..0000000
--- a/orc/src/test/org/apache/orc/TestNewIntegerEncoding.java
+++ /dev/null
@@ -1,1373 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc;
-
-import static junit.framework.Assert.assertEquals;
-
-import java.io.File;
-import java.sql.Timestamp;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.List;
-import java.util.Random;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TestName;
-import org.junit.runner.RunWith;
-import org.junit.runners.Parameterized;
-import org.junit.runners.Parameterized.Parameters;
-
-import com.google.common.collect.Lists;
-import com.google.common.primitives.Longs;
-
-@RunWith(value = Parameterized.class)
-public class TestNewIntegerEncoding {
-
-  private OrcFile.EncodingStrategy encodingStrategy;
-
-  public TestNewIntegerEncoding( OrcFile.EncodingStrategy es) {
-    this.encodingStrategy = es;
-  }
-
-  @Parameters
-  public static Collection<Object[]> data() {
-    Object[][] data = new Object[][] { {  OrcFile.EncodingStrategy.COMPRESSION },
-        {  OrcFile.EncodingStrategy.SPEED } };
-    return Arrays.asList(data);
-  }
-
-  public static class TSRow {
-    Timestamp ts;
-
-    public TSRow(Timestamp ts) {
-      this.ts = ts;
-    }
-  }
-
-  public static TypeDescription getRowSchema() {
-    return TypeDescription.createStruct()
-        .addField("int1", TypeDescription.createInt())
-        .addField("long1", TypeDescription.createLong());
-  }
-
-  public static void appendRow(VectorizedRowBatch batch,
-                               int int1, long long1) {
-    int row = batch.size++;
-    ((LongColumnVector) batch.cols[0]).vector[row] = int1;
-    ((LongColumnVector) batch.cols[1]).vector[row] = long1;
-  }
-
-  public static void appendLong(VectorizedRowBatch batch,
-                                long long1) {
-    int row = batch.size++;
-    ((LongColumnVector) batch.cols[0]).vector[row] = long1;
-  }
-
-  Path workDir = new Path(System.getProperty("test.tmp.dir", "target"
-      + File.separator + "test" + File.separator + "tmp"));
-
-  Configuration conf;
-  FileSystem fs;
-  Path testFilePath;
-
-  @Rule
-  public TestName testCaseName = new TestName();
-
-  @Before
-  public void openFileSystem() throws Exception {
-    conf = new Configuration();
-    fs = FileSystem.getLocal(conf);
-    testFilePath = new Path(workDir, "TestOrcFile."
-        + testCaseName.getMethodName() + ".orc");
-    fs.delete(testFilePath, false);
-  }
-
-  @Test
-  public void testBasicRow() throws Exception {
-    TypeDescription schema= getRowSchema();
-    Writer writer = OrcFile.createWriter(testFilePath,
-                                         OrcFile.writerOptions(conf)
-                                         .setSchema(schema)
-                                         .stripeSize(100000)
-                                         .compress(CompressionKind.NONE)
-                                         .bufferSize(10000)
-                                         .encodingStrategy(encodingStrategy));
-    VectorizedRowBatch batch = schema.createRowBatch();
-    appendRow(batch, 111, 1111L);
-    appendRow(batch, 111, 1111L);
-    appendRow(batch, 111, 1111L);
-    writer.addRowBatch(batch);
-    writer.close();
-
-    Reader reader = OrcFile.createReader(testFilePath,
-        OrcFile.readerOptions(conf).filesystem(fs));
-    RecordReader rows = reader.rows();
-    batch = reader.getSchema().createRowBatch();
-    while (rows.nextBatch(batch)) {
-      for(int r=0; r < batch.size; ++r) {
-        assertEquals(111, ((LongColumnVector) batch.cols[0]).vector[r]);
-        assertEquals(1111, ((LongColumnVector) batch.cols[1]).vector[r]);
-      }
-    }
-  }
-
-  @Test
-  public void testBasicOld() throws Exception {
-    TypeDescription schema = TypeDescription.createLong();
-    long[] inp = new long[] { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 4, 5, 6,
-        7, 8, 9, 10, 1, 1, 1, 1, 1, 1, 10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 1, 1, 1,
-        2, 5, 1, 3, 7, 1, 9, 2, 6, 3, 7, 1, 9, 2, 6, 3, 7, 1, 9, 2, 6, 3, 7, 1,
-        9, 2, 6, 3, 7, 1, 9, 2, 6, 2000, 2, 1, 1, 1, 1, 1, 3, 7, 1, 9, 2, 6, 1,
-        1, 1, 1, 1 };
-    List<Long> input = Lists.newArrayList(Longs.asList(inp));
-    Writer writer = OrcFile.createWriter(testFilePath,
-                                         OrcFile.writerOptions(conf)
-                                         .setSchema(schema)
-                                         .compress(CompressionKind.NONE)
-                                         .version(OrcFile.Version.V_0_11)
-                                         .bufferSize(10000)
-                                         .encodingStrategy(encodingStrategy));
-    VectorizedRowBatch batch = schema.createRowBatch();
-    for(Long l : input) {
-      appendLong(batch, l);
-    }
-    writer.addRowBatch(batch);
-    writer.close();
-
-    Reader reader = OrcFile.createReader(testFilePath,
-        OrcFile.readerOptions(conf).filesystem(fs));
-    RecordReader rows = reader.rows();
-    int idx = 0;
-    batch = reader.getSchema().createRowBatch();
-    while (rows.nextBatch(batch)) {
-      for(int r=0; r < batch.size; ++r) {
-        assertEquals(input.get(idx++).longValue(),
-            ((LongColumnVector) batch.cols[0]).vector[r]);
-      }
-    }
-  }
-
-  @Test
-  public void testBasicNew() throws Exception {
-    TypeDescription schema = TypeDescription.createLong();
-
-    long[] inp = new long[] { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 4, 5, 6,
-        7, 8, 9, 10, 1, 1, 1, 1, 1, 1, 10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 1, 1, 1,
-        2, 5, 1, 3, 7, 1, 9, 2, 6, 3, 7, 1, 9, 2, 6, 3, 7, 1, 9, 2, 6, 3, 7, 1,
-        9, 2, 6, 3, 7, 1, 9, 2, 6, 2000, 2, 1, 1, 1, 1, 1, 3, 7, 1, 9, 2, 6, 1,
-        1, 1, 1, 1 };
-    List<Long> input = Lists.newArrayList(Longs.asList(inp));
-
-    Writer writer = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf)
-            .setSchema(schema)
-            .stripeSize(100000)
-            .compress(CompressionKind.NONE)
-            .bufferSize(10000)
-            .encodingStrategy(encodingStrategy));
-    VectorizedRowBatch batch = schema.createRowBatch();
-    for(Long l : input) {
-      appendLong(batch, l);
-    }
-    writer.addRowBatch(batch);
-    writer.close();
-
-    Reader reader = OrcFile.createReader(testFilePath,
-        OrcFile.readerOptions(conf).filesystem(fs));
-    RecordReader rows = reader.rows();
-    int idx = 0;
-    batch = reader.getSchema().createRowBatch();
-    while (rows.nextBatch(batch)) {
-      for(int r=0; r < batch.size; ++r) {
-        assertEquals(input.get(idx++).longValue(),
-            ((LongColumnVector) batch.cols[0]).vector[r]);
-      }
-    }
-  }
-  
-  @Test
-  public void testBasicDelta1() throws Exception {
-    TypeDescription schema = TypeDescription.createLong();
-
-    long[] inp = new long[] { -500, -400, -350, -325, -310 };
-    List<Long> input = Lists.newArrayList(Longs.asList(inp));
-
-    Writer writer = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf)
-            .setSchema(schema)
-            .stripeSize(100000)
-            .compress(CompressionKind.NONE)
-            .bufferSize(10000)
-            .encodingStrategy(encodingStrategy));
-    VectorizedRowBatch batch = schema.createRowBatch();
-    for(Long l : input) {
-      appendLong(batch, l);
-    }
-    writer.addRowBatch(batch);
-    writer.close();
-
-    Reader reader = OrcFile.createReader(testFilePath,
-        OrcFile.readerOptions(conf).filesystem(fs));
-    RecordReader rows = reader.rows();
-    batch = reader.getSchema().createRowBatch();
-    int idx = 0;
-    while (rows.nextBatch(batch)) {
-      for(int r=0; r < batch.size; ++r) {
-        assertEquals(input.get(idx++).longValue(),
-            ((LongColumnVector) batch.cols[0]).vector[r]);
-      }
-    }
-  }
-
-  @Test
-  public void testBasicDelta2() throws Exception {
-    TypeDescription schema = TypeDescription.createLong();
-
-    long[] inp = new long[] { -500, -600, -650, -675, -710 };
-    List<Long> input = Lists.newArrayList(Longs.asList(inp));
-
-    Writer writer = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf)
-            .setSchema(schema)
-            .stripeSize(100000)
-            .compress(CompressionKind.NONE)
-            .bufferSize(10000)
-            .encodingStrategy(encodingStrategy));
-    VectorizedRowBatch batch = schema.createRowBatch();
-    for(Long l : input) {
-      appendLong(batch, l);
-    }
-    writer.addRowBatch(batch);
-    writer.close();
-
-    Reader reader = OrcFile.createReader(testFilePath,
-        OrcFile.readerOptions(conf).filesystem(fs));
-    RecordReader rows = reader.rows();
-    batch = reader.getSchema().createRowBatch();
-    int idx = 0;
-    while (rows.nextBatch(batch)) {
-      for(int r=0; r < batch.size; ++r) {
-        assertEquals(input.get(idx++).longValue(),
-            ((LongColumnVector) batch.cols[0]).vector[r]);
-      }
-    }
-  }
-
-  @Test
-  public void testBasicDelta3() throws Exception {
-    TypeDescription schema = TypeDescription.createLong();
-
-    long[] inp = new long[] { 500, 400, 350, 325, 310 };
-    List<Long> input = Lists.newArrayList(Longs.asList(inp));
-
-    Writer writer = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf)
-            .setSchema(schema)
-            .stripeSize(100000)
-            .compress(CompressionKind.NONE)
-            .bufferSize(10000)
-            .encodingStrategy(encodingStrategy));
-    VectorizedRowBatch batch = schema.createRowBatch();
-    for(Long l : input) {
-      appendLong(batch, l);
-    }
-    writer.addRowBatch(batch);
-    writer.close();
-
-    Reader reader = OrcFile.createReader(testFilePath,
-        OrcFile.readerOptions(conf).filesystem(fs));
-    RecordReader rows = reader.rows();
-    batch = reader.getSchema().createRowBatch();
-    int idx = 0;
-    while (rows.nextBatch(batch)) {
-      for(int r=0; r < batch.size; ++r) {
-        assertEquals(input.get(idx++).longValue(),
-            ((LongColumnVector) batch.cols[0]).vector[r]);
-      }
-    }
-  }
-
-  @Test
-  public void testBasicDelta4() throws Exception {
-    TypeDescription schema = TypeDescription.createLong();
-
-    long[] inp = new long[] { 500, 600, 650, 675, 710 };
-    List<Long> input = Lists.newArrayList(Longs.asList(inp));
-
-    Writer writer = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf)
-            .setSchema(schema)
-            .stripeSize(100000)
-            .compress(CompressionKind.NONE)
-            .bufferSize(10000)
-            .encodingStrategy(encodingStrategy));
-    VectorizedRowBatch batch = schema.createRowBatch();
-    for(Long l : input) {
-      appendLong(batch, l);
-    }
-    writer.addRowBatch(batch);
-    writer.close();
-
-    Reader reader = OrcFile.createReader(testFilePath,
-        OrcFile.readerOptions(conf).filesystem(fs));
-    RecordReader rows = reader.rows();
-    batch = reader.getSchema().createRowBatch();
-    int idx = 0;
-    while (rows.nextBatch(batch)) {
-      for(int r=0; r < batch.size; ++r) {
-        assertEquals(input.get(idx++).longValue(),
-            ((LongColumnVector) batch.cols[0]).vector[r]);
-      }
-    }
-  }
-
-  @Test
-  public void testDeltaOverflow() throws Exception {
-    TypeDescription schema = TypeDescription.createLong();
-
-    long[] inp = new long[]{4513343538618202719l, 4513343538618202711l,
-        2911390882471569739l,
-        -9181829309989854913l};
-    List<Long> input = Lists.newArrayList(Longs.asList(inp));
-
-    Writer writer = OrcFile.createWriter(
-        testFilePath,
-        OrcFile.writerOptions(conf).setSchema(schema).stripeSize(100000)
-            .compress(CompressionKind.NONE).bufferSize(10000));
-    VectorizedRowBatch batch = schema.createRowBatch();
-    for (Long l : input) {
-      appendLong(batch, l);
-    }
-    writer.addRowBatch(batch);
-    writer.close();
-
-    Reader reader = OrcFile
-        .createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs));
-    RecordReader rows = reader.rows();
-    batch = reader.getSchema().createRowBatch();
-    int idx = 0;
-    while (rows.nextBatch(batch)) {
-      for(int r=0; r < batch.size; ++r) {
-        assertEquals(input.get(idx++).longValue(),
-            ((LongColumnVector) batch.cols[0]).vector[r]);
-      }
-    }
-  }
-
-  @Test
-  public void testDeltaOverflow2() throws Exception {
-    TypeDescription schema = TypeDescription.createLong();
-
-    long[] inp = new long[]{Long.MAX_VALUE, 4513343538618202711l,
-        2911390882471569739l,
-        Long.MIN_VALUE};
-    List<Long> input = Lists.newArrayList(Longs.asList(inp));
-
-    Writer writer = OrcFile.createWriter(
-        testFilePath,
-        OrcFile.writerOptions(conf).setSchema(schema).stripeSize(100000)
-            .compress(CompressionKind.NONE).bufferSize(10000));
-    VectorizedRowBatch batch = schema.createRowBatch();
-    for (Long l : input) {
-      appendLong(batch, l);
-    }
-    writer.addRowBatch(batch);
-    writer.close();
-
-    Reader reader = OrcFile
-        .createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs));
-    RecordReader rows = reader.rows();
-    batch = reader.getSchema().createRowBatch();
-    int idx = 0;
-    while (rows.nextBatch(batch)) {
-      for(int r=0; r < batch.size; ++r) {
-        assertEquals(input.get(idx++).longValue(),
-            ((LongColumnVector) batch.cols[0]).vector[r]);
-      }
-    }
-  }
-
-  @Test
-  public void testDeltaOverflow3() throws Exception {
-    TypeDescription schema = TypeDescription.createLong();
-
-    long[] inp = new long[]{-4513343538618202711l, -2911390882471569739l, -2,
-        Long.MAX_VALUE};
-    List<Long> input = Lists.newArrayList(Longs.asList(inp));
-
-    Writer writer = OrcFile.createWriter(
-        testFilePath,
-        OrcFile.writerOptions(conf).setSchema(schema).stripeSize(100000)
-            .compress(CompressionKind.NONE).bufferSize(10000));
-    VectorizedRowBatch batch = schema.createRowBatch();
-    for (Long l : input) {
-      appendLong(batch, l);
-    }
-    writer.addRowBatch(batch);
-    writer.close();
-
-    Reader reader = OrcFile
-        .createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs));
-    RecordReader rows = reader.rows();
-    batch = reader.getSchema().createRowBatch();
-    int idx = 0;
-    while (rows.nextBatch(batch)) {
-      for(int r=0; r < batch.size; ++r) {
-        assertEquals(input.get(idx++).longValue(),
-            ((LongColumnVector) batch.cols[0]).vector[r]);
-      }
-    }
-  }
-
-  @Test
-  public void testIntegerMin() throws Exception {
-    TypeDescription schema = TypeDescription.createLong();
-
-    List<Long> input = Lists.newArrayList();
-    input.add((long) Integer.MIN_VALUE);
-
-    Writer writer = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf)
-            .setSchema(schema)
-            .stripeSize(100000)
-            .bufferSize(10000)
-            .encodingStrategy(encodingStrategy));
-    VectorizedRowBatch batch = schema.createRowBatch();
-    for(Long l : input) {
-      appendLong(batch, l);
-    }
-    writer.addRowBatch(batch);
-    writer.close();
-
-    Reader reader = OrcFile.createReader(testFilePath,
-        OrcFile.readerOptions(conf).filesystem(fs));
-    RecordReader rows = reader.rows();
-    batch = reader.getSchema().createRowBatch();
-    int idx = 0;
-    while (rows.nextBatch(batch)) {
-      for(int r=0; r < batch.size; ++r) {
-        assertEquals(input.get(idx++).longValue(),
-            ((LongColumnVector) batch.cols[0]).vector[r]);
-      }
-    }
-  }
-
-  @Test
-  public void testIntegerMax() throws Exception {
-    TypeDescription schema = TypeDescription.createLong();
-
-    List<Long> input = Lists.newArrayList();
-    input.add((long) Integer.MAX_VALUE);
-
-    Writer writer = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf)
-            .setSchema(schema)
-            .stripeSize(100000)
-            .compress(CompressionKind.NONE)
-            .bufferSize(10000)
-            .encodingStrategy(encodingStrategy));
-    VectorizedRowBatch batch = schema.createRowBatch();
-    for(Long l : input) {
-      appendLong(batch, l);
-    }
-    writer.addRowBatch(batch);
-    writer.close();
-
-    Reader reader = OrcFile.createReader(testFilePath,
-        OrcFile.readerOptions(conf).filesystem(fs));
-    RecordReader rows = reader.rows();
-    batch = reader.getSchema().createRowBatch();
-    int idx = 0;
-    while (rows.nextBatch(batch)) {
-      for(int r=0; r < batch.size; ++r) {
-        assertEquals(input.get(idx++).longValue(),
-            ((LongColumnVector) batch.cols[0]).vector[r]);
-      }
-    }
-  }
-
-  @Test
-  public void testLongMin() throws Exception {
-    TypeDescription schema = TypeDescription.createLong();
-
-    List<Long> input = Lists.newArrayList();
-    input.add(Long.MIN_VALUE);
-
-    Writer writer = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf)
-            .setSchema(schema)
-            .stripeSize(100000)
-            .compress(CompressionKind.NONE)
-            .bufferSize(10000)
-            .encodingStrategy(encodingStrategy));
-    VectorizedRowBatch batch = schema.createRowBatch();
-    for(Long l : input) {
-      appendLong(batch, l);
-    }
-    writer.addRowBatch(batch);
-    writer.close();
-
-    Reader reader = OrcFile.createReader(testFilePath,
-        OrcFile.readerOptions(conf).filesystem(fs));
-    RecordReader rows = reader.rows();
-    batch = reader.getSchema().createRowBatch();
-    int idx = 0;
-    while (rows.nextBatch(batch)) {
-      for(int r=0; r < batch.size; ++r) {
-        assertEquals(input.get(idx++).longValue(),
-            ((LongColumnVector) batch.cols[0]).vector[r]);
-      }
-    }
-  }
-
-  @Test
-  public void testLongMax() throws Exception {
-    TypeDescription schema = TypeDescription.createLong();
-
-    List<Long> input = Lists.newArrayList();
-    input.add(Long.MAX_VALUE);
-
-    Writer writer = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf)
-            .setSchema(schema)
-            .stripeSize(100000)
-            .compress(CompressionKind.NONE)
-            .bufferSize(10000)
-            .encodingStrategy(encodingStrategy));
-    VectorizedRowBatch batch = schema.createRowBatch();
-    for(Long l : input) {
-      appendLong(batch, l);
-    }
-    writer.addRowBatch(batch);
-    writer.close();
-
-    Reader reader = OrcFile.createReader(testFilePath,
-        OrcFile.readerOptions(conf).filesystem(fs));
-    RecordReader rows = reader.rows();
-    batch = reader.getSchema().createRowBatch();
-    int idx = 0;
-    while (rows.nextBatch(batch)) {
-      for(int r=0; r < batch.size; ++r) {
-        assertEquals(input.get(idx++).longValue(),
-            ((LongColumnVector) batch.cols[0]).vector[r]);
-      }
-    }
-  }
-
-  @Test
-  public void testRandomInt() throws Exception {
-    TypeDescription schema = TypeDescription.createLong();
-
-    List<Long> input = Lists.newArrayList();
-    Random rand = new Random();
-    for(int i = 0; i < 100000; i++) {
-      input.add((long) rand.nextInt());
-    }
-
-    Writer writer = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf)
-            .setSchema(schema)
-            .stripeSize(100000)
-            .compress(CompressionKind.NONE)
-            .bufferSize(10000)
-            .encodingStrategy(encodingStrategy));
-    VectorizedRowBatch batch = schema.createRowBatch(100000);
-    for(Long l : input) {
-      appendLong(batch, l);
-    }
-    writer.addRowBatch(batch);
-    writer.close();
-
-    Reader reader = OrcFile.createReader(testFilePath,
-        OrcFile.readerOptions(conf).filesystem(fs));
-    RecordReader rows = reader.rows();
-    batch = reader.getSchema().createRowBatch();
-    int idx = 0;
-    while (rows.nextBatch(batch)) {
-      for(int r=0; r < batch.size; ++r) {
-        assertEquals(input.get(idx++).longValue(),
-            ((LongColumnVector) batch.cols[0]).vector[r]);
-      }
-    }
-  }
-
-  @Test
-  public void testRandomLong() throws Exception {
-    TypeDescription schema = TypeDescription.createLong();
-
-    List<Long> input = Lists.newArrayList();
-    Random rand = new Random();
-    for(int i = 0; i < 100000; i++) {
-      input.add(rand.nextLong());
-    }
-
-    Writer writer = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf)
-            .setSchema(schema)
-            .stripeSize(100000)
-            .compress(CompressionKind.NONE)
-            .bufferSize(10000)
-            .encodingStrategy(encodingStrategy));
-    VectorizedRowBatch batch = schema.createRowBatch(100000);
-    for(Long l : input) {
-      appendLong(batch, l);
-    }
-    writer.addRowBatch(batch);
-    writer.close();
-
-    Reader reader = OrcFile.createReader(testFilePath,
-        OrcFile.readerOptions(conf).filesystem(fs));
-    RecordReader rows = reader.rows();
-    batch = reader.getSchema().createRowBatch();
-    int idx = 0;
-    while (rows.nextBatch(batch)) {
-      for(int r=0; r < batch.size; ++r) {
-        assertEquals(input.get(idx++).longValue(),
-            ((LongColumnVector) batch.cols[0]).vector[r]);
-      }
-    }
-  }
-
-  @Test
-  public void testPatchedBaseNegativeMin() throws Exception {
-    TypeDescription schema = TypeDescription.createLong();
-
-    long[] inp = new long[] { 20, 2, 3, 2, 1, 3, 17, 71, 35, 2, 1, 139, 2, 2,
-        3, 1783, 475, 2, 1, 1, 3, 1, 3, 2, 32, 1, 2, 3, 1, 8, 30, 1, 3, 414, 1,
-        1, 135, 3, 3, 1, 414, 2, 1, 2, 2, 594, 2, 5, 6, 4, 11, 1, 2, 2, 1, 1,
-        52, 4, 1, 2, 7, 1, 17, 334, 1, 2, 1, 2, 2, 6, 1, 266, 1, 2, 217, 2, 6,
-        2, 13, 2, 2, 1, 2, 3, 5, 1, 2, 1, 7244, 11813, 1, 33, 2, -13, 1, 2, 3,
-        13, 1, 92, 3, 13, 5, 14, 9, 141, 12, 6, 15, 25, 1, 1, 1, 46, 2, 1, 1,
-        141, 3, 1, 1, 1, 1, 2, 1, 4, 34, 5, 78, 8, 1, 2, 2, 1, 9, 10, 2, 1, 4,
-        13, 1, 5, 4, 4, 19, 5, 1, 1, 1, 68, 33, 399, 1, 1885, 25, 5, 2, 4, 1,
-        1, 2, 16, 1, 2966, 3, 1, 1, 25501, 1, 1, 1, 66, 1, 3, 8, 131, 14, 5, 1,
-        2, 2, 1, 1, 8, 1, 1, 2, 1, 5, 9, 2, 3, 112, 13, 2, 2, 1, 5, 10, 3, 1,
-        1, 13, 2, 3, 4, 1, 3, 1, 1, 2, 1, 1, 2, 4, 2, 207, 1, 1, 2, 4, 3, 3, 2,
-        2, 16 };
-    List<Long> input = Lists.newArrayList(Longs.asList(inp));
-
-    Writer writer = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf)
-            .setSchema(schema)
-            .stripeSize(100000)
-            .compress(CompressionKind.NONE)
-            .bufferSize(10000)
-            .encodingStrategy(encodingStrategy));
-    VectorizedRowBatch batch = schema.createRowBatch();
-    for(Long l : input) {
-      appendLong(batch, l);
-    }
-    writer.addRowBatch(batch);
-    writer.close();
-
-    Reader reader = OrcFile.createReader(testFilePath,
-        OrcFile.readerOptions(conf).filesystem(fs));
-    RecordReader rows = reader.rows();
-    batch = reader.getSchema().createRowBatch();
-    int idx = 0;
-    while (rows.nextBatch(batch)) {
-      for(int r=0; r < batch.size; ++r) {
-        assertEquals(input.get(idx++).longValue(),
-            ((LongColumnVector) batch.cols[0]).vector[r]);
-      }
-    }
-  }
-
-  @Test
-  public void testPatchedBaseNegativeMin2() throws Exception {
-    TypeDescription schema = TypeDescription.createLong();
-
-    long[] inp = new long[] { 20, 2, 3, 2, 1, 3, 17, 71, 35, 2, 1, 139, 2, 2,
-        3, 1783, 475, 2, 1, 1, 3, 1, 3, 2, 32, 1, 2, 3, 1, 8, 30, 1, 3, 414, 1,
-        1, 135, 3, 3, 1, 414, 2, 1, 2, 2, 594, 2, 5, 6, 4, 11, 1, 2, 2, 1, 1,
-        52, 4, 1, 2, 7, 1, 17, 334, 1, 2, 1, 2, 2, 6, 1, 266, 1, 2, 217, 2, 6,
-        2, 13, 2, 2, 1, 2, 3, 5, 1, 2, 1, 7244, 11813, 1, 33, 2, -1, 1, 2, 3,
-        13, 1, 92, 3, 13, 5, 14, 9, 141, 12, 6, 15, 25, 1, 1, 1, 46, 2, 1, 1,
-        141, 3, 1, 1, 1, 1, 2, 1, 4, 34, 5, 78, 8, 1, 2, 2, 1, 9, 10, 2, 1, 4,
-        13, 1, 5, 4, 4, 19, 5, 1, 1, 1, 68, 33, 399, 1, 1885, 25, 5, 2, 4, 1,
-        1, 2, 16, 1, 2966, 3, 1, 1, 25501, 1, 1, 1, 66, 1, 3, 8, 131, 14, 5, 1,
-        2, 2, 1, 1, 8, 1, 1, 2, 1, 5, 9, 2, 3, 112, 13, 2, 2, 1, 5, 10, 3, 1,
-        1, 13, 2, 3, 4, 1, 3, 1, 1, 2, 1, 1, 2, 4, 2, 207, 1, 1, 2, 4, 3, 3, 2,
-        2, 16 };
-    List<Long> input = Lists.newArrayList(Longs.asList(inp));
-
-    Writer writer = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf)
-            .setSchema(schema)
-            .stripeSize(100000)
-            .compress(CompressionKind.NONE)
-            .bufferSize(10000)
-            .encodingStrategy(encodingStrategy));
-    VectorizedRowBatch batch = schema.createRowBatch();
-    for(Long l : input) {
-      appendLong(batch, l);
-    }
-    writer.addRowBatch(batch);
-    writer.close();
-
-    Reader reader = OrcFile.createReader(testFilePath,
-        OrcFile.readerOptions(conf).filesystem(fs));
-    RecordReader rows = reader.rows();
-    batch = reader.getSchema().createRowBatch();
-    int idx = 0;
-    while (rows.nextBatch(batch)) {
-      for(int r=0; r < batch.size; ++r) {
-        assertEquals(input.get(idx++).longValue(),
-            ((LongColumnVector) batch.cols[0]).vector[r]);
-      }
-    }
-  }
-
-  @Test
-  public void testPatchedBaseNegativeMin3() throws Exception {
-    TypeDescription schema = TypeDescription.createLong();
-
-    long[] inp = new long[] { 20, 2, 3, 2, 1, 3, 17, 71, 35, 2, 1, 139, 2, 2,
-        3, 1783, 475, 2, 1, 1, 3, 1, 3, 2, 32, 1, 2, 3, 1, 8, 30, 1, 3, 414, 1,
-        1, 135, 3, 3, 1, 414, 2, 1, 2, 2, 594, 2, 5, 6, 4, 11, 1, 2, 2, 1, 1,
-        52, 4, 1, 2, 7, 1, 17, 334, 1, 2, 1, 2, 2, 6, 1, 266, 1, 2, 217, 2, 6,
-        2, 13, 2, 2, 1, 2, 3, 5, 1, 2, 1, 7244, 11813, 1, 33, 2, 0, 1, 2, 3,
-        13, 1, 92, 3, 13, 5, 14, 9, 141, 12, 6, 15, 25, 1, 1, 1, 46, 2, 1, 1,
-        141, 3, 1, 1, 1, 1, 2, 1, 4, 34, 5, 78, 8, 1, 2, 2, 1, 9, 10, 2, 1, 4,
-        13, 1, 5, 4, 4, 19, 5, 1, 1, 1, 68, 33, 399, 1, 1885, 25, 5, 2, 4, 1,
-        1, 2, 16, 1, 2966, 3, 1, 1, 25501, 1, 1, 1, 66, 1, 3, 8, 131, 14, 5, 1,
-        2, 2, 1, 1, 8, 1, 1, 2, 1, 5, 9, 2, 3, 112, 13, 2, 2, 1, 5, 10, 3, 1,
-        1, 13, 2, 3, 4, 1, 3, 1, 1, 2, 1, 1, 2, 4, 2, 207, 1, 1, 2, 4, 3, 3, 2,
-        2, 16 };
-    List<Long> input = Lists.newArrayList(Longs.asList(inp));
-
-    Writer writer = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf)
-            .setSchema(schema)
-            .stripeSize(100000)
-            .compress(CompressionKind.NONE)
-            .bufferSize(10000)
-            .encodingStrategy(encodingStrategy));
-    VectorizedRowBatch batch = schema.createRowBatch();
-    for(Long l : input) {
-      appendLong(batch, l);
-    }
-    writer.addRowBatch(batch);
-    writer.close();
-
-    Reader reader = OrcFile.createReader(testFilePath,
-        OrcFile.readerOptions(conf).filesystem(fs));
-    RecordReader rows = reader.rows();
-    batch = reader.getSchema().createRowBatch();
-    int idx = 0;
-    while (rows.nextBatch(batch)) {
-      for(int r=0; r < batch.size; ++r) {
-        assertEquals(input.get(idx++).longValue(),
-            ((LongColumnVector) batch.cols[0]).vector[r]);
-      }
-    }
-  }
-
-  @Test
-  public void testPatchedBaseNegativeMin4() throws Exception {
-    TypeDescription schema = TypeDescription.createLong();
-
-    long[] inp = new long[] { 13, 13, 11, 8, 13, 10, 10, 11, 11, 14, 11, 7, 13,
-        12, 12, 11, 15, 12, 12, 9, 8, 10, 13, 11, 8, 6, 5, 6, 11, 7, 15, 10, 7,
-        6, 8, 7, 9, 9, 11, 33, 11, 3, 7, 4, 6, 10, 14, 12, 5, 14, 7, 6 };
-    List<Long> input = Lists.newArrayList(Longs.asList(inp));
-
-    Writer writer = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf)
-            .setSchema(schema)
-            .stripeSize(100000)
-            .compress(CompressionKind.NONE)
-            .bufferSize(10000)
-            .encodingStrategy(encodingStrategy));
-    VectorizedRowBatch batch = schema.createRowBatch();
-    for(Long l : input) {
-      appendLong(batch, l);
-    }
-    writer.addRowBatch(batch);
-    writer.close();
-
-    Reader reader = OrcFile.createReader(testFilePath,
-        OrcFile.readerOptions(conf).filesystem(fs));
-    RecordReader rows = reader.rows();
-    batch = reader.getSchema().createRowBatch();
-    int idx = 0;
-    while (rows.nextBatch(batch)) {
-      for(int r=0; r < batch.size; ++r) {
-        assertEquals(input.get(idx++).longValue(),
-            ((LongColumnVector) batch.cols[0]).vector[r]);
-      }
-    }
-  }
-
-  @Test
-  public void testPatchedBaseAt0() throws Exception {
-    TypeDescription schema = TypeDescription.createLong();
-
-    List<Long> input = Lists.newArrayList();
-    Random rand = new Random();
-    for(int i = 0; i < 5120; i++) {
-      input.add((long) rand.nextInt(100));
-    }
-    input.set(0, 20000L);
-
-    Writer writer = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf)
-            .setSchema(schema)
-            .stripeSize(100000)
-            .compress(CompressionKind.NONE)
-            .bufferSize(10000)
-            .encodingStrategy(encodingStrategy));
-    VectorizedRowBatch batch = schema.createRowBatch(5120);
-    for(Long l : input) {
-      appendLong(batch, l);
-    }
-    writer.addRowBatch(batch);
-    writer.close();
-
-    Reader reader = OrcFile.createReader(testFilePath,
-        OrcFile.readerOptions(conf).filesystem(fs));
-    RecordReader rows = reader.rows();
-    batch = reader.getSchema().createRowBatch();
-    int idx = 0;
-    while (rows.nextBatch(batch)) {
-      for(int r=0; r < batch.size; ++r) {
-        assertEquals(input.get(idx++).longValue(),
-            ((LongColumnVector) batch.cols[0]).vector[r]);
-      }
-    }
-  }
-
-  @Test
-  public void testPatchedBaseAt1() throws Exception {
-    TypeDescription schema = TypeDescription.createLong();
-
-    List<Long> input = Lists.newArrayList();
-    Random rand = new Random();
-    for(int i = 0; i < 5120; i++) {
-      input.add((long) rand.nextInt(100));
-    }
-    input.set(1, 20000L);
-
-    Writer writer = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf)
-            .setSchema(schema)
-            .stripeSize(100000)
-            .compress(CompressionKind.NONE)
-            .bufferSize(10000)
-            .encodingStrategy(encodingStrategy));
-    VectorizedRowBatch batch = schema.createRowBatch(5120);
-    for(Long l : input) {
-      appendLong(batch, l);
-    }
-    writer.addRowBatch(batch);
-    writer.close();
-
-    Reader reader = OrcFile.createReader(testFilePath,
-        OrcFile.readerOptions(conf).filesystem(fs));
-    RecordReader rows = reader.rows();
-    int idx = 0;
-    while (rows.nextBatch(batch)) {
-      for(int r=0; r < batch.size; ++r) {
-        assertEquals(input.get(idx++).longValue(),
-            ((LongColumnVector) batch.cols[0]).vector[r]);
-      }
-    }
-  }
-
-  @Test
-  public void testPatchedBaseAt255() throws Exception {
-    TypeDescription schema = TypeDescription.createLong();
-
-    List<Long> input = Lists.newArrayList();
-    Random rand = new Random();
-    for(int i = 0; i < 5120; i++) {
-      input.add((long) rand.nextInt(100));
-    }
-    input.set(255, 20000L);
-
-    Writer writer = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf)
-            .setSchema(schema)
-            .stripeSize(100000)
-            .bufferSize(10000)
-            .encodingStrategy(encodingStrategy));
-    VectorizedRowBatch batch = schema.createRowBatch(5120);
-    for(Long l : input) {
-      appendLong(batch, l);
-    }
-    writer.addRowBatch(batch);
-    writer.close();
-
-    Reader reader = OrcFile.createReader(testFilePath,
-        OrcFile.readerOptions(conf).filesystem(fs));
-    RecordReader rows = reader.rows();
-    batch = reader.getSchema().createRowBatch();
-    int idx = 0;
-    while (rows.nextBatch(batch)) {
-      for(int r=0; r < batch.size; ++r) {
-        assertEquals(input.get(idx++).longValue(),
-            ((LongColumnVector) batch.cols[0]).vector[r]);
-      }
-    }
-  }
-
-  @Test
-  public void testPatchedBaseAt256() throws Exception {
-    TypeDescription schema = TypeDescription.createLong();
-
-    List<Long> input = Lists.newArrayList();
-    Random rand = new Random();
-    for(int i = 0; i < 5120; i++) {
-      input.add((long) rand.nextInt(100));
-    }
-    input.set(256, 20000L);
-
-    Writer writer = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf)
-            .setSchema(schema)
-            .stripeSize(100000)
-            .bufferSize(10000)
-            .encodingStrategy(encodingStrategy));
-    VectorizedRowBatch batch = schema.createRowBatch(5120);
-    for(Long l : input) {
-      appendLong(batch, l);
-    }
-    writer.addRowBatch(batch);
-    writer.close();
-
-    Reader reader = OrcFile.createReader(testFilePath,
-        OrcFile.readerOptions(conf).filesystem(fs));
-    RecordReader rows = reader.rows();
-    batch = reader.getSchema().createRowBatch();
-    int idx = 0;
-    while (rows.nextBatch(batch)) {
-      for(int r=0; r < batch.size; ++r) {
-        assertEquals(input.get(idx++).longValue(),
-            ((LongColumnVector) batch.cols[0]).vector[r]);
-      }
-    }
-  }
-
-  @Test
-  public void testPatchedBase510() throws Exception {
-    TypeDescription schema = TypeDescription.createLong();
-
-    List<Long> input = Lists.newArrayList();
-    Random rand = new Random();
-    for(int i = 0; i < 5120; i++) {
-      input.add((long) rand.nextInt(100));
-    }
-    input.set(510, 20000L);
-
-    Writer writer = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf)
-            .setSchema(schema)
-            .stripeSize(100000)
-            .bufferSize(10000)
-            .encodingStrategy(encodingStrategy));
-    VectorizedRowBatch batch = schema.createRowBatch(5120);
-    for(Long l : input) {
-      appendLong(batch, l);
-    }
-    writer.addRowBatch(batch);
-    writer.close();
-
-    Reader reader = OrcFile.createReader(testFilePath,
-        OrcFile.readerOptions(conf).filesystem(fs));
-    RecordReader rows = reader.rows();
-    batch = reader.getSchema().createRowBatch();
-    int idx = 0;
-    while (rows.nextBatch(batch)) {
-      for(int r=0; r < batch.size; ++r) {
-        assertEquals(input.get(idx++).longValue(),
-            ((LongColumnVector) batch.cols[0]).vector[r]);
-      }
-    }
-  }
-
-  @Test
-  public void testPatchedBase511() throws Exception {
-    TypeDescription schema = TypeDescription.createLong();
-
-    List<Long> input = Lists.newArrayList();
-    Random rand = new Random();
-    for(int i = 0; i < 5120; i++) {
-      input.add((long) rand.nextInt(100));
-    }
-    input.set(511, 20000L);
-
-    Writer writer = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf)
-            .setSchema(schema)
-            .stripeSize(100000)
-            .bufferSize(10000)
-            .encodingStrategy(encodingStrategy));
-    VectorizedRowBatch batch = schema.createRowBatch(5120);
-    for(Long l : input) {
-      appendLong(batch, l);
-    }
-    writer.addRowBatch(batch);
-    writer.close();
-
-    Reader reader = OrcFile.createReader(testFilePath,
-        OrcFile.readerOptions(conf).filesystem(fs));
-    RecordReader rows = reader.rows();
-    batch = reader.getSchema().createRowBatch();
-    int idx = 0;
-    while (rows.nextBatch(batch)) {
-      for(int r=0; r < batch.size; ++r) {
-        assertEquals(input.get(idx++).longValue(),
-            ((LongColumnVector) batch.cols[0]).vector[r]);
-      }
-    }
-  }
-
-  @Test
-  public void testPatchedBaseMax1() throws Exception {
-    TypeDescription schema = TypeDescription.createLong();
-
-    List<Long> input = Lists.newArrayList();
-    Random rand = new Random();
-    for (int i = 0; i < 5120; i++) {
-      input.add((long) rand.nextInt(60));
-    }
-    input.set(511, Long.MAX_VALUE);
-
-    Writer writer = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf)
-            .setSchema(schema)
-            .stripeSize(100000)
-            .bufferSize(10000)
-            .encodingStrategy(encodingStrategy));
-    VectorizedRowBatch batch = schema.createRowBatch(5120);
-    for (Long l : input) {
-      appendLong(batch, l);
-    }
-    writer.addRowBatch(batch);
-    writer.close();
-
-    Reader reader = OrcFile.createReader(testFilePath,
-        OrcFile.readerOptions(conf).filesystem(fs));
-    RecordReader rows = reader.rows();
-    batch = reader.getSchema().createRowBatch();
-    int idx = 0;
-    while (rows.nextBatch(batch)) {
-      for(int r=0; r < batch.size; ++r) {
-        assertEquals(input.get(idx++).longValue(),
-            ((LongColumnVector) batch.cols[0]).vector[r]);
-      }
-    }
-  }
-
-  @Test
-  public void testPatchedBaseMax2() throws Exception {
-    TypeDescription schema = TypeDescription.createLong();
-
-    List<Long> input = Lists.newArrayList();
-    Random rand = new Random();
-    for (int i = 0; i < 5120; i++) {
-      input.add((long) rand.nextInt(60));
-    }
-    input.set(128, Long.MAX_VALUE);
-    input.set(256, Long.MAX_VALUE);
-    input.set(511, Long.MAX_VALUE);
-
-    Writer writer = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf)
-            .setSchema(schema)
-            .stripeSize(100000)
-            .bufferSize(10000)
-            .encodingStrategy(encodingStrategy));
-    VectorizedRowBatch batch = schema.createRowBatch(5120);
-    for (Long l : input) {
-      appendLong(batch, l);
-    }
-    writer.addRowBatch(batch);
-    writer.close();
-
-    Reader reader = OrcFile.createReader(testFilePath,
-        OrcFile.readerOptions(conf).filesystem(fs));
-    RecordReader rows = reader.rows();
-    batch = reader.getSchema().createRowBatch();
-    int idx = 0;
-    while (rows.nextBatch(batch)) {
-      for(int r=0; r < batch.size; ++r) {
-        assertEquals(input.get(idx++).longValue(),
-            ((LongColumnVector) batch.cols[0]).vector[r]);
-      }
-    }
-  }
-
-  @Test
-  public void testPatchedBaseMax3() throws Exception {
-    TypeDescription schema = TypeDescription.createLong();
-
-    List<Long> input = Lists.newArrayList();
-    input.add(371946367L);
-    input.add(11963367L);
-    input.add(68639400007L);
-    input.add(100233367L);
-    input.add(6367L);
-    input.add(10026367L);
-    input.add(3670000L);
-    input.add(3602367L);
-    input.add(4719226367L);
-    input.add(7196367L);
-    input.add(444442L);
-    input.add(210267L);
-    input.add(21033L);
-    input.add(160267L);
-    input.add(400267L);
-    input.add(23634347L);
-    input.add(16027L);
-    input.add(46026367L);
-    input.add(Long.MAX_VALUE);
-    input.add(33333L);
-
-    Writer writer = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf)
-            .setSchema(schema)
-            .stripeSize(100000)
-            .bufferSize(10000)
-            .encodingStrategy(encodingStrategy));
-    VectorizedRowBatch batch = schema.createRowBatch();
-    for (Long l : input) {
-      appendLong(batch, l);
-    }
-    writer.addRowBatch(batch);
-    writer.close();
-
-    Reader reader = OrcFile.createReader(testFilePath,
-        OrcFile.readerOptions(conf).filesystem(fs));
-    RecordReader rows = reader.rows();
-    int idx = 0;
-    while (rows.nextBatch(batch)) {
-      for(int r=0; r < batch.size; ++r) {
-        assertEquals(input.get(idx++).longValue(),
-            ((LongColumnVector) batch.cols[0]).vector[r]);
-      }
-    }
-  }
-
-  @Test
-  public void testPatchedBaseMax4() throws Exception {
-    TypeDescription schema = TypeDescription.createLong();
-
-    List<Long> input = Lists.newArrayList();
-    for (int i = 0; i < 25; i++) {
-      input.add(371292224226367L);
-      input.add(119622332222267L);
-      input.add(686329400222007L);
-      input.add(100233333222367L);
-      input.add(636272333322222L);
-      input.add(10202633223267L);
-      input.add(36700222022230L);
-      input.add(36023226224227L);
-      input.add(47192226364427L);
-      input.add(71963622222447L);
-      input.add(22244444222222L);
-      input.add(21220263327442L);
-      input.add(21032233332232L);
-      input.add(16026322232227L);
-      input.add(40022262272212L);
-      input.add(23634342227222L);
-      input.add(16022222222227L);
-      input.add(46026362222227L);
-      input.add(46026362222227L);
-      input.add(33322222222323L);
-    }
-    input.add(Long.MAX_VALUE);
-
-    Writer writer = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf)
-            .setSchema(schema)
-            .stripeSize(100000)
-            .bufferSize(10000)
-            .encodingStrategy(encodingStrategy));
-    VectorizedRowBatch batch = schema.createRowBatch();
-    for (Long l : input) {
-      appendLong(batch, l);
-    }
-    writer.addRowBatch(batch);
-    writer.close();
-
-    Reader reader = OrcFile.createReader(testFilePath,
-        OrcFile.readerOptions(conf).filesystem(fs));
-    RecordReader rows = reader.rows();
-    batch = reader.getSchema().createRowBatch();
-    int idx = 0;
-    while (rows.nextBatch(batch)) {
-      for(int r=0; r < batch.size; ++r) {
-        assertEquals(input.get(idx++).longValue(),
-            ((LongColumnVector) batch.cols[0]).vector[r]);
-      }
-    }
-  }
-
-  @Test
-  public void testPatchedBaseTimestamp() throws Exception {
-    TypeDescription schema = TypeDescription.createStruct()
-        .addField("ts", TypeDescription.createTimestamp());
-
-    Writer writer = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf)
-            .setSchema(schema)
-            .stripeSize(100000)
-            .bufferSize(10000)
-            .encodingStrategy(encodingStrategy));
-    VectorizedRowBatch batch = schema.createRowBatch();
-
-    List<Timestamp> tslist = Lists.newArrayList();
-    tslist.add(Timestamp.valueOf("2099-01-01 00:00:00"));
-    tslist.add(Timestamp.valueOf("2003-01-01 00:00:00"));
-    tslist.add(Timestamp.valueOf("1999-01-01 00:00:00"));
-    tslist.add(Timestamp.valueOf("1995-01-01 00:00:00"));
-    tslist.add(Timestamp.valueOf("2002-01-01 00:00:00"));
-    tslist.add(Timestamp.valueOf("2010-03-02 00:00:00"));
-    tslist.add(Timestamp.valueOf("2005-01-01 00:00:00"));
-    tslist.add(Timestamp.valueOf("2006-01-01 00:00:00"));
-    tslist.add(Timestamp.valueOf("2003-01-01 00:00:00"));
-    tslist.add(Timestamp.valueOf("1996-08-02 00:00:00"));
-    tslist.add(Timestamp.valueOf("1998-11-02 00:00:00"));
-    tslist.add(Timestamp.valueOf("2008-10-02 00:00:00"));
-    tslist.add(Timestamp.valueOf("1993-08-02 00:00:00"));
-    tslist.add(Timestamp.valueOf("2008-01-02 00:00:00"));
-    tslist.add(Timestamp.valueOf("2007-01-01 00:00:00"));
-    tslist.add(Timestamp.valueOf("2004-01-01 00:00:00"));
-    tslist.add(Timestamp.valueOf("2008-10-02 00:00:00"));
-    tslist.add(Timestamp.valueOf("2003-01-01 00:00:00"));
-    tslist.add(Timestamp.valueOf("2004-01-01 00:00:00"));
-    tslist.add(Timestamp.valueOf("2008-01-01 00:00:00"));
-    tslist.add(Timestamp.valueOf("2005-01-01 00:00:00"));
-    tslist.add(Timestamp.valueOf("1994-01-01 00:00:00"));
-    tslist.add(Timestamp.valueOf("2006-01-01 00:00:00"));
-    tslist.add(Timestamp.valueOf("2004-01-01 00:00:00"));
-    tslist.add(Timestamp.valueOf("2001-01-01 00:00:00"));
-    tslist.add(Timestamp.valueOf("2000-01-01 00:00:00"));
-    tslist.add(Timestamp.valueOf("2000-01-01 00:00:00"));
-    tslist.add(Timestamp.valueOf("2002-01-01 00:00:00"));
-    tslist.add(Timestamp.valueOf("2006-01-01 00:00:00"));
-    tslist.add(Timestamp.valueOf("2011-01-01 00:00:00"));
-    tslist.add(Timestamp.valueOf("2002-01-01 00:00:00"));
-    tslist.add(Timestamp.valueOf("2005-01-01 00:00:00"));
-    tslist.add(Timestamp.valueOf("1974-01-01 00:00:00"));
-    int idx = 0;
-    for (Timestamp ts : tslist) {
-      ((TimestampColumnVector) batch.cols[0]).set(batch.size++, ts);
-    }
-    writer.addRowBatch(batch);
-    writer.close();
-
-    Reader reader = OrcFile.createReader(testFilePath,
-        OrcFile.readerOptions(conf).filesystem(fs));
-    RecordReader rows = reader.rows();
-    batch = reader.getSchema().createRowBatch();
-    idx = 0;
-    while (rows.nextBatch(batch)) {
-      for(int r=0; r < batch.size; ++r) {
-        assertEquals(tslist.get(idx++),
-            ((TimestampColumnVector) batch.cols[0]).asScratchTimestamp(r));
-      }
-    }
-  }
-
-  @Test
-  public void testDirectLargeNegatives() throws Exception {
-    TypeDescription schema = TypeDescription.createLong();
-
-    Writer writer = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf)
-            .setSchema(schema)
-            .stripeSize(100000)
-            .bufferSize(10000)
-            .encodingStrategy(encodingStrategy));
-    VectorizedRowBatch batch = schema.createRowBatch();
-
-    appendLong(batch, -7486502418706614742L);
-    appendLong(batch, 0L);
-    appendLong(batch, 1L);
-    appendLong(batch, 1L);
-    appendLong(batch, -5535739865598783616L);
-    writer.addRowBatch(batch);
-    writer.close();
-
-    Reader reader = OrcFile.createReader(testFilePath,
-        OrcFile.readerOptions(conf).filesystem(fs));
-    RecordReader rows = reader.rows();
-    batch = reader.getSchema().createRowBatch();
-    assertEquals(true, rows.nextBatch(batch));
-    assertEquals(5, batch.size);
-    assertEquals(-7486502418706614742L,
-        ((LongColumnVector) batch.cols[0]).vector[0]);
-    assertEquals(0L,
-        ((LongColumnVector) batch.cols[0]).vector[1]);
-    assertEquals(1L,
-        ((LongColumnVector) batch.cols[0]).vector[2]);
-    assertEquals(1L,
-        ((LongColumnVector) batch.cols[0]).vector[3]);
-    assertEquals(-5535739865598783616L,
-        ((LongColumnVector) batch.cols[0]).vector[4]);
-    assertEquals(false, rows.nextBatch(batch));
-  }
-
-  @Test
-  public void testSeek() throws Exception {
-    TypeDescription schema = TypeDescription.createLong();
-
-    List<Long> input = Lists.newArrayList();
-    Random rand = new Random();
-    for(int i = 0; i < 100000; i++) {
-      input.add((long) rand.nextInt());
-    }
-    Writer writer = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf)
-            .setSchema(schema)
-            .compress(CompressionKind.NONE)
-            .stripeSize(100000)
-            .bufferSize(10000)
-            .version(OrcFile.Version.V_0_11)
-            .encodingStrategy(encodingStrategy));
-    VectorizedRowBatch batch = schema.createRowBatch(100000);
-    for(Long l : input) {
-      appendLong(batch, l);
-    }
-    writer.addRowBatch(batch);
-    writer.close();
-
-    Reader reader = OrcFile.createReader(testFilePath,
-        OrcFile.readerOptions(conf).filesystem(fs));
-    RecordReader rows = reader.rows();
-    batch = reader.getSchema().createRowBatch();
-    int idx = 55555;
-    rows.seekToRow(idx);
-    while (rows.nextBatch(batch)) {
-      for(int r=0; r < batch.size; ++r) {
-        assertEquals(input.get(idx++).longValue(),
-            ((LongColumnVector) batch.cols[0]).vector[r]);
-      }
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/orc/TestOrcNullOptimization.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/orc/TestOrcNullOptimization.java b/orc/src/test/org/apache/orc/TestOrcNullOptimization.java
deleted file mode 100644
index 0b605c9..0000000
--- a/orc/src/test/org/apache/orc/TestOrcNullOptimization.java
+++ /dev/null
@@ -1,415 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc;
-
-import static junit.framework.Assert.assertEquals;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.List;
-import java.util.Random;
-
-import junit.framework.Assert;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-
-import org.apache.orc.impl.RecordReaderImpl;
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TestName;
-
-import com.google.common.collect.Lists;
-
-public class TestOrcNullOptimization {
-
-  TypeDescription createMyStruct() {
-    return TypeDescription.createStruct()
-        .addField("a", TypeDescription.createInt())
-        .addField("b", TypeDescription.createString())
-        .addField("c", TypeDescription.createBoolean())
-        .addField("d", TypeDescription.createList(
-            TypeDescription.createStruct()
-                .addField("z", TypeDescription.createInt())));
-  }
-
-  void addRow(Writer writer, VectorizedRowBatch batch,
-              Integer a, String b, Boolean c,
-              Integer... d) throws IOException {
-    if (batch.size == batch.getMaxSize()) {
-      writer.addRowBatch(batch);
-      batch.reset();
-    }
-    int row = batch.size++;
-    LongColumnVector aColumn = (LongColumnVector) batch.cols[0];
-    BytesColumnVector bColumn = (BytesColumnVector) batch.cols[1];
-    LongColumnVector cColumn = (LongColumnVector) batch.cols[2];
-    ListColumnVector dColumn = (ListColumnVector) batch.cols[3];
-    StructColumnVector dStruct = (StructColumnVector) dColumn.child;
-    LongColumnVector dInt = (LongColumnVector) dStruct.fields[0];
-    if (a == null) {
-      aColumn.noNulls = false;
-      aColumn.isNull[row] = true;
-    } else {
-      aColumn.vector[row] = a;
-    }
-    if (b == null) {
-      bColumn.noNulls = false;
-      bColumn.isNull[row] = true;
-    } else {
-      bColumn.setVal(row, b.getBytes());
-    }
-    if (c == null) {
-      cColumn.noNulls = false;
-      cColumn.isNull[row] = true;
-    } else {
-      cColumn.vector[row] = c ? 1 : 0;
-    }
-    if (d == null) {
-      dColumn.noNulls = false;
-      dColumn.isNull[row] = true;
-    } else {
-      dColumn.offsets[row] = dColumn.childCount;
-      dColumn.lengths[row] = d.length;
-      dColumn.childCount += d.length;
-      for(int e=0; e < d.length; ++e) {
-        dInt.vector[(int) dColumn.offsets[row] + e] = d[e];
-      }
-    }
-  }
-
-  Path workDir = new Path(System.getProperty("test.tmp.dir",
-      "target" + File.separator + "test" + File.separator + "tmp"));
-
-  Configuration conf;
-  FileSystem fs;
-  Path testFilePath;
-
-  @Rule
-  public TestName testCaseName = new TestName();
-
-  @Before
-  public void openFileSystem() throws Exception {
-    conf = new Configuration();
-    fs = FileSystem.getLocal(conf);
-    testFilePath = new Path(workDir, "TestOrcNullOptimization." +
-        testCaseName.getMethodName() + ".orc");
-    fs.delete(testFilePath, false);
-  }
-
-  @Test
-  public void testMultiStripeWithNull() throws Exception {
-    TypeDescription schema = createMyStruct();
-    Writer writer = OrcFile.createWriter(testFilePath,
-                                         OrcFile.writerOptions(conf)
-                                         .setSchema(schema)
-                                         .stripeSize(100000)
-                                         .compress(CompressionKind.NONE)
-                                         .bufferSize(10000));
-    Random rand = new Random(100);
-    VectorizedRowBatch batch = schema.createRowBatch();
-    addRow(writer, batch, null, null, true, 100);
-    for (int i = 2; i < 20000; i++) {
-      addRow(writer, batch, rand.nextInt(1), "a", true, 100);
-    }
-    addRow(writer, batch, null, null, true, 100);
-    writer.addRowBatch(batch);
-    writer.close();
-
-    Reader reader = OrcFile.createReader(testFilePath,
-        OrcFile.readerOptions(conf).filesystem(fs));
-    // check the stats
-    ColumnStatistics[] stats = reader.getStatistics();
-    assertEquals(20000, reader.getNumberOfRows());
-    assertEquals(20000, stats[0].getNumberOfValues());
-
-    assertEquals(0, ((IntegerColumnStatistics) stats[1]).getMaximum());
-    assertEquals(0, ((IntegerColumnStatistics) stats[1]).getMinimum());
-    assertEquals(true, ((IntegerColumnStatistics) stats[1]).isSumDefined());
-    assertEquals(0, ((IntegerColumnStatistics) stats[1]).getSum());
-    assertEquals("count: 19998 hasNull: true min: 0 max: 0 sum: 0",
-        stats[1].toString());
-
-    assertEquals("a", ((StringColumnStatistics) stats[2]).getMaximum());
-    assertEquals("a", ((StringColumnStatistics) stats[2]).getMinimum());
-    assertEquals(19998, stats[2].getNumberOfValues());
-    assertEquals("count: 19998 hasNull: true min: a max: a sum: 19998",
-        stats[2].toString());
-
-    // check the inspectors
-    assertEquals("struct<a:int,b:string,c:boolean,d:array<struct<z:int>>>",
-        reader.getSchema().toString());
-
-    RecordReader rows = reader.rows();
-
-    List<Boolean> expected = Lists.newArrayList();
-    for (StripeInformation sinfo : reader.getStripes()) {
-      expected.add(false);
-    }
-    // only the first and last stripe will have PRESENT stream
-    expected.set(0, true);
-    expected.set(expected.size() - 1, true);
-
-    List<Boolean> got = Lists.newArrayList();
-    // check if the stripe footer contains PRESENT stream
-    for (StripeInformation sinfo : reader.getStripes()) {
-      OrcProto.StripeFooter sf =
-        ((RecordReaderImpl) rows).readStripeFooter(sinfo);
-      got.add(sf.toString().indexOf(OrcProto.Stream.Kind.PRESENT.toString())
-              != -1);
-    }
-    assertEquals(expected, got);
-
-    batch = reader.getSchema().createRowBatch();
-    LongColumnVector aColumn = (LongColumnVector) batch.cols[0];
-    BytesColumnVector bColumn = (BytesColumnVector) batch.cols[1];
-    LongColumnVector cColumn = (LongColumnVector) batch.cols[2];
-    ListColumnVector dColumn = (ListColumnVector) batch.cols[3];
-    LongColumnVector dElements =
-        (LongColumnVector)(((StructColumnVector) dColumn.child).fields[0]);
-    assertEquals(true, rows.nextBatch(batch));
-    assertEquals(1024, batch.size);
-
-    // row 1
-    assertEquals(true, aColumn.isNull[0]);
-    assertEquals(true, bColumn.isNull[0]);
-    assertEquals(1, cColumn.vector[0]);
-    assertEquals(0, dColumn.offsets[0]);
-    assertEquals(1, dColumn.lengths[0]);
-    assertEquals(100, dElements.vector[0]);
-
-    rows.seekToRow(19998);
-    rows.nextBatch(batch);
-    assertEquals(2, batch.size);
-
-    // last-1 row
-    assertEquals(0, aColumn.vector[0]);
-    assertEquals("a", bColumn.toString(0));
-    assertEquals(1, cColumn.vector[0]);
-    assertEquals(0, dColumn.offsets[0]);
-    assertEquals(1, dColumn.lengths[0]);
-    assertEquals(100, dElements.vector[0]);
-
-    // last row
-    assertEquals(true, aColumn.isNull[1]);
-    assertEquals(true, bColumn.isNull[1]);
-    assertEquals(1, cColumn.vector[1]);
-    assertEquals(1, dColumn.offsets[1]);
-    assertEquals(1, dColumn.lengths[1]);
-    assertEquals(100, dElements.vector[1]);
-
-    assertEquals(false, rows.nextBatch(batch));
-    rows.close();
-  }
-
-  @Test
-  public void testMultiStripeWithoutNull() throws Exception {
-    TypeDescription schema = createMyStruct();
-    Writer writer = OrcFile.createWriter(testFilePath,
-                                         OrcFile.writerOptions(conf)
-                                         .setSchema(schema)
-                                         .stripeSize(100000)
-                                         .compress(CompressionKind.NONE)
-                                         .bufferSize(10000));
-    Random rand = new Random(100);
-    VectorizedRowBatch batch = schema.createRowBatch();
-    for (int i = 1; i < 20000; i++) {
-      addRow(writer, batch, rand.nextInt(1), "a", true, 100);
-    }
-    addRow(writer, batch, 0, "b", true, 100);
-    writer.addRowBatch(batch);
-    writer.close();
-
-    Reader reader = OrcFile.createReader(testFilePath,
-        OrcFile.readerOptions(conf).filesystem(fs));
-    // check the stats
-    ColumnStatistics[] stats = reader.getStatistics();
-    assertEquals(20000, reader.getNumberOfRows());
-    assertEquals(20000, stats[0].getNumberOfValues());
-
-    assertEquals(0, ((IntegerColumnStatistics) stats[1]).getMaximum());
-    assertEquals(0, ((IntegerColumnStatistics) stats[1]).getMinimum());
-    assertEquals(true, ((IntegerColumnStatistics) stats[1]).isSumDefined());
-    assertEquals(0, ((IntegerColumnStatistics) stats[1]).getSum());
-    assertEquals("count: 20000 hasNull: false min: 0 max: 0 sum: 0",
-        stats[1].toString());
-
-    assertEquals("b", ((StringColumnStatistics) stats[2]).getMaximum());
-    assertEquals("a", ((StringColumnStatistics) stats[2]).getMinimum());
-    assertEquals(20000, stats[2].getNumberOfValues());
-    assertEquals("count: 20000 hasNull: false min: a max: b sum: 20000",
-        stats[2].toString());
-
-    // check the inspectors
-    Assert.assertEquals("struct<a:int,b:string,c:boolean,d:array<struct<z:int>>>",
-        reader.getSchema().toString());
-
-    RecordReader rows = reader.rows();
-
-    // none of the stripes will have PRESENT stream
-    List<Boolean> expected = Lists.newArrayList();
-    for (StripeInformation sinfo : reader.getStripes()) {
-      expected.add(false);
-    }
-
-    List<Boolean> got = Lists.newArrayList();
-    // check if the stripe footer contains PRESENT stream
-    for (StripeInformation sinfo : reader.getStripes()) {
-      OrcProto.StripeFooter sf =
-        ((RecordReaderImpl) rows).readStripeFooter(sinfo);
-      got.add(sf.toString().indexOf(OrcProto.Stream.Kind.PRESENT.toString())
-              != -1);
-    }
-    assertEquals(expected, got);
-
-    rows.seekToRow(19998);
-
-    batch = reader.getSchema().createRowBatch();
-    LongColumnVector aColumn = (LongColumnVector) batch.cols[0];
-    BytesColumnVector bColumn = (BytesColumnVector) batch.cols[1];
-    LongColumnVector cColumn = (LongColumnVector) batch.cols[2];
-    ListColumnVector dColumn = (ListColumnVector) batch.cols[3];
-    LongColumnVector dElements =
-        (LongColumnVector)(((StructColumnVector) dColumn.child).fields[0]);
-
-    assertEquals(true, rows.nextBatch(batch));
-    assertEquals(2, batch.size);
-
-    // last-1 row
-    assertEquals(0, aColumn.vector[0]);
-    assertEquals("a", bColumn.toString(0));
-    assertEquals(1, cColumn.vector[0]);
-    assertEquals(0, dColumn.offsets[0]);
-    assertEquals(1, dColumn.lengths[0]);
-    assertEquals(100, dElements.vector[0]);
-
-    // last row
-    assertEquals(0, aColumn.vector[1]);
-    assertEquals("b", bColumn.toString(1));
-    assertEquals(1, cColumn.vector[1]);
-    assertEquals(1, dColumn.offsets[1]);
-    assertEquals(1, dColumn.lengths[1]);
-    assertEquals(100, dElements.vector[1]);
-    rows.close();
-  }
-
-  @Test
-  public void testColumnsWithNullAndCompression() throws Exception {
-    TypeDescription schema = createMyStruct();
-    Writer writer = OrcFile.createWriter(testFilePath,
-                                         OrcFile.writerOptions(conf)
-                                         .setSchema(schema)
-                                         .stripeSize(100000)
-                                         .bufferSize(10000));
-    VectorizedRowBatch batch = schema.createRowBatch();
-    addRow(writer, batch, 3, "a", true, 100);
-    addRow(writer, batch, null, "b", true, 100);
-    addRow(writer, batch, 3, null, false, 100);
-    addRow(writer, batch, 3, "d", true, 100);
-    addRow(writer, batch, 2, "e", true, 100);
-    addRow(writer, batch, 2, "f", true, 100);
-    addRow(writer, batch, 2, "g", true, 100);
-    addRow(writer, batch, 2, "h", true, 100);
-    writer.addRowBatch(batch);
-    writer.close();
-
-    Reader reader = OrcFile.createReader(testFilePath,
-        OrcFile.readerOptions(conf).filesystem(fs));
-    // check the stats
-    ColumnStatistics[] stats = reader.getStatistics();
-    assertEquals(8, reader.getNumberOfRows());
-    assertEquals(8, stats[0].getNumberOfValues());
-
-    assertEquals(3, ((IntegerColumnStatistics) stats[1]).getMaximum());
-    assertEquals(2, ((IntegerColumnStatistics) stats[1]).getMinimum());
-    assertEquals(true, ((IntegerColumnStatistics) stats[1]).isSumDefined());
-    assertEquals(17, ((IntegerColumnStatistics) stats[1]).getSum());
-    assertEquals("count: 7 hasNull: true min: 2 max: 3 sum: 17",
-        stats[1].toString());
-
-    assertEquals("h", ((StringColumnStatistics) stats[2]).getMaximum());
-    assertEquals("a", ((StringColumnStatistics) stats[2]).getMinimum());
-    assertEquals(7, stats[2].getNumberOfValues());
-    assertEquals("count: 7 hasNull: true min: a max: h sum: 7",
-        stats[2].toString());
-
-    // check the inspectors
-    batch = reader.getSchema().createRowBatch();
-    LongColumnVector aColumn = (LongColumnVector) batch.cols[0];
-    BytesColumnVector bColumn = (BytesColumnVector) batch.cols[1];
-    LongColumnVector cColumn = (LongColumnVector) batch.cols[2];
-    ListColumnVector dColumn = (ListColumnVector) batch.cols[3];
-    LongColumnVector dElements =
-        (LongColumnVector)(((StructColumnVector) dColumn.child).fields[0]);
-    Assert.assertEquals("struct<a:int,b:string,c:boolean,d:array<struct<z:int>>>",
-        reader.getSchema().toString());
-
-    RecordReader rows = reader.rows();
-    // only the last stripe will have PRESENT stream
-    List<Boolean> expected = Lists.newArrayList();
-    for (StripeInformation sinfo : reader.getStripes()) {
-      expected.add(false);
-    }
-    expected.set(expected.size() - 1, true);
-
-    List<Boolean> got = Lists.newArrayList();
-    // check if the stripe footer contains PRESENT stream
-    for (StripeInformation sinfo : reader.getStripes()) {
-      OrcProto.StripeFooter sf =
-        ((RecordReaderImpl) rows).readStripeFooter(sinfo);
-      got.add(sf.toString().indexOf(OrcProto.Stream.Kind.PRESENT.toString())
-              != -1);
-    }
-    assertEquals(expected, got);
-
-    assertEquals(true, rows.nextBatch(batch));
-    assertEquals(8, batch.size);
-
-    // row 1
-    assertEquals(3, aColumn.vector[0]);
-    assertEquals("a", bColumn.toString(0));
-    assertEquals(1, cColumn.vector[0]);
-    assertEquals(0, dColumn.offsets[0]);
-    assertEquals(1, dColumn.lengths[0]);
-    assertEquals(100, dElements.vector[0]);
-
-    // row 2
-    assertEquals(true, aColumn.isNull[1]);
-    assertEquals("b", bColumn.toString(1));
-    assertEquals(1, cColumn.vector[1]);
-    assertEquals(1, dColumn.offsets[1]);
-    assertEquals(1, dColumn.lengths[1]);
-    assertEquals(100, dElements.vector[1]);
-
-    // row 3
-    assertEquals(3, aColumn.vector[2]);
-    assertEquals(true, bColumn.isNull[2]);
-    assertEquals(0, cColumn.vector[2]);
-    assertEquals(2, dColumn.offsets[2]);
-    assertEquals(1, dColumn.lengths[2]);
-    assertEquals(100, dElements.vector[2]);
-
-    rows.close();
-  }
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/orc/TestOrcTimezone1.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/orc/TestOrcTimezone1.java b/orc/src/test/org/apache/orc/TestOrcTimezone1.java
deleted file mode 100644
index 72dc455..0000000
--- a/orc/src/test/org/apache/orc/TestOrcTimezone1.java
+++ /dev/null
@@ -1,189 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.orc;
-
-import static junit.framework.Assert.assertEquals;
-import static junit.framework.Assert.assertNotNull;
-
-import java.io.File;
-import java.sql.Timestamp;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.List;
-import java.util.TimeZone;
-
-import junit.framework.Assert;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TestName;
-import org.junit.runner.RunWith;
-import org.junit.runners.Parameterized;
-
-import com.google.common.collect.Lists;
-
-/**
- * Tests that ORC timestamps round-trip correctly across writer and reader time zones.
- */
-@RunWith(Parameterized.class)
-public class TestOrcTimezone1 {
-  Path workDir = new Path(System.getProperty("test.tmp.dir",
-      "target" + File.separator + "test" + File.separator + "tmp"));
-  Configuration conf;
-  FileSystem fs;
-  Path testFilePath;
-  String writerTimeZone;
-  String readerTimeZone;
-  static TimeZone defaultTimeZone = TimeZone.getDefault();
-
-  public TestOrcTimezone1(String writerTZ, String readerTZ) {
-    this.writerTimeZone = writerTZ;
-    this.readerTimeZone = readerTZ;
-  }
-
-  @Parameterized.Parameters
-  public static Collection<Object[]> data() {
-    List<Object[]> result = Arrays.asList(new Object[][]{
-        /* Extreme timezones */
-        {"GMT-12:00", "GMT+14:00"},
-        /* No difference in DST */
-        {"America/Los_Angeles", "America/Los_Angeles"}, /* same timezone both with DST */
-        {"Europe/Berlin", "Europe/Berlin"}, /* same as above but europe */
-        {"America/Phoenix", "Asia/Kolkata"} /* Writer no DST, Reader no DST */,
-        {"Europe/Berlin", "America/Los_Angeles"} /* Writer DST, Reader DST */,
-        {"Europe/Berlin", "America/Chicago"} /* Writer DST, Reader DST */,
-        /* With DST difference */
-        {"Europe/Berlin", "UTC"},
-        {"UTC", "Europe/Berlin"} /* Writer no DST, Reader DST */,
-        {"America/Los_Angeles", "Asia/Kolkata"} /* Writer DST, Reader no DST */,
-        {"Europe/Berlin", "Asia/Kolkata"} /* Writer DST, Reader no DST */,
-        /* Timezone offsets for the reader has changed historically */
-        {"Asia/Saigon", "Pacific/Enderbury"},
-        {"UTC", "Asia/Jerusalem"},
-
-        // NOTE:
-        // "1995-01-01 03:00:00.688888888" this is not a valid time in Pacific/Enderbury timezone.
-        // On 1995-01-01 00:00:00 GMT offset moved from -11:00 hr to +13:00 which makes all values
-        // on 1995-01-01 invalid. Try this with joda time
-        // new MutableDateTime("1995-01-01", DateTimeZone.forTimeZone(readerTimeZone));
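-        //
-        // A minimal sketch (not part of the original test; probe instants and expected offsets are
-        // assumptions based on the note above) of observing the jump with plain java.util.TimeZone:
-        //   TimeZone enderbury = TimeZone.getTimeZone("Pacific/Enderbury");
-        //   // getOffset returns milliseconds; expect about -11 * 3600 * 1000 just before the switch ...
-        //   int before = enderbury.getOffset(Timestamp.valueOf("1994-12-30 00:00:00").getTime());
-        //   // ... and about +13 * 3600 * 1000 after it (Timestamp.valueOf uses the JVM default zone)
-        //   int after = enderbury.getOffset(Timestamp.valueOf("1995-01-02 00:00:00").getTime());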
-    });
-    return result;
-  }
-
-  @Rule
-  public TestName testCaseName = new TestName();
-
-  @Before
-  public void openFileSystem() throws Exception {
-    conf = new Configuration();
-    fs = FileSystem.getLocal(conf);
-    testFilePath = new Path(workDir, "TestOrcFile." +
-        testCaseName.getMethodName() + ".orc");
-    fs.delete(testFilePath, false);
-  }
-
-  @After
-  public void restoreTimeZone() {
-    TimeZone.setDefault(defaultTimeZone);
-  }
-
-  @Test
-  public void testTimestampWriter() throws Exception {
-    TypeDescription schema = TypeDescription.createTimestamp();
-
-    TimeZone.setDefault(TimeZone.getTimeZone(writerTimeZone));
-    Writer writer = OrcFile.createWriter(testFilePath,
-        OrcFile.writerOptions(conf).setSchema(schema).stripeSize(100000)
-            .bufferSize(10000));
-    assertEquals(writerTimeZone, TimeZone.getDefault().getID());
-    List<String> ts = Lists.newArrayList();
-    ts.add("2003-01-01 01:00:00.000000222");
-    ts.add("1996-08-02 09:00:00.723100809");
-    ts.add("1999-01-01 02:00:00.999999999");
-    ts.add("1995-01-02 03:00:00.688888888");
-    ts.add("2002-01-01 04:00:00.1");
-    ts.add("2010-03-02 05:00:00.000009001");
-    ts.add("2005-01-01 06:00:00.000002229");
-    ts.add("2006-01-01 07:00:00.900203003");
-    ts.add("2003-01-01 08:00:00.800000007");
-    ts.add("1998-11-02 10:00:00.857340643");
-    ts.add("2008-10-02 11:00:00.0");
-    ts.add("2037-01-01 00:00:00.000999");
-    ts.add("2014-03-28 00:00:00.0");
-    VectorizedRowBatch batch = schema.createRowBatch();
-    TimestampColumnVector times = (TimestampColumnVector) batch.cols[0];
-    for (String t : ts) {
-      times.set(batch.size++, Timestamp.valueOf(t));
-    }
-    writer.addRowBatch(batch);
-    writer.close();
-
-    TimeZone.setDefault(TimeZone.getTimeZone(readerTimeZone));
-    Reader reader = OrcFile.createReader(testFilePath,
-        OrcFile.readerOptions(conf).filesystem(fs));
-    assertEquals(readerTimeZone, TimeZone.getDefault().getID());
-    RecordReader rows = reader.rows();
-    batch = reader.getSchema().createRowBatch();
-    times = (TimestampColumnVector) batch.cols[0];
-    int idx = 0;
-    while (rows.nextBatch(batch)) {
-      for(int r=0; r < batch.size; ++r) {
-        assertEquals(ts.get(idx++), times.asScratchTimestamp(r).toString());
-      }
-    }
-    rows.close();
-  }
-
-  @Test
-  public void testReadTimestampFormat_0_11() throws Exception {
-    TimeZone.setDefault(TimeZone.getTimeZone(readerTimeZone));
-    Path oldFilePath = new Path(getClass().getClassLoader().
-        getSystemResource("orc-file-11-format.orc").getPath());
-    Reader reader = OrcFile.createReader(oldFilePath,
-        OrcFile.readerOptions(conf).filesystem(fs));
-    TypeDescription schema = reader.getSchema();
-    int col = schema.getFieldNames().indexOf("ts");
-    VectorizedRowBatch batch = schema.createRowBatch(10);
-    TimestampColumnVector ts = (TimestampColumnVector) batch.cols[col];
-
-    boolean[] include = new boolean[schema.getMaximumId() + 1];
-    include[schema.getChildren().get(col).getId()] = true;
-    RecordReader rows = reader.rows
-        (new Reader.Options().include(include));
-    assertEquals(true, rows.nextBatch(batch));
-    assertEquals(Timestamp.valueOf("2000-03-12 15:00:00"),
-        ts.asScratchTimestamp(0));
-
-    // check the contents of the second sampled row (the last row in the file)
-    rows.seekToRow(7499);
-    assertEquals(true, rows.nextBatch(batch));
-    assertEquals(1, batch.size);
-    assertEquals(Timestamp.valueOf("2000-03-12 15:00:01"),
-        ts.asScratchTimestamp(0));
-
-    // confirm there are no more batches, then close the reader
-    Assert.assertEquals(false, rows.nextBatch(batch));
-    rows.close();
-  }
-}