Posted to commits@hive.apache.org by om...@apache.org on 2015/12/12 00:28:10 UTC

[13/16] hive git commit: HIVE-11890. Create ORC submodule. (omalley reviewed by prasanthj)

http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/orc/src/java/org/apache/orc/TypeDescription.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/TypeDescription.java b/orc/src/java/org/apache/orc/TypeDescription.java
new file mode 100644
index 0000000..fc945e4
--- /dev/null
+++ b/orc/src/java/org/apache/orc/TypeDescription.java
@@ -0,0 +1,540 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.orc;
+
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.UnionColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * This is the description of the types in an ORC file.
+ */
+public class TypeDescription {
+  private static final int MAX_PRECISION = 38;
+  private static final int MAX_SCALE = 38;
+  private static final int DEFAULT_PRECISION = 38;
+  private static final int DEFAULT_SCALE = 10;
+  private static final int DEFAULT_LENGTH = 256;
+  public enum Category {
+    BOOLEAN("boolean", true),
+    BYTE("tinyint", true),
+    SHORT("smallint", true),
+    INT("int", true),
+    LONG("bigint", true),
+    FLOAT("float", true),
+    DOUBLE("double", true),
+    STRING("string", true),
+    DATE("date", true),
+    TIMESTAMP("timestamp", true),
+    BINARY("binary", true),
+    DECIMAL("decimal", true),
+    VARCHAR("varchar", true),
+    CHAR("char", true),
+    LIST("array", false),
+    MAP("map", false),
+    STRUCT("struct", false),
+    UNION("union", false);
+
+    Category(String name, boolean isPrimitive) {
+      this.name = name;
+      this.isPrimitive = isPrimitive;
+    }
+
+    final boolean isPrimitive;
+    final String name;
+
+    public boolean isPrimitive() {
+      return isPrimitive;
+    }
+
+    public String getName() {
+      return name;
+    }
+  }
+
+  public static TypeDescription createBoolean() {
+    return new TypeDescription(Category.BOOLEAN);
+  }
+
+  public static TypeDescription createByte() {
+    return new TypeDescription(Category.BYTE);
+  }
+
+  public static TypeDescription createShort() {
+    return new TypeDescription(Category.SHORT);
+  }
+
+  public static TypeDescription createInt() {
+    return new TypeDescription(Category.INT);
+  }
+
+  public static TypeDescription createLong() {
+    return new TypeDescription(Category.LONG);
+  }
+
+  public static TypeDescription createFloat() {
+    return new TypeDescription(Category.FLOAT);
+  }
+
+  public static TypeDescription createDouble() {
+    return new TypeDescription(Category.DOUBLE);
+  }
+
+  public static TypeDescription createString() {
+    return new TypeDescription(Category.STRING);
+  }
+
+  public static TypeDescription createDate() {
+    return new TypeDescription(Category.DATE);
+  }
+
+  public static TypeDescription createTimestamp() {
+    return new TypeDescription(Category.TIMESTAMP);
+  }
+
+  public static TypeDescription createBinary() {
+    return new TypeDescription(Category.BINARY);
+  }
+
+  public static TypeDescription createDecimal() {
+    return new TypeDescription(Category.DECIMAL);
+  }
+
+  /**
+   * For decimal types, set the precision.
+   * @param precision the new precision
+   * @return this
+   */
+  public TypeDescription withPrecision(int precision) {
+    if (category != Category.DECIMAL) {
+      throw new IllegalArgumentException("precision is only allowed on decimal"+
+         " and not " + category.name);
+    } else if (precision < 1 || precision > MAX_PRECISION || scale > precision){
+      throw new IllegalArgumentException("precision " + precision +
+          " is out of range 1 .. " + scale);
+    }
+    this.precision = precision;
+    return this;
+  }
+
+  /**
+   * For decimal types, set the scale.
+   * @param scale the new scale
+   * @return this
+   */
+  public TypeDescription withScale(int scale) {
+    if (category != Category.DECIMAL) {
+      throw new IllegalArgumentException("scale is only allowed on decimal"+
+          " and not " + category.name);
+    } else if (scale < 0 || scale > MAX_SCALE || scale > precision) {
+      throw new IllegalArgumentException("scale is out of range at " + scale);
+    }
+    this.scale = scale;
+    return this;
+  }
+
+  public static TypeDescription createVarchar() {
+    return new TypeDescription(Category.VARCHAR);
+  }
+
+  public static TypeDescription createChar() {
+    return new TypeDescription(Category.CHAR);
+  }
+
+  /**
+   * Set the maximum length for char and varchar types.
+   * @param maxLength the maximum value
+   * @return this
+   */
+  public TypeDescription withMaxLength(int maxLength) {
+    if (category != Category.VARCHAR && category != Category.CHAR) {
+      throw new IllegalArgumentException("maxLength is only allowed on char" +
+                   " and varchar and not " + category.name);
+    }
+    this.maxLength = maxLength;
+    return this;
+  }
+
+  public static TypeDescription createList(TypeDescription childType) {
+    TypeDescription result = new TypeDescription(Category.LIST);
+    result.children.add(childType);
+    childType.parent = result;
+    return result;
+  }
+
+  public static TypeDescription createMap(TypeDescription keyType,
+                                          TypeDescription valueType) {
+    TypeDescription result = new TypeDescription(Category.MAP);
+    result.children.add(keyType);
+    result.children.add(valueType);
+    keyType.parent = result;
+    valueType.parent = result;
+    return result;
+  }
+
+  public static TypeDescription createUnion() {
+    return new TypeDescription(Category.UNION);
+  }
+
+  public static TypeDescription createStruct() {
+    return new TypeDescription(Category.STRUCT);
+  }
+
+  /**
+   * Add a child to a union type.
+   * @param child a new child type to add
+   * @return the union type.
+   */
+  public TypeDescription addUnionChild(TypeDescription child) {
+    if (category != Category.UNION) {
+      throw new IllegalArgumentException("Can only add types to union type" +
+          " and not " + category);
+    }
+    children.add(child);
+    child.parent = this;
+    return this;
+  }
+
+  /**
+   * Add a field to a struct type as it is built.
+   * @param field the field name
+   * @param fieldType the type of the field
+   * @return the struct type
+   */
+  public TypeDescription addField(String field, TypeDescription fieldType) {
+    if (category != Category.STRUCT) {
+      throw new IllegalArgumentException("Can only add fields to struct type" +
+          " and not " + category);
+    }
+    fieldNames.add(field);
+    children.add(fieldType);
+    fieldType.parent = this;
+    return this;
+  }
+
+  /**
+   * Get the id for this type.
+   * The first call will cause all of the ids in the tree to be assigned, so
+   * it should not be called before the type is completely built.
+   * @return the sequential id
+   */
+  public int getId() {
+    // if the id hasn't been assigned, assign all of the ids from the root
+    if (id == -1) {
+      TypeDescription root = this;
+      while (root.parent != null) {
+        root = root.parent;
+      }
+      root.assignIds(0);
+    }
+    return id;
+  }
+
+  /**
+   * Get the maximum id assigned to this type or its children.
+   * The first call will cause all of the ids in the tree to be assigned, so
+   * it should not be called before the type is completely built.
+   * @return the maximum id assigned under this type
+   */
+  public int getMaximumId() {
+    // if the id hasn't been assigned, assign all of the ids from the root
+    if (maxId == -1) {
+      TypeDescription root = this;
+      while (root.parent != null) {
+        root = root.parent;
+      }
+      root.assignIds(0);
+    }
+    return maxId;
+  }
+
+  private ColumnVector createColumn() {
+    switch (category) {
+      case BOOLEAN:
+      case BYTE:
+      case SHORT:
+      case INT:
+      case LONG:
+      case TIMESTAMP:
+      case DATE:
+        return new LongColumnVector();
+      case FLOAT:
+      case DOUBLE:
+        return new DoubleColumnVector();
+      case DECIMAL:
+        return new DecimalColumnVector(precision, scale);
+      case STRING:
+      case BINARY:
+      case CHAR:
+      case VARCHAR:
+        return new BytesColumnVector();
+      case STRUCT: {
+        ColumnVector[] fieldVector = new ColumnVector[children.size()];
+        for(int i=0; i < fieldVector.length; ++i) {
+          fieldVector[i] = children.get(i).createColumn();
+        }
+        return new StructColumnVector(VectorizedRowBatch.DEFAULT_SIZE,
+                fieldVector);
+      }
+      case UNION: {
+        ColumnVector[] fieldVector = new ColumnVector[children.size()];
+        for(int i=0; i < fieldVector.length; ++i) {
+          fieldVector[i] = children.get(i).createColumn();
+        }
+        return new UnionColumnVector(VectorizedRowBatch.DEFAULT_SIZE,
+            fieldVector);
+      }
+      case LIST:
+        return new ListColumnVector(VectorizedRowBatch.DEFAULT_SIZE,
+            children.get(0).createColumn());
+      case MAP:
+        return new MapColumnVector(VectorizedRowBatch.DEFAULT_SIZE,
+            children.get(0).createColumn(), children.get(1).createColumn());
+      default:
+        throw new IllegalArgumentException("Unknown type " + category);
+    }
+  }
+
+  public VectorizedRowBatch createRowBatch() {
+    VectorizedRowBatch result;
+    if (category == Category.STRUCT) {
+      result = new VectorizedRowBatch(children.size(),
+          VectorizedRowBatch.DEFAULT_SIZE);
+      for(int i=0; i < result.cols.length; ++i) {
+        result.cols[i] = children.get(i).createColumn();
+      }
+    } else {
+      result = new VectorizedRowBatch(1, VectorizedRowBatch.DEFAULT_SIZE);
+      result.cols[0] = createColumn();
+    }
+    result.reset();
+    return result;
+  }
+
+  /**
+   * Get the kind of this type.
+   * @return the category for this type.
+   */
+  public Category getCategory() {
+    return category;
+  }
+
+  /**
+   * Get the maximum length of the type. Only used for char and varchar types.
+   * @return the maximum length of the string type
+   */
+  public int getMaxLength() {
+    return maxLength;
+  }
+
+  /**
+   * Get the precision of the decimal type.
+   * @return the number of digits for the precision.
+   */
+  public int getPrecision() {
+    return precision;
+  }
+
+  /**
+   * Get the scale of the decimal type.
+   * @return the number of digits for the scale.
+   */
+  public int getScale() {
+    return scale;
+  }
+
+  /**
+   * For struct types, get the list of field names.
+   * @return the list of field names.
+   */
+  public List<String> getFieldNames() {
+    return Collections.unmodifiableList(fieldNames);
+  }
+
+  /**
+   * Get the subtypes of this type.
+   * @return the list of children types
+   */
+  public List<TypeDescription> getChildren() {
+    return children == null ? null : Collections.unmodifiableList(children);
+  }
+
+  /**
+   * Assign ids to all of the nodes under this one.
+   * @param startId the lowest id to assign
+   * @return the next available id
+   */
+  private int assignIds(int startId) {
+    id = startId++;
+    if (children != null) {
+      for (TypeDescription child : children) {
+        startId = child.assignIds(startId);
+      }
+    }
+    maxId = startId - 1;
+    return startId;
+  }
+
+  private TypeDescription(Category category) {
+    this.category = category;
+    if (category.isPrimitive) {
+      children = null;
+    } else {
+      children = new ArrayList<>();
+    }
+    if (category == Category.STRUCT) {
+      fieldNames = new ArrayList<>();
+    } else {
+      fieldNames = null;
+    }
+  }
+
+  private int id = -1;
+  private int maxId = -1;
+  private TypeDescription parent;
+  private final Category category;
+  private final List<TypeDescription> children;
+  private final List<String> fieldNames;
+  private int maxLength = DEFAULT_LENGTH;
+  private int precision = DEFAULT_PRECISION;
+  private int scale = DEFAULT_SCALE;
+
+  public void printToBuffer(StringBuilder buffer) {
+    buffer.append(category.name);
+    switch (category) {
+      case DECIMAL:
+        buffer.append('(');
+        buffer.append(precision);
+        buffer.append(',');
+        buffer.append(scale);
+        buffer.append(')');
+        break;
+      case CHAR:
+      case VARCHAR:
+        buffer.append('(');
+        buffer.append(maxLength);
+        buffer.append(')');
+        break;
+      case LIST:
+      case MAP:
+      case UNION:
+        buffer.append('<');
+        for(int i=0; i < children.size(); ++i) {
+          if (i != 0) {
+            buffer.append(',');
+          }
+          children.get(i).printToBuffer(buffer);
+        }
+        buffer.append('>');
+        break;
+      case STRUCT:
+        buffer.append('<');
+        for(int i=0; i < children.size(); ++i) {
+          if (i != 0) {
+            buffer.append(',');
+          }
+          buffer.append(fieldNames.get(i));
+          buffer.append(':');
+          children.get(i).printToBuffer(buffer);
+        }
+        buffer.append('>');
+        break;
+      default:
+        break;
+    }
+  }
+
+  public String toString() {
+    StringBuilder buffer = new StringBuilder();
+    printToBuffer(buffer);
+    return buffer.toString();
+  }
+
+  private void printJsonToBuffer(String prefix, StringBuilder buffer,
+                                 int indent) {
+    for(int i=0; i < indent; ++i) {
+      buffer.append(' ');
+    }
+    buffer.append(prefix);
+    buffer.append("{\"category\": \"");
+    buffer.append(category.name);
+    buffer.append("\", \"id\": ");
+    buffer.append(getId());
+    buffer.append(", \"max\": ");
+    buffer.append(maxId);
+    switch (category) {
+      case DECIMAL:
+        buffer.append(", \"precision\": ");
+        buffer.append(precision);
+        buffer.append(", \"scale\": ");
+        buffer.append(scale);
+        break;
+      case CHAR:
+      case VARCHAR:
+        buffer.append(", \"length\": ");
+        buffer.append(maxLength);
+        break;
+      case LIST:
+      case MAP:
+      case UNION:
+        buffer.append(", \"children\": [");
+        for(int i=0; i < children.size(); ++i) {
+          buffer.append('\n');
+          children.get(i).printJsonToBuffer("", buffer, indent + 2);
+          if (i != children.size() - 1) {
+            buffer.append(',');
+          }
+        }
+        buffer.append("]");
+        break;
+      case STRUCT:
+        buffer.append(", \"fields\": [");
+        for(int i=0; i < children.size(); ++i) {
+          buffer.append('\n');
+          children.get(i).printJsonToBuffer("\"" + fieldNames.get(i) + "\": ",
+              buffer, indent + 2);
+          if (i != children.size() - 1) {
+            buffer.append(',');
+          }
+        }
+        buffer.append(']');
+        break;
+      default:
+        break;
+    }
+    buffer.append('}');
+  }
+
+  public String toJson() {
+    StringBuilder buffer = new StringBuilder();
+    printJsonToBuffer("", buffer, 0);
+    return buffer.toString();
+  }
+}
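
For reference, a minimal usage sketch of the TypeDescription builder API above
(illustrative only, not part of this patch; the field names are made up, and
BytesColumnVector.setRef simply stores a reference to the supplied bytes):

    import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
    import org.apache.orc.TypeDescription;

    public class TypeDescriptionExample {
      public static void main(String[] args) {
        // struct<id:bigint,name:varchar(64),price:decimal(10,2)>
        TypeDescription schema = TypeDescription.createStruct()
            .addField("id", TypeDescription.createLong())
            .addField("name", TypeDescription.createVarchar().withMaxLength(64))
            .addField("price",
                TypeDescription.createDecimal().withPrecision(10).withScale(2));

        System.out.println(schema);          // the type string above
        System.out.println(schema.toJson()); // JSON form with column ids

        // Ids are assigned lazily, depth first, starting at the root.
        System.out.println(schema.getId());        // 0
        System.out.println(schema.getMaximumId()); // 3

        // One ColumnVector per top-level field.
        VectorizedRowBatch batch = schema.createRowBatch();
        LongColumnVector id = (LongColumnVector) batch.cols[0];
        BytesColumnVector name = (BytesColumnVector) batch.cols[1];
        byte[] bytes = "example".getBytes();
        id.vector[0] = 1L;
        name.setRef(0, bytes, 0, bytes.length);
        batch.size = 1;
      }
    }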

http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/orc/src/java/org/apache/orc/Writer.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/Writer.java b/orc/src/java/org/apache/orc/Writer.java
new file mode 100644
index 0000000..4492062
--- /dev/null
+++ b/orc/src/java/org/apache/orc/Writer.java
@@ -0,0 +1,114 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.orc;
+
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.List;
+
+import org.apache.orc.OrcProto;
+import org.apache.orc.StripeInformation;
+import org.apache.orc.TypeDescription;
+
+/**
+ * The interface for writing ORC files.
+ */
+public interface Writer {
+
+  /**
+   * Get the schema for this writer
+   * @return the file schema
+   */
+  TypeDescription getSchema();
+
+  /**
+   * Add arbitrary meta-data to the ORC file. This may be called at any point
+   * until the Writer is closed. If the same key is passed a second time, the
+   * second value will replace the first.
+   * @param key a key to label the data with.
+   * @param value the contents of the metadata.
+   */
+  void addUserMetadata(String key, ByteBuffer value);
+
+  /**
+   * Add a row batch to the ORC file.
+   * @param batch the rows to add
+   */
+  void addRowBatch(VectorizedRowBatch batch) throws IOException;
+
+  /**
+   * Flush all of the buffers and close the file. No methods on this writer
+   * should be called afterwards.
+   * @throws IOException
+   */
+  void close() throws IOException;
+
+  /**
+   * Return the deserialized data size. The raw data size is computed when
+   * the file footer is written, so the value is only available after the
+   * writer has been closed.
+   *
+   * @return raw data size
+   */
+  long getRawDataSize();
+
+  /**
+   * Return the number of rows in the file. The row count is updated when
+   * stripes are flushed, so to get an accurate count this method should be
+   * called after the writer has been closed.
+   *
+   * @return row count
+   */
+  long getNumberOfRows();
+
+  /**
+   * Write an intermediate footer on the file such that if the file is
+   * truncated to the returned offset, it would be a valid ORC file.
+   * @return the offset that would be a valid end location for an ORC file
+   */
+  long writeIntermediateFooter() throws IOException;
+
+  /**
+   * Fast stripe append to an ORC file. This interface is used for fast
+   * merging of ORC files. When merging, the caller should pass each stripe
+   * in binary form along with its stripe information and stripe statistics.
+   * After appending the last stripe of a file, use appendUserMetadata() to
+   * append any user metadata.
+   * @param stripe - stripe as byte array
+   * @param offset - offset within byte array
+   * @param length - length of stripe within byte array
+   * @param stripeInfo - stripe information
+   * @param stripeStatistics - stripe statistics (Protobuf objects can be
+   *                         merged directly)
+   * @throws IOException
+   */
+  public void appendStripe(byte[] stripe, int offset, int length,
+      StripeInformation stripeInfo,
+      OrcProto.StripeStatistics stripeStatistics) throws IOException;
+
+  /**
+   * When fast stripe append is used for merging ORC stripes, after appending
+   * the last stripe from a file, this interface must be used to merge any
+   * user metadata.
+   * @param userMetadata - user metadata
+   */
+  public void appendUserMetadata(List<OrcProto.UserMetadataItem> userMetadata);
+}
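
For reference, a minimal sketch of driving the Writer interface above
(illustrative only, not part of this patch). How the concrete Writer instance
is obtained is outside this file, and a schema of struct<x:bigint> is assumed:

    import java.io.IOException;
    import java.nio.ByteBuffer;
    import java.nio.charset.StandardCharsets;
    import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
    import org.apache.orc.TypeDescription;
    import org.apache.orc.Writer;

    public class WriterExample {
      // Assumes the writer's schema is struct<x:bigint>.
      static void writeRows(Writer writer, long rowCount) throws IOException {
        TypeDescription schema = writer.getSchema();
        VectorizedRowBatch batch = schema.createRowBatch();
        LongColumnVector x = (LongColumnVector) batch.cols[0];
        for (long row = 0; row < rowCount; ++row) {
          x.vector[batch.size++] = row;
          if (batch.size == VectorizedRowBatch.DEFAULT_SIZE) {
            writer.addRowBatch(batch);   // flush a full batch
            batch.reset();
          }
        }
        if (batch.size > 0) {
          writer.addRowBatch(batch);     // flush the partial tail
        }
        // "written.by" is an arbitrary example key.
        writer.addUserMetadata("written.by",
            ByteBuffer.wrap("example".getBytes(StandardCharsets.UTF_8)));
        writer.close();                  // no further calls after this
      }
    }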

http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/orc/src/java/org/apache/orc/impl/BitFieldReader.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/BitFieldReader.java b/orc/src/java/org/apache/orc/impl/BitFieldReader.java
new file mode 100644
index 0000000..8d9d3cb
--- /dev/null
+++ b/orc/src/java/org/apache/orc/impl/BitFieldReader.java
@@ -0,0 +1,216 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.orc.impl;
+
+import java.io.EOFException;
+import java.io.IOException;
+
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.orc.impl.InStream;
+import org.apache.orc.impl.PositionProvider;
+import org.apache.orc.impl.RunLengthByteReader;
+
+public class BitFieldReader {
+  private final RunLengthByteReader input;
+  /** The number of bits in one item. Non-test code always uses 1. */
+  private final int bitSize;
+  private int current;
+  private int bitsLeft;
+  private final int mask;
+
+  public BitFieldReader(InStream input,
+      int bitSize) throws IOException {
+    this.input = new RunLengthByteReader(input);
+    this.bitSize = bitSize;
+    mask = (1 << bitSize) - 1;
+  }
+
+  public void setInStream(InStream inStream) {
+    this.input.setInStream(inStream);
+  }
+
+  private void readByte() throws IOException {
+    if (input.hasNext()) {
+      current = 0xff & input.next();
+      bitsLeft = 8;
+    } else {
+      throw new EOFException("Read past end of bit field from " + this);
+    }
+  }
+
+  public int next() throws IOException {
+    int result = 0;
+    int bitsLeftToRead = bitSize;
+    while (bitsLeftToRead > bitsLeft) {
+      result <<= bitsLeft;
+      result |= current & ((1 << bitsLeft) - 1);
+      bitsLeftToRead -= bitsLeft;
+      readByte();
+    }
+    if (bitsLeftToRead > 0) {
+      result <<= bitsLeftToRead;
+      bitsLeft -= bitsLeftToRead;
+      result |= (current >>> bitsLeft) & ((1 << bitsLeftToRead) - 1);
+    }
+    return result & mask;
+  }
+
+  /**
+   * Unlike integer readers, where runs are encoded explicitly, in this one we have to read ahead
+   * to figure out whether we have a run. Given that runs in booleans are likely, it's worth it.
+   * However, it means we'd need to keep track of how many bytes we read, and next/nextVector won't
+   * work anymore once this is called. This is trivial to fix, but these calls are never interspersed.
+   */
+  private boolean lastRunValue;
+  private int lastRunLength = -1;
+  private void readNextRun(int maxRunLength) throws IOException {
+    assert bitSize == 1;
+    if (lastRunLength > 0) return; // last run is not exhausted yet
+    if (bitsLeft == 0) {
+      readByte();
+    }
+    // First take care of the partial bits.
+    boolean hasVal = false;
+    int runLength = 0;
+    if (bitsLeft != 8) {
+      int partialBitsMask = (1 << bitsLeft) - 1;
+      int partialBits = current & partialBitsMask;
+      if (partialBits == partialBitsMask || partialBits == 0) {
+        lastRunValue = (partialBits == partialBitsMask);
+        if (maxRunLength <= bitsLeft) {
+          lastRunLength = maxRunLength;
+          return;
+        }
+        maxRunLength -= bitsLeft;
+        hasVal = true;
+        runLength = bitsLeft;
+        bitsLeft = 0;
+      } else {
+        // There's no run in partial bits. Return whatever we have.
+        int prefixBitsCount = 32 - bitsLeft;
+        runLength = Integer.numberOfLeadingZeros(partialBits) - prefixBitsCount;
+        lastRunValue = (runLength > 0);
+        lastRunLength = Math.min(maxRunLength, lastRunValue ? runLength :
+          (Integer.numberOfLeadingZeros(~(partialBits | ~partialBitsMask)) - prefixBitsCount));
+        return;
+      }
+      assert bitsLeft == 0;
+      readByte();
+    }
+    if (!hasVal) {
+      lastRunValue = ((current >> 7) == 1);
+      hasVal = true;
+    }
+    // Read full bytes until the run ends.
+    assert bitsLeft == 8;
+    while (maxRunLength >= 8
+        && ((lastRunValue && (current == 0xff)) || (!lastRunValue && (current == 0)))) {
+      runLength += 8;
+      maxRunLength -= 8;
+      readByte();
+    }
+    if (maxRunLength > 0) {
+      int extraBits = Integer.numberOfLeadingZeros(
+          lastRunValue ? (~(current | ~255)) : current) - 24;
+      bitsLeft -= extraBits;
+      runLength += extraBits;
+    }
+    lastRunLength = runLength;
+  }
+
+  public void nextVector(LongColumnVector previous,
+                         long previousLen) throws IOException {
+    previous.isRepeating = true;
+    for (int i = 0; i < previousLen; i++) {
+      if (!previous.isNull[i]) {
+        previous.vector[i] = next();
+      } else {
+        // The default value for nulls in vectorized processing of int types
+        // is 1, so set that when the value is null.
+        previous.vector[i] = 1;
+      }
+
+      // The default value for nulls in vectorization for int types is 1,
+      // and since a non-null value can also be 1, we must also check isNull
+      // when determining the isRepeating flag.
+      if (previous.isRepeating
+          && i > 0
+          && ((previous.vector[i - 1] != previous.vector[i]) || (previous.isNull[i - 1] != previous.isNull[i]))) {
+        previous.isRepeating = false;
+      }
+    }
+  }
+
+  public void seek(PositionProvider index) throws IOException {
+    input.seek(index);
+    int consumed = (int) index.getNext();
+    if (consumed > 8) {
+      throw new IllegalArgumentException("Seek past end of byte at " +
+          consumed + " in " + input);
+    } else if (consumed != 0) {
+      readByte();
+      bitsLeft = 8 - consumed;
+    } else {
+      bitsLeft = 0;
+    }
+  }
+
+  public void skip(long items) throws IOException {
+    long totalBits = bitSize * items;
+    if (bitsLeft >= totalBits) {
+      bitsLeft -= totalBits;
+    } else {
+      totalBits -= bitsLeft;
+      input.skip(totalBits / 8);
+      current = input.next();
+      bitsLeft = (int) (8 - (totalBits % 8));
+    }
+  }
+
+  @Override
+  public String toString() {
+    return "bit reader current: " + current + " bits left: " + bitsLeft +
+        " bit size: " + bitSize + " from " + input;
+  }
+
+  boolean hasFullByte() {
+    return bitsLeft == 8 || bitsLeft == 0;
+  }
+
+  int peekOneBit() throws IOException {
+    assert bitSize == 1;
+    if (bitsLeft == 0) {
+      readByte();
+    }
+    return (current >>> (bitsLeft - 1)) & 1;
+  }
+
+  int peekFullByte() throws IOException {
+    assert bitSize == 1;
+    assert bitsLeft == 8 || bitsLeft == 0;
+    if (bitsLeft == 0) {
+      readByte();
+    }
+    return current;
+  }
+
+  void skipInCurrentByte(int bits) throws IOException {
+    assert bitsLeft >= bits;
+    bitsLeft -= bits;
+  }
+}
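
The core of next() above is MSB-first extraction of bitSize-wide values using
the current/bitsLeft bookkeeping. A standalone sketch of that extraction over a
plain byte array (illustrative only; the real reader pulls its bytes through a
RunLengthByteReader):

    public class BitUnpackSketch {
      // Mirrors the current/bitsLeft bookkeeping in BitFieldReader.next().
      public static int[] unpack(byte[] data, int bitSize, int count) {
        int mask = (1 << bitSize) - 1;
        int[] out = new int[count];
        int current = 0, bitsLeft = 0, pos = 0;
        for (int n = 0; n < count; ++n) {
          int result = 0;
          int bitsLeftToRead = bitSize;
          while (bitsLeftToRead > bitsLeft) {
            // take whatever is left in the current byte ...
            result <<= bitsLeft;
            result |= current & ((1 << bitsLeft) - 1);
            bitsLeftToRead -= bitsLeft;
            // ... and load the next one
            current = 0xff & data[pos++];
            bitsLeft = 8;
          }
          if (bitsLeftToRead > 0) {
            result <<= bitsLeftToRead;
            bitsLeft -= bitsLeftToRead;
            result |= (current >>> bitsLeft) & ((1 << bitsLeftToRead) - 1);
          }
          out[n] = result & mask;
        }
        return out;
      }

      public static void main(String[] args) {
        // 0b10110100 unpacked with bitSize = 1 yields 1,0,1,1,0,1,0,0
        for (int v : unpack(new byte[]{(byte) 0b10110100}, 1, 8)) {
          System.out.print(v);
        }
        System.out.println();
      }
    }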

http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/orc/src/java/org/apache/orc/impl/BitFieldWriter.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/BitFieldWriter.java b/orc/src/java/org/apache/orc/impl/BitFieldWriter.java
new file mode 100644
index 0000000..aa5f886
--- /dev/null
+++ b/orc/src/java/org/apache/orc/impl/BitFieldWriter.java
@@ -0,0 +1,73 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.orc.impl;
+
+import org.apache.orc.impl.PositionRecorder;
+import org.apache.orc.impl.PositionedOutputStream;
+import org.apache.orc.impl.RunLengthByteWriter;
+
+import java.io.IOException;
+
+public class BitFieldWriter {
+  private RunLengthByteWriter output;
+  private final int bitSize;
+  private byte current = 0;
+  private int bitsLeft = 8;
+
+  public BitFieldWriter(PositionedOutputStream output,
+                 int bitSize) throws IOException {
+    this.output = new RunLengthByteWriter(output);
+    this.bitSize = bitSize;
+  }
+
+  private void writeByte() throws IOException {
+    output.write(current);
+    current = 0;
+    bitsLeft = 8;
+  }
+
+  public void flush() throws IOException {
+    if (bitsLeft != 8) {
+      writeByte();
+    }
+    output.flush();
+  }
+
+  public void write(int value) throws IOException {
+    int bitsToWrite = bitSize;
+    while (bitsToWrite > bitsLeft) {
+      // add the bits to the bottom of the current word
+      current |= value >>> (bitsToWrite - bitsLeft);
+      // subtract out the bits we just added
+      bitsToWrite -= bitsLeft;
+      // zero out the bits above bitsToWrite
+      value &= (1 << bitsToWrite) - 1;
+      writeByte();
+    }
+    bitsLeft -= bitsToWrite;
+    current |= value << bitsLeft;
+    if (bitsLeft == 0) {
+      writeByte();
+    }
+  }
+
+  public void getPosition(PositionRecorder recorder) throws IOException {
+    output.getPosition(recorder);
+    recorder.addPosition(8 - bitsLeft);
+  }
+}
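
write() above packs values MSB-first into bytes with the same current/bitsLeft
bookkeeping in reverse. A standalone sketch that collects the packed bytes in
memory (illustrative only; the real writer emits each byte through a
RunLengthByteWriter):

    import java.io.ByteArrayOutputStream;

    public class BitPackSketch {
      // Mirrors the current/bitsLeft bookkeeping in BitFieldWriter.write().
      public static byte[] pack(int[] values, int bitSize) {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        int current = 0, bitsLeft = 8;
        for (int value : values) {
          int bitsToWrite = bitSize;
          while (bitsToWrite > bitsLeft) {
            // put the high bits of value into the bottom of the current byte
            current |= value >>> (bitsToWrite - bitsLeft);
            bitsToWrite -= bitsLeft;
            value &= (1 << bitsToWrite) - 1;   // drop the bits just written
            out.write(current);
            current = 0;
            bitsLeft = 8;
          }
          bitsLeft -= bitsToWrite;
          current |= value << bitsLeft;
          if (bitsLeft == 0) {
            out.write(current);
            current = 0;
            bitsLeft = 8;
          }
        }
        if (bitsLeft != 8) {
          out.write(current);                  // flush the partial last byte
        }
        return out.toByteArray();
      }

      public static void main(String[] args) {
        // Packing the bits 1,0,1,1,0,1,0,0 yields the single byte 0xB4.
        byte[] packed = pack(new int[]{1, 0, 1, 1, 0, 1, 0, 0}, 1);
        System.out.printf("0x%02X%n", packed[0] & 0xFF);   // 0xB4
      }
    }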

http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/orc/src/java/org/apache/orc/impl/BufferChunk.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/BufferChunk.java b/orc/src/java/org/apache/orc/impl/BufferChunk.java
new file mode 100644
index 0000000..da43b96
--- /dev/null
+++ b/orc/src/java/org/apache/orc/impl/BufferChunk.java
@@ -0,0 +1,85 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.orc.impl;
+
+import org.apache.hadoop.hive.common.io.DiskRange;
+import org.apache.hadoop.hive.common.io.DiskRangeList;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.nio.ByteBuffer;
+
+/**
+ * The sections of a stripe that we have read.
+ * This might not match the disk ranges exactly: one disk range can be split
+ * into multiple buffer chunks, depending on DFS block boundaries.
+ */
+public class BufferChunk extends DiskRangeList {
+
+  private static final Logger LOG =
+      LoggerFactory.getLogger(BufferChunk.class);
+  final ByteBuffer chunk;
+
+  public BufferChunk(ByteBuffer chunk, long offset) {
+    super(offset, offset + chunk.remaining());
+    this.chunk = chunk;
+  }
+
+  public ByteBuffer getChunk() {
+    return chunk;
+  }
+
+  @Override
+  public boolean hasData() {
+    return chunk != null;
+  }
+
+  @Override
+  public final String toString() {
+    boolean makesSense = chunk.remaining() == (end - offset);
+    return "data range [" + offset + ", " + end + "), size: " + chunk.remaining()
+        + (makesSense ? "" : "(!)") + " type: " +
+        (chunk.isDirect() ? "direct" : "array-backed");
+  }
+
+  @Override
+  public DiskRange sliceAndShift(long offset, long end, long shiftBy) {
+    assert offset <= end && offset >= this.offset && end <= this.end;
+    assert offset + shiftBy >= 0;
+    ByteBuffer sliceBuf = chunk.slice();
+    int newPos = (int) (offset - this.offset);
+    int newLimit = newPos + (int) (end - offset);
+    try {
+      sliceBuf.position(newPos);
+      sliceBuf.limit(newLimit);
+    } catch (Throwable t) {
+      LOG.error("Failed to slice buffer chunk with range" + " [" + this.offset + ", " + this.end
+          + "), position: " + chunk.position() + " limit: " + chunk.limit() + ", "
+          + (chunk.isDirect() ? "direct" : "array") + "; to [" + offset + ", " + end + ") "
+          + t.getClass());
+      throw new RuntimeException(t);
+    }
+    return new BufferChunk(sliceBuf, offset + shiftBy);
+  }
+
+  @Override
+  public ByteBuffer getData() {
+    return chunk;
+  }
+}
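
A short sketch of the offset bookkeeping above (illustrative only, not part of
this patch): a BufferChunk covers the file range [offset, offset + remaining),
and sliceAndShift() returns a sub-range backed by a slice of the same buffer,
re-based by shiftBy:

    import java.nio.ByteBuffer;
    import org.apache.hadoop.hive.common.io.DiskRange;
    import org.apache.orc.impl.BufferChunk;

    public class BufferChunkExample {
      public static void main(String[] args) {
        byte[] bytes = new byte[100];
        // A chunk whose data starts at file offset 1000 and ends at 1100.
        BufferChunk chunk = new BufferChunk(ByteBuffer.wrap(bytes), 1000);
        System.out.println(chunk);   // data range [1000, 1100), size: 100 ...

        // Take bytes for file range [1020, 1050) and re-base them to offset 20.
        DiskRange slice = chunk.sliceAndShift(1020, 1050, -1000);
        System.out.println(slice.getOffset() + " .. " + slice.getEnd()); // 20 .. 50
        System.out.println(slice.getData().remaining());                 // 30
      }
    }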

http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/orc/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java b/orc/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
new file mode 100644
index 0000000..745ed9a
--- /dev/null
+++ b/orc/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
@@ -0,0 +1,1097 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.orc.impl;
+
+import java.sql.Date;
+import java.sql.Timestamp;
+
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.WritableComparator;
+import org.apache.orc.BinaryColumnStatistics;
+import org.apache.orc.BooleanColumnStatistics;
+import org.apache.orc.ColumnStatistics;
+import org.apache.orc.DateColumnStatistics;
+import org.apache.orc.DecimalColumnStatistics;
+import org.apache.orc.DoubleColumnStatistics;
+import org.apache.orc.IntegerColumnStatistics;
+import org.apache.orc.OrcProto;
+import org.apache.orc.StringColumnStatistics;
+import org.apache.orc.TimestampColumnStatistics;
+import org.apache.orc.TypeDescription;
+
+public class ColumnStatisticsImpl implements ColumnStatistics {
+
+  private static final class BooleanStatisticsImpl extends ColumnStatisticsImpl
+      implements BooleanColumnStatistics {
+    private long trueCount = 0;
+
+    BooleanStatisticsImpl(OrcProto.ColumnStatistics stats) {
+      super(stats);
+      OrcProto.BucketStatistics bkt = stats.getBucketStatistics();
+      trueCount = bkt.getCount(0);
+    }
+
+    BooleanStatisticsImpl() {
+    }
+
+    @Override
+    public void reset() {
+      super.reset();
+      trueCount = 0;
+    }
+
+    @Override
+    public void updateBoolean(boolean value, int repetitions) {
+      if (value) {
+        trueCount += repetitions;
+      }
+    }
+
+    @Override
+    public void merge(ColumnStatisticsImpl other) {
+      if (other instanceof BooleanStatisticsImpl) {
+        BooleanStatisticsImpl bkt = (BooleanStatisticsImpl) other;
+        trueCount += bkt.trueCount;
+      } else {
+        if (isStatsExists() && trueCount != 0) {
+          throw new IllegalArgumentException("Incompatible merging of boolean column statistics");
+        }
+      }
+      super.merge(other);
+    }
+
+    @Override
+    public OrcProto.ColumnStatistics.Builder serialize() {
+      OrcProto.ColumnStatistics.Builder builder = super.serialize();
+      OrcProto.BucketStatistics.Builder bucket =
+        OrcProto.BucketStatistics.newBuilder();
+      bucket.addCount(trueCount);
+      builder.setBucketStatistics(bucket);
+      return builder;
+    }
+
+    @Override
+    public long getFalseCount() {
+      return getNumberOfValues() - trueCount;
+    }
+
+    @Override
+    public long getTrueCount() {
+      return trueCount;
+    }
+
+    @Override
+    public String toString() {
+      return super.toString() + " true: " + trueCount;
+    }
+  }
+
+  private static final class IntegerStatisticsImpl extends ColumnStatisticsImpl
+      implements IntegerColumnStatistics {
+
+    private long minimum = Long.MAX_VALUE;
+    private long maximum = Long.MIN_VALUE;
+    private long sum = 0;
+    private boolean hasMinimum = false;
+    private boolean overflow = false;
+
+    IntegerStatisticsImpl() {
+    }
+
+    IntegerStatisticsImpl(OrcProto.ColumnStatistics stats) {
+      super(stats);
+      OrcProto.IntegerStatistics intStat = stats.getIntStatistics();
+      if (intStat.hasMinimum()) {
+        hasMinimum = true;
+        minimum = intStat.getMinimum();
+      }
+      if (intStat.hasMaximum()) {
+        maximum = intStat.getMaximum();
+      }
+      if (intStat.hasSum()) {
+        sum = intStat.getSum();
+      } else {
+        overflow = true;
+      }
+    }
+
+    @Override
+    public void reset() {
+      super.reset();
+      hasMinimum = false;
+      minimum = Long.MAX_VALUE;
+      maximum = Long.MIN_VALUE;
+      sum = 0;
+      overflow = false;
+    }
+
+    @Override
+    public void updateInteger(long value, int repetitions) {
+      if (!hasMinimum) {
+        hasMinimum = true;
+        minimum = value;
+        maximum = value;
+      } else if (value < minimum) {
+        minimum = value;
+      } else if (value > maximum) {
+        maximum = value;
+      }
+      if (!overflow) {
+        boolean wasPositive = sum >= 0;
+        sum += value * repetitions;
+        if ((value >= 0) == wasPositive) {
+          overflow = (sum >= 0) != wasPositive;
+        }
+      }
+    }
+
+    @Override
+    public void merge(ColumnStatisticsImpl other) {
+      if (other instanceof IntegerStatisticsImpl) {
+        IntegerStatisticsImpl otherInt = (IntegerStatisticsImpl) other;
+        if (!hasMinimum) {
+          hasMinimum = otherInt.hasMinimum;
+          minimum = otherInt.minimum;
+          maximum = otherInt.maximum;
+        } else if (otherInt.hasMinimum) {
+          if (otherInt.minimum < minimum) {
+            minimum = otherInt.minimum;
+          }
+          if (otherInt.maximum > maximum) {
+            maximum = otherInt.maximum;
+          }
+        }
+
+        overflow |= otherInt.overflow;
+        if (!overflow) {
+          boolean wasPositive = sum >= 0;
+          sum += otherInt.sum;
+          if ((otherInt.sum >= 0) == wasPositive) {
+            overflow = (sum >= 0) != wasPositive;
+          }
+        }
+      } else {
+        if (isStatsExists() && hasMinimum) {
+          throw new IllegalArgumentException("Incompatible merging of integer column statistics");
+        }
+      }
+      super.merge(other);
+    }
+
+    @Override
+    public OrcProto.ColumnStatistics.Builder serialize() {
+      OrcProto.ColumnStatistics.Builder builder = super.serialize();
+      OrcProto.IntegerStatistics.Builder intb =
+        OrcProto.IntegerStatistics.newBuilder();
+      if (hasMinimum) {
+        intb.setMinimum(minimum);
+        intb.setMaximum(maximum);
+      }
+      if (!overflow) {
+        intb.setSum(sum);
+      }
+      builder.setIntStatistics(intb);
+      return builder;
+    }
+
+    @Override
+    public long getMinimum() {
+      return minimum;
+    }
+
+    @Override
+    public long getMaximum() {
+      return maximum;
+    }
+
+    @Override
+    public boolean isSumDefined() {
+      return !overflow;
+    }
+
+    @Override
+    public long getSum() {
+      return sum;
+    }
+
+    @Override
+    public String toString() {
+      StringBuilder buf = new StringBuilder(super.toString());
+      if (hasMinimum) {
+        buf.append(" min: ");
+        buf.append(minimum);
+        buf.append(" max: ");
+        buf.append(maximum);
+      }
+      if (!overflow) {
+        buf.append(" sum: ");
+        buf.append(sum);
+      }
+      return buf.toString();
+    }
+  }
+
+  private static final class DoubleStatisticsImpl extends ColumnStatisticsImpl
+       implements DoubleColumnStatistics {
+    private boolean hasMinimum = false;
+    private double minimum = Double.MAX_VALUE;
+    private double maximum = Double.MIN_VALUE;
+    private double sum = 0;
+
+    DoubleStatisticsImpl() {
+    }
+
+    DoubleStatisticsImpl(OrcProto.ColumnStatistics stats) {
+      super(stats);
+      OrcProto.DoubleStatistics dbl = stats.getDoubleStatistics();
+      if (dbl.hasMinimum()) {
+        hasMinimum = true;
+        minimum = dbl.getMinimum();
+      }
+      if (dbl.hasMaximum()) {
+        maximum = dbl.getMaximum();
+      }
+      if (dbl.hasSum()) {
+        sum = dbl.getSum();
+      }
+    }
+
+    @Override
+    public void reset() {
+      super.reset();
+      hasMinimum = false;
+      minimum = Double.MAX_VALUE;
+      maximum = Double.MIN_VALUE;
+      sum = 0;
+    }
+
+    @Override
+    public void updateDouble(double value) {
+      if (!hasMinimum) {
+        hasMinimum = true;
+        minimum = value;
+        maximum = value;
+      } else if (value < minimum) {
+        minimum = value;
+      } else if (value > maximum) {
+        maximum = value;
+      }
+      sum += value;
+    }
+
+    @Override
+    public void merge(ColumnStatisticsImpl other) {
+      if (other instanceof DoubleStatisticsImpl) {
+        DoubleStatisticsImpl dbl = (DoubleStatisticsImpl) other;
+        if (!hasMinimum) {
+          hasMinimum = dbl.hasMinimum;
+          minimum = dbl.minimum;
+          maximum = dbl.maximum;
+        } else if (dbl.hasMinimum) {
+          if (dbl.minimum < minimum) {
+            minimum = dbl.minimum;
+          }
+          if (dbl.maximum > maximum) {
+            maximum = dbl.maximum;
+          }
+        }
+        sum += dbl.sum;
+      } else {
+        if (isStatsExists() && hasMinimum) {
+          throw new IllegalArgumentException("Incompatible merging of double column statistics");
+        }
+      }
+      super.merge(other);
+    }
+
+    @Override
+    public OrcProto.ColumnStatistics.Builder serialize() {
+      OrcProto.ColumnStatistics.Builder builder = super.serialize();
+      OrcProto.DoubleStatistics.Builder dbl =
+        OrcProto.DoubleStatistics.newBuilder();
+      if (hasMinimum) {
+        dbl.setMinimum(minimum);
+        dbl.setMaximum(maximum);
+      }
+      dbl.setSum(sum);
+      builder.setDoubleStatistics(dbl);
+      return builder;
+    }
+
+    @Override
+    public double getMinimum() {
+      return minimum;
+    }
+
+    @Override
+    public double getMaximum() {
+      return maximum;
+    }
+
+    @Override
+    public double getSum() {
+      return sum;
+    }
+
+    @Override
+    public String toString() {
+      StringBuilder buf = new StringBuilder(super.toString());
+      if (hasMinimum) {
+        buf.append(" min: ");
+        buf.append(minimum);
+        buf.append(" max: ");
+        buf.append(maximum);
+      }
+      buf.append(" sum: ");
+      buf.append(sum);
+      return buf.toString();
+    }
+  }
+
+  protected static final class StringStatisticsImpl extends ColumnStatisticsImpl
+      implements StringColumnStatistics {
+    private Text minimum = null;
+    private Text maximum = null;
+    private long sum = 0;
+
+    StringStatisticsImpl() {
+    }
+
+    StringStatisticsImpl(OrcProto.ColumnStatistics stats) {
+      super(stats);
+      OrcProto.StringStatistics str = stats.getStringStatistics();
+      if (str.hasMaximum()) {
+        maximum = new Text(str.getMaximum());
+      }
+      if (str.hasMinimum()) {
+        minimum = new Text(str.getMinimum());
+      }
+      if(str.hasSum()) {
+        sum = str.getSum();
+      }
+    }
+
+    @Override
+    public void reset() {
+      super.reset();
+      minimum = null;
+      maximum = null;
+      sum = 0;
+    }
+
+    @Override
+    public void updateString(Text value) {
+      if (minimum == null) {
+        maximum = minimum = new Text(value);
+      } else if (minimum.compareTo(value) > 0) {
+        minimum = new Text(value);
+      } else if (maximum.compareTo(value) < 0) {
+        maximum = new Text(value);
+      }
+      sum += value.getLength();
+    }
+
+    @Override
+    public void updateString(byte[] bytes, int offset, int length,
+                             int repetitions) {
+      if (minimum == null) {
+        maximum = minimum = new Text();
+        maximum.set(bytes, offset, length);
+      } else if (WritableComparator.compareBytes(minimum.getBytes(), 0,
+          minimum.getLength(), bytes, offset, length) > 0) {
+        minimum = new Text();
+        minimum.set(bytes, offset, length);
+      } else if (WritableComparator.compareBytes(maximum.getBytes(), 0,
+          maximum.getLength(), bytes, offset, length) < 0) {
+        maximum = new Text();
+        maximum.set(bytes, offset, length);
+      }
+      sum += length * repetitions;
+    }
+
+    @Override
+    public void merge(ColumnStatisticsImpl other) {
+      if (other instanceof StringStatisticsImpl) {
+        StringStatisticsImpl str = (StringStatisticsImpl) other;
+        if (minimum == null) {
+          if (str.minimum != null) {
+            maximum = new Text(str.getMaximum());
+            minimum = new Text(str.getMinimum());
+          } else {
+            /* both are empty */
+            maximum = minimum = null;
+          }
+        } else if (str.minimum != null) {
+          if (minimum.compareTo(str.minimum) > 0) {
+            minimum = new Text(str.getMinimum());
+          }
+          if (maximum.compareTo(str.maximum) < 0) {
+            maximum = new Text(str.getMaximum());
+          }
+        }
+        sum += str.sum;
+      } else {
+        if (isStatsExists() && minimum != null) {
+          throw new IllegalArgumentException("Incompatible merging of string column statistics");
+        }
+      }
+      super.merge(other);
+    }
+
+    @Override
+    public OrcProto.ColumnStatistics.Builder serialize() {
+      OrcProto.ColumnStatistics.Builder result = super.serialize();
+      OrcProto.StringStatistics.Builder str =
+        OrcProto.StringStatistics.newBuilder();
+      if (getNumberOfValues() != 0) {
+        str.setMinimum(getMinimum());
+        str.setMaximum(getMaximum());
+        str.setSum(sum);
+      }
+      result.setStringStatistics(str);
+      return result;
+    }
+
+    @Override
+    public String getMinimum() {
+      return minimum == null ? null : minimum.toString();
+    }
+
+    @Override
+    public String getMaximum() {
+      return maximum == null ? null : maximum.toString();
+    }
+
+    @Override
+    public long getSum() {
+      return sum;
+    }
+
+    @Override
+    public String toString() {
+      StringBuilder buf = new StringBuilder(super.toString());
+      if (getNumberOfValues() != 0) {
+        buf.append(" min: ");
+        buf.append(getMinimum());
+        buf.append(" max: ");
+        buf.append(getMaximum());
+        buf.append(" sum: ");
+        buf.append(sum);
+      }
+      return buf.toString();
+    }
+  }
+
+  protected static final class BinaryStatisticsImpl extends ColumnStatisticsImpl implements
+      BinaryColumnStatistics {
+
+    private long sum = 0;
+
+    BinaryStatisticsImpl() {
+    }
+
+    BinaryStatisticsImpl(OrcProto.ColumnStatistics stats) {
+      super(stats);
+      OrcProto.BinaryStatistics binStats = stats.getBinaryStatistics();
+      if (binStats.hasSum()) {
+        sum = binStats.getSum();
+      }
+    }
+
+    @Override
+    public void reset() {
+      super.reset();
+      sum = 0;
+    }
+
+    @Override
+    public void updateBinary(BytesWritable value) {
+      sum += value.getLength();
+    }
+
+    @Override
+    public void updateBinary(byte[] bytes, int offset, int length,
+                             int repetitions) {
+      sum += length * repetitions;
+    }
+
+    @Override
+    public void merge(ColumnStatisticsImpl other) {
+      if (other instanceof BinaryColumnStatistics) {
+        BinaryStatisticsImpl bin = (BinaryStatisticsImpl) other;
+        sum += bin.sum;
+      } else {
+        if (isStatsExists() && sum != 0) {
+          throw new IllegalArgumentException("Incompatible merging of binary column statistics");
+        }
+      }
+      super.merge(other);
+    }
+
+    @Override
+    public long getSum() {
+      return sum;
+    }
+
+    @Override
+    public OrcProto.ColumnStatistics.Builder serialize() {
+      OrcProto.ColumnStatistics.Builder result = super.serialize();
+      OrcProto.BinaryStatistics.Builder bin = OrcProto.BinaryStatistics.newBuilder();
+      bin.setSum(sum);
+      result.setBinaryStatistics(bin);
+      return result;
+    }
+
+    @Override
+    public String toString() {
+      StringBuilder buf = new StringBuilder(super.toString());
+      if (getNumberOfValues() != 0) {
+        buf.append(" sum: ");
+        buf.append(sum);
+      }
+      return buf.toString();
+    }
+  }
+
+  private static final class DecimalStatisticsImpl extends ColumnStatisticsImpl
+      implements DecimalColumnStatistics {
+    private HiveDecimal minimum = null;
+    private HiveDecimal maximum = null;
+    private HiveDecimal sum = HiveDecimal.ZERO;
+
+    DecimalStatisticsImpl() {
+    }
+
+    DecimalStatisticsImpl(OrcProto.ColumnStatistics stats) {
+      super(stats);
+      OrcProto.DecimalStatistics dec = stats.getDecimalStatistics();
+      if (dec.hasMaximum()) {
+        maximum = HiveDecimal.create(dec.getMaximum());
+      }
+      if (dec.hasMinimum()) {
+        minimum = HiveDecimal.create(dec.getMinimum());
+      }
+      if (dec.hasSum()) {
+        sum = HiveDecimal.create(dec.getSum());
+      } else {
+        sum = null;
+      }
+    }
+
+    @Override
+    public void reset() {
+      super.reset();
+      minimum = null;
+      maximum = null;
+      sum = HiveDecimal.ZERO;
+    }
+
+    @Override
+    public void updateDecimal(HiveDecimal value) {
+      if (minimum == null) {
+        minimum = value;
+        maximum = value;
+      } else if (minimum.compareTo(value) > 0) {
+        minimum = value;
+      } else if (maximum.compareTo(value) < 0) {
+        maximum = value;
+      }
+      if (sum != null) {
+        sum = sum.add(value);
+      }
+    }
+
+    @Override
+    public void merge(ColumnStatisticsImpl other) {
+      if (other instanceof DecimalStatisticsImpl) {
+        DecimalStatisticsImpl dec = (DecimalStatisticsImpl) other;
+        if (minimum == null) {
+          minimum = dec.minimum;
+          maximum = dec.maximum;
+          sum = dec.sum;
+        } else if (dec.minimum != null) {
+          if (minimum.compareTo(dec.minimum) > 0) {
+            minimum = dec.minimum;
+          }
+          if (maximum.compareTo(dec.maximum) < 0) {
+            maximum = dec.maximum;
+          }
+          if (sum == null || dec.sum == null) {
+            sum = null;
+          } else {
+            sum = sum.add(dec.sum);
+          }
+        }
+      } else {
+        if (isStatsExists() && minimum != null) {
+          throw new IllegalArgumentException("Incompatible merging of decimal column statistics");
+        }
+      }
+      super.merge(other);
+    }
+
+    @Override
+    public OrcProto.ColumnStatistics.Builder serialize() {
+      OrcProto.ColumnStatistics.Builder result = super.serialize();
+      OrcProto.DecimalStatistics.Builder dec =
+          OrcProto.DecimalStatistics.newBuilder();
+      if (getNumberOfValues() != 0 && minimum != null) {
+        dec.setMinimum(minimum.toString());
+        dec.setMaximum(maximum.toString());
+      }
+      if (sum != null) {
+        dec.setSum(sum.toString());
+      }
+      result.setDecimalStatistics(dec);
+      return result;
+    }
+
+    @Override
+    public HiveDecimal getMinimum() {
+      return minimum;
+    }
+
+    @Override
+    public HiveDecimal getMaximum() {
+      return maximum;
+    }
+
+    @Override
+    public HiveDecimal getSum() {
+      return sum;
+    }
+
+    @Override
+    public String toString() {
+      StringBuilder buf = new StringBuilder(super.toString());
+      if (getNumberOfValues() != 0) {
+        buf.append(" min: ");
+        buf.append(minimum);
+        buf.append(" max: ");
+        buf.append(maximum);
+        if (sum != null) {
+          buf.append(" sum: ");
+          buf.append(sum);
+        }
+      }
+      return buf.toString();
+    }
+  }
+
+  private static final class DateStatisticsImpl extends ColumnStatisticsImpl
+      implements DateColumnStatistics {
+    private Integer minimum = null;
+    private Integer maximum = null;
+
+    DateStatisticsImpl() {
+    }
+
+    DateStatisticsImpl(OrcProto.ColumnStatistics stats) {
+      super(stats);
+      OrcProto.DateStatistics dateStats = stats.getDateStatistics();
+      // min,max values serialized/deserialized as int (days since epoch)
+      if (dateStats.hasMaximum()) {
+        maximum = dateStats.getMaximum();
+      }
+      if (dateStats.hasMinimum()) {
+        minimum = dateStats.getMinimum();
+      }
+    }
+
+    @Override
+    public void reset() {
+      super.reset();
+      minimum = null;
+      maximum = null;
+    }
+
+    @Override
+    public void updateDate(DateWritable value) {
+      if (minimum == null) {
+        minimum = value.getDays();
+        maximum = value.getDays();
+      } else if (minimum > value.getDays()) {
+        minimum = value.getDays();
+      } else if (maximum < value.getDays()) {
+        maximum = value.getDays();
+      }
+    }
+
+    @Override
+    public void updateDate(int value) {
+      if (minimum == null) {
+        minimum = value;
+        maximum = value;
+      } else if (minimum > value) {
+        minimum = value;
+      } else if (maximum < value) {
+        maximum = value;
+      }
+    }
+
+    @Override
+    public void merge(ColumnStatisticsImpl other) {
+      if (other instanceof DateStatisticsImpl) {
+        DateStatisticsImpl dateStats = (DateStatisticsImpl) other;
+        if (minimum == null) {
+          minimum = dateStats.minimum;
+          maximum = dateStats.maximum;
+        } else if (dateStats.minimum != null) {
+          if (minimum > dateStats.minimum) {
+            minimum = dateStats.minimum;
+          }
+          if (maximum < dateStats.maximum) {
+            maximum = dateStats.maximum;
+          }
+        }
+      } else {
+        if (isStatsExists() && minimum != null) {
+          throw new IllegalArgumentException("Incompatible merging of date column statistics");
+        }
+      }
+      super.merge(other);
+    }
+
+    @Override
+    public OrcProto.ColumnStatistics.Builder serialize() {
+      OrcProto.ColumnStatistics.Builder result = super.serialize();
+      OrcProto.DateStatistics.Builder dateStats =
+          OrcProto.DateStatistics.newBuilder();
+      if (getNumberOfValues() != 0 && minimum != null) {
+        dateStats.setMinimum(minimum);
+        dateStats.setMaximum(maximum);
+      }
+      result.setDateStatistics(dateStats);
+      return result;
+    }
+
+    private transient final DateWritable minDate = new DateWritable();
+    private transient final DateWritable maxDate = new DateWritable();
+
+    @Override
+    public Date getMinimum() {
+      if (minimum == null) {
+        return null;
+      }
+      minDate.set(minimum);
+      return minDate.get();
+    }
+
+    @Override
+    public Date getMaximum() {
+      if (maximum == null) {
+        return null;
+      }
+      maxDate.set(maximum);
+      return maxDate.get();
+    }
+
+    @Override
+    public String toString() {
+      StringBuilder buf = new StringBuilder(super.toString());
+      if (getNumberOfValues() != 0) {
+        buf.append(" min: ");
+        buf.append(getMinimum());
+        buf.append(" max: ");
+        buf.append(getMaximum());
+      }
+      return buf.toString();
+    }
+  }
+
+  private static final class TimestampStatisticsImpl extends ColumnStatisticsImpl
+      implements TimestampColumnStatistics {
+    private Long minimum = null;
+    private Long maximum = null;
+
+    TimestampStatisticsImpl() {
+    }
+
+    TimestampStatisticsImpl(OrcProto.ColumnStatistics stats) {
+      super(stats);
+      OrcProto.TimestampStatistics timestampStats = stats.getTimestampStatistics();
+      // min,max values serialized/deserialized as long (milliseconds since epoch)
+      if (timestampStats.hasMaximum()) {
+        maximum = timestampStats.getMaximum();
+      }
+      if (timestampStats.hasMinimum()) {
+        minimum = timestampStats.getMinimum();
+      }
+    }
+
+    @Override
+    public void reset() {
+      super.reset();
+      minimum = null;
+      maximum = null;
+    }
+
+    @Override
+    public void updateTimestamp(Timestamp value) {
+      if (minimum == null) {
+        minimum = value.getTime();
+        maximum = value.getTime();
+      } else if (minimum > value.getTime()) {
+        minimum = value.getTime();
+      } else if (maximum < value.getTime()) {
+        maximum = value.getTime();
+      }
+    }
+
+    @Override
+    public void updateTimestamp(long value) {
+      if (minimum == null) {
+        minimum = value;
+        maximum = value;
+      } else if (minimum > value) {
+        minimum = value;
+      } else if (maximum < value) {
+        maximum = value;
+      }
+    }
+
+    @Override
+    public void merge(ColumnStatisticsImpl other) {
+      if (other instanceof TimestampStatisticsImpl) {
+        TimestampStatisticsImpl timestampStats = (TimestampStatisticsImpl) other;
+        if (minimum == null) {
+          minimum = timestampStats.minimum;
+          maximum = timestampStats.maximum;
+        } else if (timestampStats.minimum != null) {
+          if (minimum > timestampStats.minimum) {
+            minimum = timestampStats.minimum;
+          }
+          if (maximum < timestampStats.maximum) {
+            maximum = timestampStats.maximum;
+          }
+        }
+      } else {
+        if (isStatsExists() && minimum != null) {
+          throw new IllegalArgumentException("Incompatible merging of timestamp column statistics");
+        }
+      }
+      super.merge(other);
+    }
+
+    @Override
+    public OrcProto.ColumnStatistics.Builder serialize() {
+      OrcProto.ColumnStatistics.Builder result = super.serialize();
+      OrcProto.TimestampStatistics.Builder timestampStats = OrcProto.TimestampStatistics
+          .newBuilder();
+      if (getNumberOfValues() != 0 && minimum != null) {
+        timestampStats.setMinimum(minimum);
+        timestampStats.setMaximum(maximum);
+      }
+      result.setTimestampStatistics(timestampStats);
+      return result;
+    }
+
+    @Override
+    public Timestamp getMinimum() {
+      return minimum == null ? null : new Timestamp(minimum);
+    }
+
+    @Override
+    public Timestamp getMaximum() {
+      return maximum == null ? null : new Timestamp(maximum);
+    }
+
+    @Override
+    public String toString() {
+      StringBuilder buf = new StringBuilder(super.toString());
+      if (getNumberOfValues() != 0) {
+        buf.append(" min: ");
+        buf.append(getMinimum());
+        buf.append(" max: ");
+        buf.append(getMaximum());
+      }
+      return buf.toString();
+    }
+  }
+
+  private long count = 0;
+  private boolean hasNull = false;
+
+  ColumnStatisticsImpl(OrcProto.ColumnStatistics stats) {
+    if (stats.hasNumberOfValues()) {
+      count = stats.getNumberOfValues();
+    }
+
+    if (stats.hasHasNull()) {
+      hasNull = stats.getHasNull();
+    } else {
+      hasNull = true;
+    }
+  }
+
+  ColumnStatisticsImpl() {
+  }
+
+  public void increment() {
+    count += 1;
+  }
+
+  public void increment(int count) {
+    this.count += count;
+  }
+
+  public void setNull() {
+    hasNull = true;
+  }
+
+  public void updateBoolean(boolean value, int repetitions) {
+    throw new UnsupportedOperationException("Can't update boolean");
+  }
+
+  public void updateInteger(long value, int repetitions) {
+    throw new UnsupportedOperationException("Can't update integer");
+  }
+
+  public void updateDouble(double value) {
+    throw new UnsupportedOperationException("Can't update double");
+  }
+
+  public void updateString(Text value) {
+    throw new UnsupportedOperationException("Can't update string");
+  }
+
+  public void updateString(byte[] bytes, int offset, int length,
+                           int repetitions) {
+    throw new UnsupportedOperationException("Can't update string");
+  }
+
+  public void updateBinary(BytesWritable value) {
+    throw new UnsupportedOperationException("Can't update binary");
+  }
+
+  public void updateBinary(byte[] bytes, int offset, int length,
+                           int repetitions) {
+    throw new UnsupportedOperationException("Can't update binary");
+  }
+
+  public void updateDecimal(HiveDecimal value) {
+    throw new UnsupportedOperationException("Can't update decimal");
+  }
+
+  public void updateDate(DateWritable value) {
+    throw new UnsupportedOperationException("Can't update date");
+  }
+
+  public void updateDate(int value) {
+    throw new UnsupportedOperationException("Can't update date");
+  }
+
+  public void updateTimestamp(Timestamp value) {
+    throw new UnsupportedOperationException("Can't update timestamp");
+  }
+
+  public void updateTimestamp(long value) {
+    throw new UnsupportedOperationException("Can't update timestamp");
+  }
+
+  public boolean isStatsExists() {
+    return count > 0 || hasNull;
+  }
+
+  public void merge(ColumnStatisticsImpl stats) {
+    count += stats.count;
+    hasNull |= stats.hasNull;
+  }
+
+  public void reset() {
+    count = 0;
+    hasNull = false;
+  }
+
+  @Override
+  public long getNumberOfValues() {
+    return count;
+  }
+
+  @Override
+  public boolean hasNull() {
+    return hasNull;
+  }
+
+  @Override
+  public String toString() {
+    return "count: " + count + " hasNull: " + hasNull;
+  }
+
+  public OrcProto.ColumnStatistics.Builder serialize() {
+    OrcProto.ColumnStatistics.Builder builder =
+      OrcProto.ColumnStatistics.newBuilder();
+    builder.setNumberOfValues(count);
+    builder.setHasNull(hasNull);
+    return builder;
+  }
+
+  public static ColumnStatisticsImpl create(TypeDescription schema) {
+    switch (schema.getCategory()) {
+      case BOOLEAN:
+        return new BooleanStatisticsImpl();
+      case BYTE:
+      case SHORT:
+      case INT:
+      case LONG:
+        return new IntegerStatisticsImpl();
+      case FLOAT:
+      case DOUBLE:
+        return new DoubleStatisticsImpl();
+      case STRING:
+      case CHAR:
+      case VARCHAR:
+        return new StringStatisticsImpl();
+      case DECIMAL:
+        return new DecimalStatisticsImpl();
+      case DATE:
+        return new DateStatisticsImpl();
+      case TIMESTAMP:
+        return new TimestampStatisticsImpl();
+      case BINARY:
+        return new BinaryStatisticsImpl();
+      default:
+        return new ColumnStatisticsImpl();
+    }
+  }
+
+  public static ColumnStatisticsImpl deserialize(OrcProto.ColumnStatistics stats) {
+    if (stats.hasBucketStatistics()) {
+      return new BooleanStatisticsImpl(stats);
+    } else if (stats.hasIntStatistics()) {
+      return new IntegerStatisticsImpl(stats);
+    } else if (stats.hasDoubleStatistics()) {
+      return new DoubleStatisticsImpl(stats);
+    } else if (stats.hasStringStatistics()) {
+      return new StringStatisticsImpl(stats);
+    } else if (stats.hasDecimalStatistics()) {
+      return new DecimalStatisticsImpl(stats);
+    } else if (stats.hasDateStatistics()) {
+      return new DateStatisticsImpl(stats);
+    } else if (stats.hasTimestampStatistics()) {
+      return new TimestampStatisticsImpl(stats);
+    } else if(stats.hasBinaryStatistics()) {
+      return new BinaryStatisticsImpl(stats);
+    } else {
+      return new ColumnStatisticsImpl(stats);
+    }
+  }
+}
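
A minimal usage sketch (not part of this patch) of the statistics API above: build per-column statistics while writing, merge stripe-level statistics into file-level ones, and round-trip through the protobuf form. It assumes ColumnStatisticsImpl is public in org.apache.orc.impl like the other impl classes in this patch, that the generated OrcProto classes live in org.apache.orc, and that TypeDescription exposes a createLong() factory alongside the createBoolean() shown earlier.

  import org.apache.orc.OrcProto;
  import org.apache.orc.TypeDescription;
  import org.apache.orc.impl.ColumnStatisticsImpl;

  public class ColumnStatsSketch {
    public static void main(String[] args) {
      TypeDescription schema = TypeDescription.createLong();   // assumed factory method
      ColumnStatisticsImpl stripe1 = ColumnStatisticsImpl.create(schema);
      ColumnStatisticsImpl stripe2 = ColumnStatisticsImpl.create(schema);

      // each stripe counts its values and tracks min/max/sum
      stripe1.increment();
      stripe1.updateInteger(42, 1);
      stripe2.increment(2);
      stripe2.updateInteger(-7, 2);
      stripe2.setNull();

      // stripe statistics are merged into the file-level statistics
      stripe1.merge(stripe2);
      System.out.println(stripe1);   // e.g. "count: 3 hasNull: true min: -7 max: 42 sum: 28"

      // serialize to the protobuf stored in the footer, then read it back
      OrcProto.ColumnStatistics pb = stripe1.serialize().build();
      ColumnStatisticsImpl readBack = ColumnStatisticsImpl.deserialize(pb);
      System.out.println(readBack.getNumberOfValues());   // 3
    }
  }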

http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/orc/src/java/org/apache/orc/impl/DirectDecompressionCodec.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/DirectDecompressionCodec.java b/orc/src/java/org/apache/orc/impl/DirectDecompressionCodec.java
new file mode 100644
index 0000000..7e0110d
--- /dev/null
+++ b/orc/src/java/org/apache/orc/impl/DirectDecompressionCodec.java
@@ -0,0 +1,28 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.orc.impl;
+
+import org.apache.orc.CompressionCodec;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+public interface DirectDecompressionCodec extends CompressionCodec {
+  public boolean isAvailable();
+  public void directDecompress(ByteBuffer in, ByteBuffer out) throws IOException;
+}
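
A hypothetical sketch (class and field names are illustrative, not part of this patch) of how a concrete codec could implement DirectDecompressionCodec by delegating to the HadoopShims direct decompressor defined later in this patch; callers are expected to check isAvailable() and fall back to the streaming decompression path when it returns false.

  package org.apache.orc.impl;

  import java.io.IOException;
  import java.nio.ByteBuffer;

  // Illustrative only: a real codec also implements the rest of CompressionCodec.
  abstract class DirectZlibCodecSketch implements DirectDecompressionCodec {
    // null on Hadoop versions without direct decompression support
    private final HadoopShims.DirectDecompressor direct =
        HadoopShims.Factory.get().getDirectDecompressor(
            HadoopShims.DirectCompressionType.ZLIB_NOHEADER);

    @Override
    public boolean isAvailable() {
      return direct != null;
    }

    @Override
    public void directDecompress(ByteBuffer in, ByteBuffer out) throws IOException {
      direct.decompress(in, out);
      out.flip();   // make the decompressed bytes readable by the caller
    }
  }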

http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/orc/src/java/org/apache/orc/impl/DynamicByteArray.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/DynamicByteArray.java b/orc/src/java/org/apache/orc/impl/DynamicByteArray.java
new file mode 100644
index 0000000..986c2ac
--- /dev/null
+++ b/orc/src/java/org/apache/orc/impl/DynamicByteArray.java
@@ -0,0 +1,303 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.orc.impl;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.nio.ByteBuffer;
+
+import org.apache.hadoop.io.Text;
+
+/**
+ * A class that is a growable array of bytes. Growth is managed in terms of
+ * chunks that are allocated when needed.
+ */
+public final class DynamicByteArray {
+  static final int DEFAULT_CHUNKSIZE = 32 * 1024;
+  static final int DEFAULT_NUM_CHUNKS = 128;
+
+  private final int chunkSize;        // our allocation sizes
+  private byte[][] data;              // the real data
+  private int length;                 // max set element index +1
+  private int initializedChunks = 0;  // the number of chunks created
+
+  public DynamicByteArray() {
+    this(DEFAULT_NUM_CHUNKS, DEFAULT_CHUNKSIZE);
+  }
+
+  public DynamicByteArray(int numChunks, int chunkSize) {
+    if (chunkSize <= 0) {
+      throw new IllegalArgumentException("bad chunk size: " + chunkSize);
+    }
+    this.chunkSize = chunkSize;
+    data = new byte[numChunks][];
+  }
+
+  /**
+   * Ensure that the chunk at the given index has been allocated.
+   */
+  private void grow(int chunkIndex) {
+    if (chunkIndex >= initializedChunks) {
+      if (chunkIndex >= data.length) {
+        int newSize = Math.max(chunkIndex + 1, 2 * data.length);
+        byte[][] newChunk = new byte[newSize][];
+        System.arraycopy(data, 0, newChunk, 0, data.length);
+        data = newChunk;
+      }
+      for(int i=initializedChunks; i <= chunkIndex; ++i) {
+        data[i] = new byte[chunkSize];
+      }
+      initializedChunks = chunkIndex + 1;
+    }
+  }
+
+  public byte get(int index) {
+    if (index >= length) {
+      throw new IndexOutOfBoundsException("Index " + index +
+                                            " is outside of 0.." +
+                                            (length - 1));
+    }
+    int i = index / chunkSize;
+    int j = index % chunkSize;
+    return data[i][j];
+  }
+
+  public void set(int index, byte value) {
+    int i = index / chunkSize;
+    int j = index % chunkSize;
+    grow(i);
+    if (index >= length) {
+      length = index + 1;
+    }
+    data[i][j] = value;
+  }
+
+  public int add(byte value) {
+    int i = length / chunkSize;
+    int j = length % chunkSize;
+    grow(i);
+    data[i][j] = value;
+    int result = length;
+    length += 1;
+    return result;
+  }
+
+  /**
+   * Copy a slice of a byte array into our buffer.
+   * @param value the array to copy from
+   * @param valueOffset the first location to copy from value
+   * @param valueLength the number of bytes to copy from value
+   * @return the offset of the start of the value
+   */
+  public int add(byte[] value, int valueOffset, int valueLength) {
+    int i = length / chunkSize;
+    int j = length % chunkSize;
+    grow((length + valueLength) / chunkSize);
+    int remaining = valueLength;
+    while (remaining > 0) {
+      int size = Math.min(remaining, chunkSize - j);
+      System.arraycopy(value, valueOffset, data[i], j, size);
+      remaining -= size;
+      valueOffset += size;
+      i += 1;
+      j = 0;
+    }
+    int result = length;
+    length += valueLength;
+    return result;
+  }
+
+  /**
+   * Read the entire stream into this array.
+   * @param in the stream to read from
+   * @throws IOException
+   */
+  public void readAll(InputStream in) throws IOException {
+    int currentChunk = length / chunkSize;
+    int currentOffset = length % chunkSize;
+    grow(currentChunk);
+    int currentLength = in.read(data[currentChunk], currentOffset,
+      chunkSize - currentOffset);
+    while (currentLength > 0) {
+      length += currentLength;
+      currentOffset = length % chunkSize;
+      if (currentOffset == 0) {
+        currentChunk = length / chunkSize;
+        grow(currentChunk);
+      }
+      currentLength = in.read(data[currentChunk], currentOffset,
+        chunkSize - currentOffset);
+    }
+  }
+
+  /**
+   * Byte compare a set of bytes against the bytes in this dynamic array.
+   * @param other source of the other bytes
+   * @param otherOffset start offset in the other array
+   * @param otherLength number of bytes in the other array
+   * @param ourOffset the offset in our array
+   * @param ourLength the number of bytes in our array
+   * @return negative, zero, or positive as the other bytes compare less than,
+   *   equal to, or greater than our bytes
+   */
+  public int compare(byte[] other, int otherOffset, int otherLength,
+                     int ourOffset, int ourLength) {
+    int currentChunk = ourOffset / chunkSize;
+    int currentOffset = ourOffset % chunkSize;
+    int maxLength = Math.min(otherLength, ourLength);
+    while (maxLength > 0 &&
+      other[otherOffset] == data[currentChunk][currentOffset]) {
+      otherOffset += 1;
+      currentOffset += 1;
+      if (currentOffset == chunkSize) {
+        currentChunk += 1;
+        currentOffset = 0;
+      }
+      maxLength -= 1;
+    }
+    if (maxLength == 0) {
+      return otherLength - ourLength;
+    }
+    int otherByte = 0xff & other[otherOffset];
+    int ourByte = 0xff & data[currentChunk][currentOffset];
+    return otherByte > ourByte ? 1 : -1;
+  }
+
+  /**
+   * Get the size of the array.
+   * @return the number of bytes in the array
+   */
+  public int size() {
+    return length;
+  }
+
+  /**
+   * Clear the array to its original pristine state.
+   */
+  public void clear() {
+    length = 0;
+    for(int i=0; i < data.length; ++i) {
+      data[i] = null;
+    }
+    initializedChunks = 0;
+  }
+
+  /**
+   * Set a text value from the bytes in this dynamic array.
+   * @param result the value to set
+   * @param offset the start of the bytes to copy
+   * @param length the number of bytes to copy
+   */
+  public void setText(Text result, int offset, int length) {
+    result.clear();
+    int currentChunk = offset / chunkSize;
+    int currentOffset = offset % chunkSize;
+    int currentLength = Math.min(length, chunkSize - currentOffset);
+    while (length > 0) {
+      result.append(data[currentChunk], currentOffset, currentLength);
+      length -= currentLength;
+      currentChunk += 1;
+      currentOffset = 0;
+      currentLength = Math.min(length, chunkSize - currentOffset);
+    }
+  }
+
+  /**
+   * Write out a range of this dynamic array to an output stream.
+   * @param out the stream to write to
+   * @param offset the first offset to write
+   * @param length the number of bytes to write
+   * @throws IOException
+   */
+  public void write(OutputStream out, int offset,
+                    int length) throws IOException {
+    int currentChunk = offset / chunkSize;
+    int currentOffset = offset % chunkSize;
+    while (length > 0) {
+      int currentLength = Math.min(length, chunkSize - currentOffset);
+      out.write(data[currentChunk], currentOffset, currentLength);
+      length -= currentLength;
+      currentChunk += 1;
+      currentOffset = 0;
+    }
+  }
+
+  @Override
+  public String toString() {
+    StringBuilder sb = new StringBuilder(length * 3);
+
+    sb.append('{');
+    // print every byte in hex; guard against the empty case so that
+    // toString never throws on a freshly cleared array
+    for (int i = 0; i < length; ++i) {
+      if (i != 0) {
+        sb.append(',');
+      }
+      sb.append(Integer.toHexString(get(i)));
+    }
+    sb.append('}');
+
+    return sb.toString();
+  }
+
+  public void setByteBuffer(ByteBuffer result, int offset, int length) {
+    result.clear();
+    int currentChunk = offset / chunkSize;
+    int currentOffset = offset % chunkSize;
+    int currentLength = Math.min(length, chunkSize - currentOffset);
+    while (length > 0) {
+      result.put(data[currentChunk], currentOffset, currentLength);
+      length -= currentLength;
+      currentChunk += 1;
+      currentOffset = 0;
+      currentLength = Math.min(length, chunkSize - currentOffset);
+    }
+  }
+
+  /**
+   * Gets all the bytes of the array.
+   *
+   * @return Bytes of the array
+   */
+  public byte[] get() {
+    byte[] result = null;
+    if (length > 0) {
+      int currentChunk = 0;
+      int currentOffset = 0;
+      int currentLength = Math.min(length, chunkSize);
+      int destOffset = 0;
+      result = new byte[length];
+      int totalLength = length;
+      while (totalLength > 0) {
+        System.arraycopy(data[currentChunk], currentOffset, result, destOffset, currentLength);
+        destOffset += currentLength;
+        totalLength -= currentLength;
+        currentChunk += 1;
+        currentOffset = 0;
+        currentLength = Math.min(totalLength, chunkSize - currentOffset);
+      }
+    }
+    return result;
+  }
+
+  /**
+   * Get the size of the buffers.
+   */
+  public long getSizeInBytes() {
+    return initializedChunks * chunkSize;
+  }
+}
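
A short usage sketch (not part of this patch) for DynamicByteArray, exercising only the methods defined above: add() returns the offset where the bytes landed, so callers can keep (offset, length) pairs and copy the bytes back out later.

  import java.nio.charset.StandardCharsets;
  import org.apache.hadoop.io.Text;
  import org.apache.orc.impl.DynamicByteArray;

  public class DynamicByteArraySketch {
    public static void main(String[] args) {
      DynamicByteArray buffer = new DynamicByteArray();

      byte[] hello = "hello".getBytes(StandardCharsets.UTF_8);
      byte[] world = "world".getBytes(StandardCharsets.UTF_8);
      int helloOffset = buffer.add(hello, 0, hello.length);
      int worldOffset = buffer.add(world, 0, world.length);

      // copy a stored value back into a Text without materializing the whole buffer
      Text value = new Text();
      buffer.setText(value, worldOffset, world.length);
      System.out.println(value);                    // world
      System.out.println(buffer.size());            // 10 bytes used
      System.out.println(buffer.getSizeInBytes());  // 32768: one default chunk allocated

      // lexicographic byte comparison of an external slice against a stored slice
      int cmp = buffer.compare(hello, 0, hello.length, helloOffset, hello.length);
      System.out.println(cmp);                      // 0, the slices are equal
    }
  }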

http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/orc/src/java/org/apache/orc/impl/DynamicIntArray.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/DynamicIntArray.java b/orc/src/java/org/apache/orc/impl/DynamicIntArray.java
new file mode 100644
index 0000000..3b2884b
--- /dev/null
+++ b/orc/src/java/org/apache/orc/impl/DynamicIntArray.java
@@ -0,0 +1,142 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.orc.impl;
+
+/**
+ * Dynamic int array that uses primitive types and chunks to avoid copying
+ * large numbers of integers when it resizes.
+ *
+ * The motivation for this class is memory optimization, i.e. space-efficient
+ * storage of potentially huge arrays without good a priori size guesses.
+ *
+ * The API of this class is between a primitive array and an AbstractList. It's
+ * not a Collection implementation because it handles primitive types, but the
+ * API could be extended to support iterators and the like.
+ *
+ * NOTE: Like standard Collection implementations/arrays, this class is not
+ * synchronized.
+ */
+public final class DynamicIntArray {
+  static final int DEFAULT_CHUNKSIZE = 8 * 1024;
+  static final int INIT_CHUNKS = 128;
+
+  private final int chunkSize;       // our allocation size
+  private int[][] data;              // the real data
+  private int length;                // max set element index +1
+  private int initializedChunks = 0; // the number of created chunks
+
+  public DynamicIntArray() {
+    this(DEFAULT_CHUNKSIZE);
+  }
+
+  public DynamicIntArray(int chunkSize) {
+    this.chunkSize = chunkSize;
+
+    data = new int[INIT_CHUNKS][];
+  }
+
+  /**
+   * Ensure that the chunk at the given index has been allocated.
+   */
+  private void grow(int chunkIndex) {
+    if (chunkIndex >= initializedChunks) {
+      if (chunkIndex >= data.length) {
+        int newSize = Math.max(chunkIndex + 1, 2 * data.length);
+        int[][] newChunk = new int[newSize][];
+        System.arraycopy(data, 0, newChunk, 0, data.length);
+        data = newChunk;
+      }
+      for (int i=initializedChunks; i <= chunkIndex; ++i) {
+        data[i] = new int[chunkSize];
+      }
+      initializedChunks = chunkIndex + 1;
+    }
+  }
+
+  public int get(int index) {
+    if (index >= length) {
+      throw new IndexOutOfBoundsException("Index " + index +
+                                            " is outside of 0.." +
+                                            (length - 1));
+    }
+    int i = index / chunkSize;
+    int j = index % chunkSize;
+    return data[i][j];
+  }
+
+  public void set(int index, int value) {
+    int i = index / chunkSize;
+    int j = index % chunkSize;
+    grow(i);
+    if (index >= length) {
+      length = index + 1;
+    }
+    data[i][j] = value;
+  }
+
+  public void increment(int index, int value) {
+    int i = index / chunkSize;
+    int j = index % chunkSize;
+    grow(i);
+    if (index >= length) {
+      length = index + 1;
+    }
+    data[i][j] += value;
+  }
+
+  public void add(int value) {
+    int i = length / chunkSize;
+    int j = length % chunkSize;
+    grow(i);
+    data[i][j] = value;
+    length += 1;
+  }
+
+  public int size() {
+    return length;
+  }
+
+  public void clear() {
+    length = 0;
+    for(int i=0; i < data.length; ++i) {
+      data[i] = null;
+    }
+    initializedChunks = 0;
+  }
+
+  @Override
+  public String toString() {
+    StringBuilder sb = new StringBuilder(length * 4);
+
+    sb.append('{');
+    // guard against the empty case so toString never throws
+    for (int i = 0; i < length; ++i) {
+      if (i != 0) {
+        sb.append(',');
+      }
+      sb.append(get(i));
+    }
+    sb.append('}');
+
+    return sb.toString();
+  }
+
+  public int getSizeInBytes() {
+    return 4 * initializedChunks * chunkSize;
+  }
+}
+
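
The same kind of sketch (not part of this patch) for DynamicIntArray: set() and increment() grow the chunk list on demand, so no capacity needs to be declared up front.

  import org.apache.orc.impl.DynamicIntArray;

  public class DynamicIntArraySketch {
    public static void main(String[] args) {
      DynamicIntArray counts = new DynamicIntArray();

      counts.add(1);           // index 0
      counts.add(5);           // index 1
      counts.increment(0, 2);  // index 0 is now 3
      counts.set(10, 7);       // grows the array; indexes 2..9 stay 0

      System.out.println(counts.get(0));            // 3
      System.out.println(counts.size());            // 11
      System.out.println(counts.getSizeInBytes());  // 32768: 4 bytes * one 8K-entry chunk
    }
  }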

http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/orc/src/java/org/apache/orc/impl/HadoopShims.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/HadoopShims.java b/orc/src/java/org/apache/orc/impl/HadoopShims.java
new file mode 100644
index 0000000..2980d71
--- /dev/null
+++ b/orc/src/java/org/apache/orc/impl/HadoopShims.java
@@ -0,0 +1,64 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.orc.impl;
+
+import org.apache.hadoop.util.VersionInfo;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+public interface HadoopShims {
+
+  enum DirectCompressionType {
+    NONE,
+    ZLIB_NOHEADER,
+    ZLIB,
+    SNAPPY,
+  }
+
+  interface DirectDecompressor {
+    void decompress(ByteBuffer input, ByteBuffer output) throws IOException;
+  }
+
+  /**
+   * Get a direct decompressor, if one is available for the given codec.
+   * @param codec the compression kind to decompress
+   * @return a direct decompressor, or null if none is available
+   */
+  DirectDecompressor getDirectDecompressor(DirectCompressionType codec);
+
+
+  class Factory {
+    private static HadoopShims SHIMS = null;
+
+    public static synchronized HadoopShims get() {
+      if (SHIMS == null) {
+        String[] versionParts = VersionInfo.getVersion().split("[.]");
+        int major = Integer.parseInt(versionParts[0]);
+        int minor = Integer.parseInt(versionParts[1]);
+        if (major < 2 || (major == 2 && minor < 3)) {
+          SHIMS = new HadoopShims_2_2();
+        } else {
+          SHIMS = new HadoopShimsCurrent();
+        }
+      }
+      return SHIMS;
+    }
+  }
+}
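
A small sketch (not part of this patch) of how the shim factory above is meant to be used: Factory.get() picks the shim for the running Hadoop version, and getDirectDecompressor() returns null when direct decompression is unavailable (the 2.2 shim below), so callers must be prepared to fall back.

  import org.apache.orc.impl.HadoopShims;

  public class ShimsSketch {
    public static void main(String[] args) {
      HadoopShims shims = HadoopShims.Factory.get();
      System.out.println("selected shim: " + shims.getClass().getSimpleName());

      HadoopShims.DirectDecompressor zlib =
          shims.getDirectDecompressor(HadoopShims.DirectCompressionType.ZLIB);
      if (zlib == null) {
        // Hadoop 2.2 or older: no direct decompression, use the streaming path
        System.out.println("direct zlib decompression unavailable");
        return;
      }

      // With real data the call would be:
      //   zlib.decompress(compressedBuffer, uncompressedBuffer);
      // where both arguments are (typically direct) ByteBuffers.
      System.out.println("direct zlib decompression available");
    }
  }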

http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/orc/src/java/org/apache/orc/impl/HadoopShimsCurrent.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/HadoopShimsCurrent.java b/orc/src/java/org/apache/orc/impl/HadoopShimsCurrent.java
new file mode 100644
index 0000000..3b9371d
--- /dev/null
+++ b/orc/src/java/org/apache/orc/impl/HadoopShimsCurrent.java
@@ -0,0 +1,62 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.orc.impl;
+
+import org.apache.hadoop.io.compress.snappy.SnappyDecompressor;
+import org.apache.hadoop.io.compress.zlib.ZlibDecompressor;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+/**
+ * Shims for recent versions of Hadoop
+ */
+public class HadoopShimsCurrent implements HadoopShims {
+
+  private static class DirectDecompressWrapper implements DirectDecompressor {
+    private final org.apache.hadoop.io.compress.DirectDecompressor root;
+
+    DirectDecompressWrapper(org.apache.hadoop.io.compress.DirectDecompressor root) {
+      this.root = root;
+    }
+
+    public void decompress(ByteBuffer input,
+                           ByteBuffer output) throws IOException {
+      root.decompress(input, output);
+    }
+  }
+
+  public DirectDecompressor getDirectDecompressor(
+      DirectCompressionType codec) {
+    switch (codec) {
+      case ZLIB:
+        return new DirectDecompressWrapper
+            (new ZlibDecompressor.ZlibDirectDecompressor());
+      case ZLIB_NOHEADER:
+        return new DirectDecompressWrapper
+            (new ZlibDecompressor.ZlibDirectDecompressor
+                (ZlibDecompressor.CompressionHeader.NO_HEADER, 0));
+      case SNAPPY:
+        return new DirectDecompressWrapper
+            (new SnappyDecompressor.SnappyDirectDecompressor());
+      default:
+        return null;
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/orc/src/java/org/apache/orc/impl/HadoopShims_2_2.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/orc/impl/HadoopShims_2_2.java b/orc/src/java/org/apache/orc/impl/HadoopShims_2_2.java
new file mode 100644
index 0000000..ac46836
--- /dev/null
+++ b/orc/src/java/org/apache/orc/impl/HadoopShims_2_2.java
@@ -0,0 +1,36 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.orc.impl;
+
+/**
+ * Shims for versions of Hadoop up to and including 2.2.x
+ */
+public class HadoopShims_2_2 implements HadoopShims {
+
+  public DirectDecompressor getDirectDecompressor(
+      DirectCompressionType codec) {
+    return null;
+  }
+}