You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by om...@apache.org on 2016/05/13 19:50:32 UTC

[03/23] orc git commit: ORC-1 Import of ORC code from Hive. (omalley reviewed by prasanthj)

http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
----------------------------------------------------------------------
diff --git a/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
new file mode 100644
index 0000000..c069a5f
--- /dev/null
+++ b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
@@ -0,0 +1,217 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import java.util.Arrays;
+
+/**
+ * ColumnVector contains the shared structure for the sub-types,
+ * including NULL information, and whether this vector
+ * repeats, i.e. has all values the same, so only the first
+ * one is set. This is used to accelerate query performance
+ * by handling a whole vector in O(1) time when applicable.
+ *
+ * The fields are public by design since this is a performance-critical
+ * structure that is used in the inner loop of query execution.
+ */
+public abstract class ColumnVector {
+
+  /**
+   * The current kinds of column vectors.
+   */
+  public enum Type {
+    NONE,    // Useful when the type of column vector has not been determined yet.
+    LONG,
+    DOUBLE,
+    BYTES,
+    DECIMAL,
+    TIMESTAMP,
+    INTERVAL_DAY_TIME,
+    STRUCT,
+    LIST,
+    MAP,
+    UNION
+  }
+
+  /**
+   * Per-row null flags, consulted only when noNulls is false: isNull[i] is
+   * true if row i is null, otherwise false. The array is always allocated,
+   * so a batch can be re-used later and nulls added.
+   */
+  public boolean[] isNull;
+
+  /** If the whole column vector has no nulls, this is true, otherwise false. */
+  public boolean noNulls;
+
+  /**
+   * True if the same value repeats for the whole column vector.
+   * If so, entry 0 holds the repeating value (and isNull[0] its null flag).
+   */
+  public boolean isRepeating;
+
+  // Variables to hold state from before flattening so it can be easily restored.
+  private boolean preFlattenIsRepeating;
+  private boolean preFlattenNoNulls;
+
+  /**
+   * Constructor for super-class ColumnVector. This is not called directly,
+   * but used to initialize inherited fields.
+   *
+   * @param len Vector length
+   */
+  public ColumnVector(int len) {
+    isNull = new boolean[len];
+    noNulls = true;
+    isRepeating = false;
+    preFlattenNoNulls = true;
+    preFlattenIsRepeating = false;
+  }
+
+  /**
+   * Resets the column to default state
+   *  - fills the isNull array with false (skipped when noNulls is already true)
+   *  - sets noNulls to true
+   *  - sets isRepeating to false
+   *  - clears the saved pre-flatten state
+   */
+  public void reset() {
+    if (!noNulls) {
+      Arrays.fill(isNull, false);
+    }
+    noNulls = true;
+    isRepeating = false;
+    preFlattenNoNulls = true;
+    preFlattenIsRepeating = false;
+  }
+
+  /**
+   * Sets the isRepeating flag. This base implementation only updates the
+   * flag on this vector; vector types that own child vectors are presumably
+   * expected to override it to propagate the flag (not visible here).
+   *
+   * @param isRepeating the new value of the flag
+   */
+  public void setRepeating(boolean isRepeating) {
+    this.isRepeating = isRepeating;
+  }
+
+  /**
+   * Expand the isRepeating / noNulls shortcuts into explicit per-row state.
+   *
+   * @param selectedInUse whether only the rows listed in sel are in play
+   * @param sel the selected row indexes, read when selectedInUse is true
+   * @param size the number of rows (or of entries in sel) to process
+   */
+  public abstract void flatten(boolean selectedInUse, int[] sel, int size);
+
+  /**
+   * Simplify vector by brute-force copying the null flag of the repeated
+   * entry to every row in play. This can be used to reduce combinatorial
+   * explosion of code paths in VectorExpressions with many arguments.
+   * Afterwards noNulls is false, i.e. the isNull array is authoritative.
+   */
+  protected void flattenRepeatingNulls(boolean selectedInUse, int[] sel,
+                                       int size) {
+    // With noNulls set, isNull[0] is not consulted, so the fill value is "not null".
+    boolean nullFillValue = !noNulls && isNull[0];
+
+    if (selectedInUse) {
+      for (int j = 0; j < size; j++) {
+        isNull[sel[j]] = nullFillValue;
+      }
+    } else {
+      Arrays.fill(isNull, 0, size, nullFillValue);
+    }
+
+    // all nulls are now explicit
+    noNulls = false;
+  }
+
+  /**
+   * If noNulls is set, clear it and write an explicit false into isNull for
+   * every row in play. Does nothing when noNulls is already false.
+   */
+  protected void flattenNoNulls(boolean selectedInUse, int[] sel,
+                                int size) {
+    if (!noNulls) {
+      return;
+    }
+    noNulls = false;
+    if (selectedInUse) {
+      for (int j = 0; j < size; j++) {
+        isNull[sel[j]] = false;
+      }
+    } else {
+      Arrays.fill(isNull, 0, size, false);
+    }
+  }
+
+  /**
+   * Restore the state of isRepeating and noNulls to what it was
+   * before flattening. This must only be called just after flattening
+   * and then evaluating a VectorExpression on the column vector.
+   * It is an optimization that allows other operations on the same
+   * column to continue to benefit from the isRepeating and noNulls
+   * indicators.
+   */
+  public void unFlatten() {
+    isRepeating = preFlattenIsRepeating;
+    noNulls = preFlattenNoNulls;
+  }
+
+  /** Record repeating and no nulls state to be restored later by unFlatten(). */
+  protected void flattenPush() {
+    preFlattenIsRepeating = isRepeating;
+    preFlattenNoNulls = noNulls;
+  }
+
+  /**
+   * Set the element in this column vector from the given input vector.
+   * This method can assume that the output does not have isRepeating set.
+   *
+   * @param outElementNum the row of this vector to write
+   * @param inputElementNum the row of inputVector to read
+   * @param inputVector the vector to copy from
+   */
+  public abstract void setElement(int outElementNum, int inputElementNum,
+                                  ColumnVector inputVector);
+
+  /**
+   * Initialize the column vector. This method can be overridden by specific column vector types.
+   * Use this method only if the individual type of the column vector is not known, otherwise it
+   * is preferable to call specific initialization methods.
+   */
+  public void init() {
+    // Do nothing by default
+  }
+
+  /**
+   * Ensure the ColumnVector can hold at least size values.
+   * This method is deliberately *not* recursive because the complex types
+   * can easily have more (or less) children than the upper levels.
+   * Only the isNull array is handled here; subclasses grow their own storage.
+   *
+   * @param size the new minimum size
+   * @param preserveData should the old data be preserved?
+   */
+  public void ensureSize(int size, boolean preserveData) {
+    if (isNull.length >= size) {
+      return;
+    }
+    boolean[] oldArray = isNull;
+    isNull = new boolean[size];
+    // With noNulls set the old flags are not consulted, so the fresh all-false array suffices.
+    if (preserveData && !noNulls) {
+      if (isRepeating) {
+        // Only entry 0 is meaningful for a repeating vector.
+        isNull[0] = oldArray[0];
+      } else {
+        System.arraycopy(oldArray, 0, isNull, 0, oldArray.length);
+      }
+    }
+  }
+
+  /**
+   * Print the value for this column into the given string builder.
+   * @param buffer the buffer to print into
+   * @param row the id of the row to print
+   */
+  public abstract void stringifyValue(StringBuilder buffer,
+                                      int row);
+}

http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
----------------------------------------------------------------------
diff --git a/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
new file mode 100644
index 0000000..0c52210
--- /dev/null
+++ b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
@@ -0,0 +1,156 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+import java.math.BigInteger;
+
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+
+public class DecimalColumnVector extends ColumnVector {
+
+  /**
+   * A vector of HiveDecimalWritable objects.
+   *
+   * For high performance and easy access to this low-level structure,
+   * the fields are public by design (as they are in other ColumnVector
+   * types).
+   */
+  public HiveDecimalWritable[] vector;
+  public short scale;
+  public short precision;
+
+  /**
+   * Create a vector of the default batch size.
+   *
+   * @param precision the decimal precision enforced on stored values
+   * @param scale the decimal scale enforced on stored values
+   */
+  public DecimalColumnVector(int precision, int scale) {
+    this(VectorizedRowBatch.DEFAULT_SIZE, precision, scale);
+  }
+
+  /**
+   * Create a vector with the given number of rows, every entry pre-allocated
+   * as a zero-valued writable so rows can be updated in place.
+   *
+   * @param size the number of rows
+   * @param precision the decimal precision enforced on stored values
+   * @param scale the decimal scale enforced on stored values
+   */
+  public DecimalColumnVector(int size, int precision, int scale) {
+    super(size);
+    this.precision = (short) precision;
+    this.scale = (short) scale;
+    vector = new HiveDecimalWritable[size];
+    for (int i = 0; i < size; i++) {
+      vector[i] = new HiveDecimalWritable(HiveDecimal.ZERO);
+    }
+  }
+
+  // Fill all the vector entries with the provided value
+  public void fill(HiveDecimal value) {
+    noNulls = true;
+    isRepeating = true;
+    // vector[0] is null after fillWithNulls(), so it may have to be re-created.
+    storeValue(0, value);
+  }
+
+  // Fill the column vector with nulls
+  public void fillWithNulls() {
+    noNulls = false;
+    isRepeating = true;
+    vector[0] = null;
+    isNull[0] = true;
+  }
+
+  /**
+   * Simplify vector by brute-force flattening noNulls and isRepeating.
+   * This can be used to reduce combinatorial explosion of code paths in
+   * VectorExpressions with many arguments. The previous state is pushed
+   * first so that unFlatten() can restore it.
+   *
+   * @param selectedInUse whether only the rows listed in sel are in play
+   * @param sel the selected row indexes, read when selectedInUse is true
+   * @param size the number of rows (or of entries in sel) to process
+   */
+  @Override
+  public void flatten(boolean selectedInUse, int[] sel, int size) {
+    flattenPush();
+    if (isRepeating) {
+      isRepeating = false;
+      HiveDecimalWritable repeat = vector[0];
+      // After fillWithNulls() entry 0 is null; then only the null flags need spreading.
+      HiveDecimal repeatVal = (repeat == null) ? null : repeat.getHiveDecimal();
+      if (repeatVal != null) {
+        if (selectedInUse) {
+          for (int j = 0; j < size; j++) {
+            storeValue(sel[j], repeatVal);
+          }
+        } else {
+          for (int i = 0; i < size; i++) {
+            storeValue(i, repeatVal);
+          }
+        }
+      }
+      flattenRepeatingNulls(selectedInUse, sel, size);
+    }
+    flattenNoNulls(selectedInUse, sel, size);
+  }
+
+  // Write value into the given row, allocating the writable if the slot is empty.
+  private void storeValue(int elementNum, HiveDecimal value) {
+    if (vector[elementNum] == null) {
+      vector[elementNum] = new HiveDecimalWritable(value);
+    } else {
+      vector[elementNum].set(value);
+    }
+  }
+
+  /**
+   * Copy one row from another DecimalColumnVector, honoring the input's
+   * isRepeating and null state. The value is read through this vector's
+   * precision and scale; if that yields no value the output row is marked null.
+   */
+  @Override
+  public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) {
+    if (inputVector.isRepeating) {
+      inputElementNum = 0;
+    }
+    if (inputVector.noNulls || !inputVector.isNull[inputElementNum]) {
+      HiveDecimal hiveDec =
+          ((DecimalColumnVector) inputVector).vector[inputElementNum]
+              .getHiveDecimal(precision, scale);
+      if (hiveDec == null) {
+        isNull[outElementNum] = true;
+        noNulls = false;
+      } else {
+        isNull[outElementNum] = false;
+        vector[outElementNum].set(hiveDec);
+      }
+    } else {
+      isNull[outElementNum] = true;
+      noNulls = false;
+    }
+  }
+
+  @Override
+  public void stringifyValue(StringBuilder buffer, int row) {
+    if (isRepeating) {
+      row = 0;
+    }
+    if (noNulls || !isNull[row]) {
+      buffer.append(vector[row].toString());
+    } else {
+      buffer.append("null");
+    }
+  }
+
+  /**
+   * Set a row from a writable. A null writable, or one whose value is not
+   * returned for this vector's precision and scale, marks the row as null.
+   * The row's isNull flag is not cleared on success.
+   *
+   * @param elementNum the row to set
+   * @param writeable the value to store, may be null
+   */
+  public void set(int elementNum, HiveDecimalWritable writeable) {
+    if (writeable == null) {
+      noNulls = false;
+      isNull[elementNum] = true;
+    } else {
+      HiveDecimal hiveDec = writeable.getHiveDecimal(precision, scale);
+      if (hiveDec == null) {
+        noNulls = false;
+        isNull[elementNum] = true;
+      } else {
+        vector[elementNum].set(hiveDec);
+      }
+    }
+  }
+
+  /**
+   * Set a row from a HiveDecimal after enforcing this vector's precision and
+   * scale; the row is marked null when enforcement returns null.
+   * The row's isNull flag is not cleared on success.
+   *
+   * @param elementNum the row to set
+   * @param hiveDec the value to store
+   */
+  public void set(int elementNum, HiveDecimal hiveDec) {
+    HiveDecimal checkedDec = HiveDecimal.enforcePrecisionScale(hiveDec, precision, scale);
+    if (checkedDec == null) {
+      noNulls = false;
+      isNull[elementNum] = true;
+    } else {
+      vector[elementNum].set(checkedDec);
+    }
+  }
+
+  /**
+   * Store the placeholder data value used for null rows: the smallest
+   * non-zero value at this vector's scale. Does not touch the null flags.
+   *
+   * @param elementNum the row to set
+   */
+  public void setNullDataValue(int elementNum) {
+    // E.g. For scale 2 the minimum is "0.01"
+    HiveDecimal minimumNonZeroValue = HiveDecimal.create(BigInteger.ONE, scale);
+    vector[elementNum].set(minimumNonZeroValue);
+  }
+
+  /**
+   * Ensure the vector can hold at least size values. Every slot of a grown
+   * array ends up holding a writable, whether or not the old data is kept.
+   *
+   * @param size the new minimum size
+   * @param preserveData should the old data be preserved?
+   */
+  @Override
+  public void ensureSize(int size, boolean preserveData) {
+    super.ensureSize(size, preserveData);
+    if (size <= vector.length) {
+      return;
+    }
+    HiveDecimalWritable[] oldArray = vector;
+    vector = new HiveDecimalWritable[size];
+    int firstUnallocated = 0;
+    if (preserveData) {
+      // we copy all of the values to avoid creating more objects
+      System.arraycopy(oldArray, 0, vector, 0, oldArray.length);
+      firstUnallocated = oldArray.length;
+    }
+    // Allocate the remaining slots so that later in-place updates do not hit null entries.
+    for (int i = firstUnallocated; i < vector.length; ++i) {
+      vector[i] = new HiveDecimalWritable(HiveDecimal.ZERO);
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
----------------------------------------------------------------------
diff --git a/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
new file mode 100644
index 0000000..bd421f4
--- /dev/null
+++ b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
@@ -0,0 +1,178 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+/**
+ * This class represents a nullable double precision floating point column vector.
+ * This class will be used for operations on all floating point types (float, double)
+ * and as such will use a 64-bit double value to hold the biggest possible value.
+ * During copy-in/copy-out, smaller types (i.e. float) will be converted as needed. This will
+ * reduce the amount of code that needs to be generated and also will run fast since the
+ * machine operates with 64-bit words.
+ *
+ * The vector[] field is public by design for high-performance access in the inner
+ * loop of query execution.
+ */
+public class DoubleColumnVector extends ColumnVector {
+  public double[] vector;
+  public static final double NULL_VALUE = Double.NaN;
+
+  /**
+   * Preferred constructor: allocates a vector of the default batch size.
+   */
+  public DoubleColumnVector() {
+    this(VectorizedRowBatch.DEFAULT_SIZE);
+  }
+
+  /**
+   * Don't use this except for testing purposes.
+   *
+   * @param len the number of rows to allocate
+   */
+  public DoubleColumnVector(int len) {
+    super(len);
+    vector = new double[len];
+  }
+
+  /**
+   * Copy this vector's contents into output, restricted to the rows in play
+   * as described by selectedInUse and sel. A repeating input produces a
+   * repeating output holding only entry 0.
+   */
+  public void copySelected(
+      boolean selectedInUse, int[] sel, int size, DoubleColumnVector output) {
+
+    // The output carries nulls exactly when the input does.
+    output.noNulls = noNulls;
+    output.isRepeating = false;
+
+    if (isRepeating) {
+      // Repeating input: entry 0 is the only meaningful one.
+      output.vector[0] = vector[0];
+      output.isNull[0] = isNull[0];
+      output.isRepeating = true;
+      return;
+    }
+
+    // Non-repeating input: data values first ...
+    if (!selectedInUse) {
+      System.arraycopy(vector, 0, output.vector, 0, size);
+    } else {
+      for (int k = 0; k < size; k++) {
+        final int row = sel[k];
+        output.vector[row] = vector[row];
+      }
+    }
+
+    // ... then the null flags, which only matter when there are nulls.
+    if (noNulls) {
+      return;
+    }
+    if (!selectedInUse) {
+      System.arraycopy(isNull, 0, output.isNull, 0, size);
+    } else {
+      for (int k = 0; k < size; k++) {
+        final int row = sel[k];
+        output.isNull[row] = isNull[row];
+      }
+    }
+  }
+
+  /** Make the whole vector a non-null repetition of the provided value. */
+  public void fill(double value) {
+    isRepeating = true;
+    noNulls = true;
+    vector[0] = value;
+  }
+
+  /** Make the whole vector a repetition of null. */
+  public void fillWithNulls() {
+    isRepeating = true;
+    noNulls = false;
+    isNull[0] = true;
+    vector[0] = NULL_VALUE;
+  }
+
+  /**
+   * Expand the isRepeating and noNulls shortcuts into explicit per-row state,
+   * saving the previous flags first. Useful for limiting the number of code
+   * paths in VectorExpressions that take many arguments.
+   */
+  public void flatten(boolean selectedInUse, int[] sel, int size) {
+    flattenPush();
+    if (isRepeating) {
+      isRepeating = false;
+      final double repeated = vector[0];
+      if (!selectedInUse) {
+        Arrays.fill(vector, 0, size, repeated);
+      } else {
+        for (int k = 0; k < size; k++) {
+          vector[sel[k]] = repeated;
+        }
+      }
+      flattenRepeatingNulls(selectedInUse, sel, size);
+    }
+    flattenNoNulls(selectedInUse, sel, size);
+  }
+
+  /**
+   * Copy one row from another DoubleColumnVector, reading entry 0 when the
+   * input repeats and marking the output row null when the input row is null.
+   */
+  @Override
+  public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) {
+    final int source = inputVector.isRepeating ? 0 : inputElementNum;
+    final boolean sourceIsNull = !inputVector.noNulls && inputVector.isNull[source];
+    if (sourceIsNull) {
+      isNull[outElementNum] = true;
+      noNulls = false;
+      return;
+    }
+    isNull[outElementNum] = false;
+    vector[outElementNum] = ((DoubleColumnVector) inputVector).vector[source];
+  }
+
+  /** Append the row's value, or the text "null" for a null row, to buffer. */
+  @Override
+  public void stringifyValue(StringBuilder buffer, int row) {
+    final int index = isRepeating ? 0 : row;
+    if (!noNulls && isNull[index]) {
+      buffer.append("null");
+    } else {
+      buffer.append(vector[index]);
+    }
+  }
+
+  /**
+   * Grow the value array (after the inherited null-flag array) so it can
+   * hold at least size values, optionally keeping the existing contents.
+   */
+  @Override
+  public void ensureSize(int size, boolean preserveData) {
+    super.ensureSize(size, preserveData);
+    if (size <= vector.length) {
+      return;
+    }
+    final double[] previous = vector;
+    vector = new double[size];
+    if (!preserveData) {
+      return;
+    }
+    if (isRepeating) {
+      // Only entry 0 carries data for a repeating vector.
+      vector[0] = previous[0];
+    } else {
+      System.arraycopy(previous, 0, vector, 0, previous.length);
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java
----------------------------------------------------------------------
diff --git a/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java
new file mode 100644
index 0000000..39ccea8
--- /dev/null
+++ b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java
@@ -0,0 +1,348 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import java.util.Arrays;
+
+import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
+import org.apache.hadoop.io.Writable;
+
+/**
+ * This class represents a nullable interval day time column vector capable of handing a
+ * wide range of interval day time values.
+ *
+ * We store the 2 (value) fields of a HiveIntervalDayTime class in primitive arrays.
+ *
+ * We do this to avoid an array of Java HiveIntervalDayTime objects which would have poor storage
+ * and memory access characteristics.
+ *
+ * Generally, the caller will fill in a scratch HiveIntervalDayTime object with values from a row,
+ * work using the scratch HiveIntervalDayTime, and then perhaps update the column vector row
+ * with a result.
+ */
+public class IntervalDayTimeColumnVector extends ColumnVector {
+
+  /*
+   * The storage arrays for this column vector corresponds to the storage of a HiveIntervalDayTime:
+   */
+  private long[] totalSeconds;
+      // The values from HiveIntervalDayTime.getTotalSeconds().
+
+  private int[] nanos;
+      // The values from HiveIntervalDayTime.getNanos().
+
+  /*
+   * Scratch objects.
+   */
+  private final HiveIntervalDayTime scratchIntervalDayTime;
+
+  private Writable scratchWritable;
+      // Supports keeping a HiveIntervalDayTimeWritable object without having to import
+      // that definition...
+
+  /**
+   * Use this constructor by default. All column vectors
+   * should normally be the default size.
+   */
+  public IntervalDayTimeColumnVector() {
+    this(VectorizedRowBatch.DEFAULT_SIZE);
+  }
+
+  /**
+   * Don't use this except for testing purposes.
+   *
+   * @param len the number of rows
+   */
+  public IntervalDayTimeColumnVector(int len) {
+    super(len);
+
+    totalSeconds = new long[len];
+    nanos = new int[len];
+
+    scratchIntervalDayTime = new HiveIntervalDayTime();
+
+    scratchWritable = null;     // Allocated by caller.
+  }
+
+  /**
+   * Return the number of rows.
+   * @return
+   */
+  public int getLength() {
+    return totalSeconds.length;
+  }
+
+  /**
+   * Return a row's HiveIntervalDayTime.getTotalSeconds() value.
+   * We assume the entry has already been NULL checked and isRepeated adjusted.
+   * @param elementNum
+   * @return
+   */
+  public long getTotalSeconds(int elementNum) {
+    return totalSeconds[elementNum];
+  }
+
+  /**
+   * Return a row's HiveIntervalDayTime.getNanos() value.
+   * We assume the entry has already been NULL checked and isRepeated adjusted.
+   * @param elementNum
+   * @return
+   */
+  public long getNanos(int elementNum) {
+    return nanos[elementNum];
+  }
+
+  /**
+   * Return a row's HiveIntervalDayTime.getDouble() value.
+   * We assume the entry has already been NULL checked and isRepeated adjusted.
+   * @param elementNum
+   * @return
+   */
+  public double getDouble(int elementNum) {
+    return asScratchIntervalDayTime(elementNum).getDouble();
+  }
+
+  /**
+   * Set a HiveIntervalDayTime object from a row of the column.
+   * We assume the entry has already been NULL checked and isRepeated adjusted.
+   * @param intervalDayTime
+   * @param elementNum
+   */
+  public void intervalDayTimeUpdate(HiveIntervalDayTime intervalDayTime, int elementNum) {
+    intervalDayTime.set(totalSeconds[elementNum], nanos[elementNum]);
+  }
+
+
+  /**
+   * Return the scratch HiveIntervalDayTime object set from a row.
+   * We assume the entry has already been NULL checked and isRepeated adjusted.
+   * @param elementNum
+   * @return
+   */
+  public HiveIntervalDayTime asScratchIntervalDayTime(int elementNum) {
+    scratchIntervalDayTime.set(totalSeconds[elementNum], nanos[elementNum]);
+    return scratchIntervalDayTime;
+  }
+
+  /**
+   * Return the scratch HiveIntervalDayTime (contents undefined).
+   * @return
+   */
+  public HiveIntervalDayTime getScratchIntervalDayTime() {
+    return scratchIntervalDayTime;
+  }
+
+  /**
+   * Compare row to HiveIntervalDayTime.
+   * We assume the entry has already been NULL checked and isRepeated adjusted.
+   * @param elementNum
+   * @param intervalDayTime
+   * @return -1, 0, 1 standard compareTo values.
+   */
+  public int compareTo(int elementNum, HiveIntervalDayTime intervalDayTime) {
+    return asScratchIntervalDayTime(elementNum).compareTo(intervalDayTime);
+  }
+
+  /**
+   * Compare HiveIntervalDayTime to row.
+   * We assume the entry has already been NULL checked and isRepeated adjusted.
+   * @param intervalDayTime
+   * @param elementNum
+   * @return -1, 0, 1 standard compareTo values.
+   */
+  public int compareTo(HiveIntervalDayTime intervalDayTime, int elementNum) {
+    return intervalDayTime.compareTo(asScratchIntervalDayTime(elementNum));
+  }
+
+  /**
+   * Compare a row to another TimestampColumnVector's row.
+   * @param elementNum1
+   * @param intervalDayTimeColVector2
+   * @param elementNum2
+   * @return
+   */
+  public int compareTo(int elementNum1, IntervalDayTimeColumnVector intervalDayTimeColVector2,
+      int elementNum2) {
+    return asScratchIntervalDayTime(elementNum1).compareTo(
+        intervalDayTimeColVector2.asScratchIntervalDayTime(elementNum2));
+  }
+
+  /**
+   * Compare another TimestampColumnVector's row to a row.
+   * @param intervalDayTimeColVector1
+   * @param elementNum1
+   * @param elementNum2
+   * @return
+   */
+  public int compareTo(IntervalDayTimeColumnVector intervalDayTimeColVector1, int elementNum1,
+      int elementNum2) {
+    return intervalDayTimeColVector1.asScratchIntervalDayTime(elementNum1).compareTo(
+        asScratchIntervalDayTime(elementNum2));
+  }
+
+  @Override
+  public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) {
+
+    IntervalDayTimeColumnVector timestampColVector = (IntervalDayTimeColumnVector) inputVector;
+
+    totalSeconds[outElementNum] = timestampColVector.totalSeconds[inputElementNum];
+    nanos[outElementNum] = timestampColVector.nanos[inputElementNum];
+  }
+
+  // Simplify vector by brute-force flattening noNulls and isRepeating
+  // This can be used to reduce combinatorial explosion of code paths in VectorExpressions
+  // with many arguments.
+  public void flatten(boolean selectedInUse, int[] sel, int size) {
+    flattenPush();
+    if (isRepeating) {
+      isRepeating = false;
+      long repeatFastTime = totalSeconds[0];
+      int repeatNanos = nanos[0];
+      if (selectedInUse) {
+        for (int j = 0; j < size; j++) {
+          int i = sel[j];
+          totalSeconds[i] = repeatFastTime;
+          nanos[i] = repeatNanos;
+        }
+      } else {
+        Arrays.fill(totalSeconds, 0, size, repeatFastTime);
+        Arrays.fill(nanos, 0, size, repeatNanos);
+      }
+      flattenRepeatingNulls(selectedInUse, sel, size);
+    }
+    flattenNoNulls(selectedInUse, sel, size);
+  }
+
+  /**
+   * Set a row from a HiveIntervalDayTime.
+   * We assume the entry has already been isRepeated adjusted.
+   * @param elementNum
+   * @param intervalDayTime
+   */
+  public void set(int elementNum, HiveIntervalDayTime intervalDayTime) {
+    this.totalSeconds[elementNum] = intervalDayTime.getTotalSeconds();
+    this.nanos[elementNum] = intervalDayTime.getNanos();
+  }
+
+  /**
+   * Set a row from the current value in the scratch interval day time.
+   * @param elementNum
+   */
+  public void setFromScratchIntervalDayTime(int elementNum) {
+    this.totalSeconds[elementNum] = scratchIntervalDayTime.getTotalSeconds();
+    this.nanos[elementNum] = scratchIntervalDayTime.getNanos();
+  }
+
+  /**
+   * Set row to standard null value(s).
+   * We assume the entry has already been isRepeated adjusted.
+   * @param elementNum
+   */
+  public void setNullValue(int elementNum) {
+    totalSeconds[elementNum] = 0;
+    nanos[elementNum] = 1;
+  }
+
+  // Copy the current object contents into the output. Only copy selected entries,
+  // as indicated by selectedInUse and the sel array.
+  public void copySelected(
+      boolean selectedInUse, int[] sel, int size, IntervalDayTimeColumnVector output) {
+
+    // Output has nulls if and only if input has nulls.
+    output.noNulls = noNulls;
+    output.isRepeating = false;
+
+    // Handle repeating case
+    if (isRepeating) {
+      output.totalSeconds[0] = totalSeconds[0];
+      output.nanos[0] = nanos[0];
+      output.isNull[0] = isNull[0];
+      output.isRepeating = true;
+      return;
+    }
+
+    // Handle normal case
+
+    // Copy data values over
+    if (selectedInUse) {
+      for (int j = 0; j < size; j++) {
+        int i = sel[j];
+        output.totalSeconds[i] = totalSeconds[i];
+        output.nanos[i] = nanos[i];
+      }
+    }
+    else {
+      System.arraycopy(totalSeconds, 0, output.totalSeconds, 0, size);
+      System.arraycopy(nanos, 0, output.nanos, 0, size);
+    }
+
+    // Copy nulls over if needed
+    if (!noNulls) {
+      if (selectedInUse) {
+        for (int j = 0; j < size; j++) {
+          int i = sel[j];
+          output.isNull[i] = isNull[i];
+        }
+      }
+      else {
+        System.arraycopy(isNull, 0, output.isNull, 0, size);
+      }
+    }
+  }
+
+  /**
+   * Fill all the vector entries with a HiveIntervalDayTime.
+   * @param intervalDayTime
+   */
+  public void fill(HiveIntervalDayTime intervalDayTime) {
+    noNulls = true;
+    isRepeating = true;
+    totalSeconds[0] = intervalDayTime.getTotalSeconds();
+    nanos[0] = intervalDayTime.getNanos();
+  }
+
+  /**
+   * Return a convenience writable object stored by this column vector.
+   * Supports keeping a TimestampWritable object without having to import that definition...
+   * @return
+   */
+  public Writable getScratchWritable() {
+    return scratchWritable;
+  }
+
+  /**
+   * Set the convenience writable object stored by this column vector
+   * @param scratchWritable
+   */
+  public void setScratchWritable(Writable scratchWritable) {
+    this.scratchWritable = scratchWritable;
+  }
+
+  @Override
+  public void stringifyValue(StringBuilder buffer, int row) {
+    if (isRepeating) {
+      row = 0;
+    }
+    if (noNulls || !isNull[row]) {
+      scratchIntervalDayTime.set(totalSeconds[row], nanos[row]);
+      buffer.append(scratchIntervalDayTime.toString());
+    } else {
+      buffer.append("null");
+    }
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ListColumnVector.java
----------------------------------------------------------------------
diff --git a/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ListColumnVector.java b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ListColumnVector.java
new file mode 100644
index 0000000..66240dd
--- /dev/null
+++ b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ListColumnVector.java
@@ -0,0 +1,119 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+
+/**
+ * A vectorized column whose rows are lists.
+ *
+ * The elements of all lists are stored in one shared child ColumnVector.
+ * Row i owns the child slots offsets[i] .. offsets[i]+lengths[i]-1
+ * (inclusive).
+ */
+public class ListColumnVector extends MultiValuedColumnVector {
+
+  /** The vector that stores the elements of every list in this column. */
+  public ColumnVector child;
+
+  /**
+   * Create a list vector of the default batch size with a null child.
+   */
+  public ListColumnVector() {
+    this(VectorizedRowBatch.DEFAULT_SIZE, null);
+  }
+
+  /**
+   * Create a list vector.
+   *
+   * @param len the number of rows
+   * @param child the vector that holds the list elements
+   */
+  public ListColumnVector(int len, ColumnVector child) {
+    super(len);
+    this.child = child;
+  }
+
+  /**
+   * Forward a flatten request to the element vector.
+   */
+  @Override
+  protected void childFlatten(boolean useSelected, int[] selected, int size) {
+    child.flatten(useSelected, selected, size);
+  }
+
+  /**
+   * Copy one list from another ListColumnVector into a row of this vector.
+   * The copied elements are appended after the child slots already in use.
+   *
+   * @param outElementNum the row of this vector to write
+   * @param inputElementNum the row of the input to read; ignored when the
+   *                        input is repeating
+   * @param inputVector the source vector, which must be a ListColumnVector
+   */
+  @Override
+  public void setElement(int outElementNum, int inputElementNum,
+                         ColumnVector inputVector) {
+    final ListColumnVector source = (ListColumnVector) inputVector;
+    final int srcRow = source.isRepeating ? 0 : inputElementNum;
+
+    if (!source.noNulls && source.isNull[srcRow]) {
+      isNull[outElementNum] = true;
+      noNulls = false;
+      return;
+    }
+
+    isNull[outElementNum] = false;
+    // New elements go directly after the child slots that are already used.
+    final int dstStart = childCount;
+    final int count = (int) source.lengths[srcRow];
+    final int srcStart = (int) source.offsets[srcRow];
+    offsets[outElementNum] = dstStart;
+    childCount += count;
+    lengths[outElementNum] = count;
+    child.ensureSize(childCount, true);
+    for (int k = 0; k < count; k++) {
+      child.setElement(dstStart + k, srcStart + k, source.child);
+    }
+  }
+
+  /**
+   * Append one row as a bracketed, comma separated list of its elements,
+   * or as the text null when the row is null.
+   */
+  @Override
+  public void stringifyValue(StringBuilder buffer, int row) {
+    final int slot = isRepeating ? 0 : row;
+    if (!noNulls && isNull[slot]) {
+      buffer.append("null");
+      return;
+    }
+    buffer.append('[');
+    final long start = offsets[slot];
+    final long end = start + lengths[slot];
+    String separator = "";
+    for (long pos = start; pos < end; ++pos) {
+      buffer.append(separator);
+      separator = ", ";
+      child.stringifyValue(buffer, (int) pos);
+    }
+    buffer.append(']');
+  }
+
+  /**
+   * Initialize this vector and then its element vector.
+   */
+  @Override
+  public void init() {
+    super.init();
+    child.init();
+  }
+
+  /**
+   * Reset this vector and then its element vector.
+   */
+  @Override
+  public void reset() {
+    super.reset();
+    child.reset();
+  }
+
+  /**
+   * Undo a flatten on this vector and, unless this vector is then a
+   * repeating null, on the element vector as well.
+   */
+  @Override
+  public void unFlatten() {
+    super.unFlatten();
+    // Evaluated after super.unFlatten(), so it sees the state left by it.
+    final boolean repeatingNull = isRepeating && !noNulls && isNull[0];
+    if (!repeatingNull) {
+      child.unFlatten();
+    }
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java
----------------------------------------------------------------------
diff --git a/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java
new file mode 100644
index 0000000..80d4731
--- /dev/null
+++ b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java
@@ -0,0 +1,224 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+/**
+ * This class represents a nullable int column vector.
+ * This class will be used for operations on all integer types (tinyint, smallint, int, bigint)
+ * and as such will use a 64-bit long value to hold the biggest possible value.
+ * During copy-in/copy-out, smaller int types will be converted as needed. This will
+ * reduce the amount of code that needs to be generated and also will run fast since the
+ * machine operates with 64-bit words.
+ *
+ * The vector[] field is public by design for high-performance access in the inner
+ * loop of query execution.
+ */
+public class LongColumnVector extends ColumnVector {
+  /** The row values. */
+  public long[] vector;
+
+  /** The value that fillWithNulls() stores in the data slot of a null entry. */
+  public static final long NULL_VALUE = 1;
+
+  /**
+   * Use this constructor by default. All column vectors
+   * should normally be the default size.
+   */
+  public LongColumnVector() {
+    this(VectorizedRowBatch.DEFAULT_SIZE);
+  }
+
+  /**
+   * Don't use this except for testing purposes.
+   *
+   * @param len the number of rows
+   */
+  public LongColumnVector(int len) {
+    super(len);
+    vector = new long[len];
+  }
+
+  /**
+   * Copy the current object contents into the output. Only copy selected entries,
+   * as indicated by selectedInUse and the sel array.
+   *
+   * @param selectedInUse whether sel lists the rows to copy
+   * @param sel the selected row numbers; only read when selectedInUse is true
+   * @param size the number of rows (or of entries in sel) to copy
+   * @param output the vector that receives the values and null flags
+   */
+  public void copySelected(
+      boolean selectedInUse, int[] sel, int size, LongColumnVector output) {
+
+    // Output has nulls if and only if input has nulls.
+    output.noNulls = noNulls;
+    output.isRepeating = false;
+
+    // Handle repeating case: slot 0 carries the only value and null flag.
+    if (isRepeating) {
+      output.vector[0] = vector[0];
+      output.isNull[0] = isNull[0];
+      output.isRepeating = true;
+      return;
+    }
+
+    // Handle normal case
+
+    // Copy data values over
+    if (selectedInUse) {
+      for (int j = 0; j < size; j++) {
+        int i = sel[j];
+        output.vector[i] = vector[i];
+      }
+    } else {
+      System.arraycopy(vector, 0, output.vector, 0, size);
+    }
+
+    // Copy nulls over if needed
+    if (!noNulls) {
+      copyNullFlags(selectedInUse, sel, size, output.isNull);
+    }
+  }
+
+  /**
+   * Copy the current object contents into the output, converting each long
+   * to a double. Only copy selected entries, as indicated by selectedInUse
+   * and the sel array.
+   *
+   * @param selectedInUse whether sel lists the rows to copy
+   * @param sel the selected row numbers; only read when selectedInUse is true
+   * @param size the number of rows (or of entries in sel) to copy
+   * @param output the vector that receives the values and null flags
+   */
+  public void copySelected(
+      boolean selectedInUse, int[] sel, int size, DoubleColumnVector output) {
+
+    // Output has nulls if and only if input has nulls.
+    output.noNulls = noNulls;
+    output.isRepeating = false;
+
+    // Handle repeating case: slot 0 carries the only value and null flag.
+    if (isRepeating) {
+      output.vector[0] = vector[0];  // automatic conversion to double is done here
+      output.isNull[0] = isNull[0];
+      output.isRepeating = true;
+      return;
+    }
+
+    // Handle normal case
+
+    // Copy data values over. An element-wise loop is needed in both branches
+    // because each long is widened to a double on assignment.
+    if (selectedInUse) {
+      for (int j = 0; j < size; j++) {
+        int i = sel[j];
+        output.vector[i] = vector[i];
+      }
+    } else {
+      for (int i = 0; i < size; ++i) {
+        output.vector[i] = vector[i];
+      }
+    }
+
+    // Copy nulls over if needed
+    if (!noNulls) {
+      copyNullFlags(selectedInUse, sel, size, output.isNull);
+    }
+  }
+
+  /**
+   * Copy this vector's null flags into the target array, either for the
+   * selected rows or for the first size rows.
+   *
+   * @param selectedInUse whether sel lists the rows to copy
+   * @param sel the selected row numbers; only read when selectedInUse is true
+   * @param size the number of rows (or of entries in sel) to copy
+   * @param target the isNull array of the output vector
+   */
+  private void copyNullFlags(boolean selectedInUse, int[] sel, int size, boolean[] target) {
+    if (selectedInUse) {
+      for (int j = 0; j < size; j++) {
+        int i = sel[j];
+        target[i] = isNull[i];
+      }
+    } else {
+      System.arraycopy(isNull, 0, target, 0, size);
+    }
+  }
+
+  /**
+   * Fill the column vector with the provided value.
+   * The value is stored in slot 0 and the vector is marked as repeating
+   * with no nulls.
+   *
+   * @param value the value to repeat
+   */
+  public void fill(long value) {
+    noNulls = true;
+    isRepeating = true;
+    vector[0] = value;
+  }
+
+  /**
+   * Fill the column vector with nulls.
+   * Slot 0 is marked null, its data value is set to NULL_VALUE and the
+   * vector is marked as repeating.
+   */
+  public void fillWithNulls() {
+    noNulls = false;
+    isRepeating = true;
+    vector[0] = NULL_VALUE;
+    isNull[0] = true;
+  }
+
+  /**
+   * Simplify vector by brute-force flattening noNulls and isRepeating.
+   * This can be used to reduce combinatorial explosion of code paths in VectorExpressions
+   * with many arguments.
+   *
+   * @param selectedInUse whether sel lists the rows in use
+   * @param sel the selected row numbers; only read when selectedInUse is true
+   * @param size the number of rows (or of entries in sel) to flatten
+   */
+  @Override
+  public void flatten(boolean selectedInUse, int[] sel, int size) {
+    flattenPush();
+    if (isRepeating) {
+      isRepeating = false;
+      // Spread the single repeated value over every row in use.
+      long repeatVal = vector[0];
+      if (selectedInUse) {
+        for (int j = 0; j < size; j++) {
+          int i = sel[j];
+          vector[i] = repeatVal;
+        }
+      } else {
+        Arrays.fill(vector, 0, size, repeatVal);
+      }
+      flattenRepeatingNulls(selectedInUse, sel, size);
+    }
+    flattenNoNulls(selectedInUse, sel, size);
+  }
+
+  /**
+   * Copy one value (or null) from another LongColumnVector into a row of
+   * this vector.
+   *
+   * @param outElementNum the row of this vector to write
+   * @param inputElementNum the row of the input to read; ignored when the
+   *                        input is repeating
+   * @param inputVector the source vector, which must be a LongColumnVector
+   *                    when the source row is not null
+   */
+  @Override
+  public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) {
+    if (inputVector.isRepeating) {
+      inputElementNum = 0;
+    }
+    if (inputVector.noNulls || !inputVector.isNull[inputElementNum]) {
+      isNull[outElementNum] = false;
+      vector[outElementNum] =
+          ((LongColumnVector) inputVector).vector[inputElementNum];
+    } else {
+      isNull[outElementNum] = true;
+      noNulls = false;
+    }
+  }
+
+  /**
+   * Append the decimal text of one row, or the text null for a null row.
+   *
+   * @param buffer the buffer to append to
+   * @param row the row to print; ignored when the vector is repeating
+   */
+  @Override
+  public void stringifyValue(StringBuilder buffer, int row) {
+    if (isRepeating) {
+      row = 0;
+    }
+    if (noNulls || !isNull[row]) {
+      buffer.append(vector[row]);
+    } else {
+      buffer.append("null");
+    }
+  }
+
+  /**
+   * Grow the value array to at least the given size.
+   *
+   * @param size the required number of rows
+   * @param preserveData whether the existing values must be kept; for a
+   *                     repeating vector only slot 0 is carried over
+   */
+  @Override
+  public void ensureSize(int size, boolean preserveData) {
+    super.ensureSize(size, preserveData);
+    if (size > vector.length) {
+      long[] oldArray = vector;
+      vector = new long[size];
+      if (preserveData) {
+        if (isRepeating) {
+          vector[0] = oldArray[0];
+        } else {
+          System.arraycopy(oldArray, 0, vector, 0 , oldArray.length);
+        }
+      }
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MapColumnVector.java
----------------------------------------------------------------------
diff --git a/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MapColumnVector.java b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MapColumnVector.java
new file mode 100644
index 0000000..e8421e3
--- /dev/null
+++ b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MapColumnVector.java
@@ -0,0 +1,131 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+
+/**
+ * The representation of a vectorized column of map objects.
+ *
+ * Each map is composed of a range of elements in the underlying child
+ * ColumnVector. The range for map i is
+ * offsets[i]..offsets[i]+lengths[i]-1 inclusive.
+ */
+public class MapColumnVector extends MultiValuedColumnVector {
+
+  public ColumnVector keys;
+  public ColumnVector values;
+
+  public MapColumnVector() {
+    this(VectorizedRowBatch.DEFAULT_SIZE, null, null);
+  }
+
+  /**
+   * Constructor for MapColumnVector
+   *
+   * @param len Vector length
+   * @param keys The keys column vector
+   * @param values The values column vector
+   */
+  public MapColumnVector(int len, ColumnVector keys, ColumnVector values) {
+    super(len);
+    this.keys = keys;
+    this.values = values;
+  }
+
+  @Override
+  protected void childFlatten(boolean useSelected, int[] selected, int size) {
+    keys.flatten(useSelected, selected, size);
+    values.flatten(useSelected, selected, size);
+  }
+
+  @Override
+  public void setElement(int outElementNum, int inputElementNum,
+                         ColumnVector inputVector) {
+    if (inputVector.isRepeating) {
+      inputElementNum = 0;
+    }
+    if (!inputVector.noNulls && inputVector.isNull[inputElementNum]) {
+      isNull[outElementNum] = true;
+      noNulls = false;
+    } else {
+      MapColumnVector input = (MapColumnVector) inputVector;
+      isNull[outElementNum] = false;
+      int offset = childCount;
+      int length = (int) input.lengths[inputElementNum];
+      int inputOffset = (int) input.offsets[inputElementNum];
+      offsets[outElementNum] = offset;
+      childCount += length;
+      lengths[outElementNum] = length;
+      keys.ensureSize(childCount, true);
+      values.ensureSize(childCount, true);
+      for (int i = 0; i < length; ++i) {
+        keys.setElement(i + offset, inputOffset + i, input.keys);
+        values.setElement(i + offset, inputOffset + i, input.values);
+      }
+    }
+  }
+
+  @Override
+  public void stringifyValue(StringBuilder buffer, int row) {
+    if (isRepeating) {
+      row = 0;
+    }
+    if (noNulls || !isNull[row]) {
+      buffer.append('[');
+      boolean isFirst = true;
+      for(long i=offsets[row]; i < offsets[row] + lengths[row]; ++i) {
+        if (isFirst) {
+          isFirst = false;
+        } else {
+          buffer.append(", ");
+        }
+        buffer.append("{\"key\": ");
+        keys.stringifyValue(buffer, (int) i);
+        buffer.append(", \"value\": ");
+        values.stringifyValue(buffer, (int) i);
+        buffer.append('}');
+      }
+      buffer.append(']');
+    } else {
+      buffer.append("null");
+    }
+  }
+
+  @Override
+  public void init() {
+    super.init();
+    keys.init();
+    values.init();
+  }
+
+  @Override
+  public void reset() {
+    super.reset();
+    keys.reset();
+    values.reset();
+  }
+
+  @Override
+  public void unFlatten() {
+    super.unFlatten();
+    if (!isRepeating || noNulls || !isNull[0]) {
+      keys.unFlatten();
+      values.unFlatten();
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MultiValuedColumnVector.java
----------------------------------------------------------------------
diff --git a/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MultiValuedColumnVector.java b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MultiValuedColumnVector.java
new file mode 100644
index 0000000..1aeff83
--- /dev/null
+++ b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MultiValuedColumnVector.java
@@ -0,0 +1,150 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import java.util.Arrays;
+
+/**
+ * The representation of a vectorized column of multi-valued objects, such
+ * as lists and maps.
+ *
+ * Each object is composed of a range of elements in the underlying child
+ * ColumnVector. The range for object i is
+ * offsets[i]..offsets[i]+lengths[i]-1 inclusive.
+ */
+public abstract class MultiValuedColumnVector extends ColumnVector {
+
+  // offsets[i] is the first child slot that belongs to row i
+  public long[] offsets;
+  // lengths[i] is the number of child slots that belong to row i
+  public long[] lengths;
+  // the number of children slots used
+  public int childCount;
+
+  /**
+   * Constructor for MultiValuedColumnVector.
+   *
+   * @param len Vector length
+   */
+  public MultiValuedColumnVector(int len) {
+    super(len);
+    childCount = 0;
+    offsets = new long[len];
+    lengths = new long[len];
+  }
+
+  /**
+   * Flatten the child vector(s) that hold the elements.
+   *
+   * @param useSelected whether selected lists the child slots in use
+   * @param selected the child slots in use; flatten() in this class passes
+   *                 null when useSelected is false
+   * @param size the number of child slots (or of entries in selected)
+   */
+  protected abstract void childFlatten(boolean useSelected, int[] selected,
+                                       int size);
+
+  /**
+   * Flatten this vector and the child slots that its rows refer to.
+   *
+   * For a repeating non-null vector, offsets[0] and lengths[0] are copied to
+   * every row in use and the first lengths[0] child slots are flattened; this
+   * requires offsets[0] to be 0. For a repeating null vector every row in use
+   * is marked null. In both repeating cases the vector ends up with
+   * isRepeating and noNulls set to false. For a non-repeating vector only the
+   * children and the null flags are flattened.
+   *
+   * @param selectedInUse whether sel lists the rows in use
+   * @param sel the selected row numbers; only read when selectedInUse is true
+   * @param size the number of rows (or of entries in sel)
+   * @throws IllegalArgumentException if the vector is a repeating non-null
+   *         value whose offsets[0] is not 0
+   */
+  @Override
+  public void flatten(boolean selectedInUse, int[] sel, int size) {
+    flattenPush();
+
+    if (isRepeating) {
+      if (noNulls || !isNull[0]) {
+        // Give every row in use the same child range as row 0.
+        if (selectedInUse) {
+          for (int i = 0; i < size; ++i) {
+            int row = sel[i];
+            offsets[row] = offsets[0];
+            lengths[row] = lengths[0];
+            isNull[row] = false;
+          }
+        } else {
+          Arrays.fill(offsets, 0, size, offsets[0]);
+          Arrays.fill(lengths, 0, size, lengths[0]);
+          Arrays.fill(isNull, 0, size, false);
+        }
+        // We optimize by assuming that a repeating list/map will run
+        // from 0 .. lengths[0] in the child vector.
+        // Sanity check the assumption that we can start at 0.
+        if (offsets[0] != 0) {
+          throw new IllegalArgumentException("Repeating offset isn't 0, but " +
+                                             offsets[0]);
+        }
+        childFlatten(false, null, (int) lengths[0]);
+      } else {
+        // Repeating null: mark every row in use as null.
+        if (selectedInUse) {
+          for(int i=0; i < size; ++i) {
+            isNull[sel[i]] = true;
+          }
+        } else {
+          Arrays.fill(isNull, 0, size, true);
+        }
+      }
+      isRepeating = false;
+      noNulls = false;
+    } else {
+      if (selectedInUse) {
+        // Build the child selection: every child slot of every selected row.
+        int childSize = 0;
+        for(int i=0; i < size; ++i) {
+          childSize += lengths[sel[i]];
+        }
+        int[] childSelection = new int[childSize];
+        int idx = 0;
+        for(int i=0; i < size; ++i) {
+          int row = sel[i];
+          for(int elem=0; elem < lengths[row]; ++elem) {
+            childSelection[idx++] = (int) (offsets[row] + elem);
+          }
+        }
+        childFlatten(true, childSelection, childSize);
+      } else {
+        // No selection: flatten all child slots that are in use.
+        childFlatten(false, null, childCount);
+      }
+      flattenNoNulls(selectedInUse, sel, size);
+    }
+  }
+
+  /**
+   * Grow the offsets and lengths arrays to at least the given size.
+   * Both arrays are reallocated together, based on the length of offsets.
+   *
+   * @param size the required number of rows
+   * @param preserveData whether the existing entries must be kept; for a
+   *                     repeating vector only slot 0 is carried over
+   */
+  @Override
+  public void ensureSize(int size, boolean preserveData) {
+    super.ensureSize(size, preserveData);
+    if (size > offsets.length) {
+      long[] oldOffsets = offsets;
+      offsets = new long[size];
+      long oldLengths[] = lengths;
+      lengths = new long[size];
+      if (preserveData) {
+        if (isRepeating) {
+          offsets[0] = oldOffsets[0];
+          lengths[0] = oldLengths[0];
+        } else {
+          System.arraycopy(oldOffsets, 0, offsets, 0 , oldOffsets.length);
+          System.arraycopy(oldLengths, 0, lengths, 0, oldLengths.length);
+        }
+      }
+    }
+  }
+
+  /**
+   * Initialize the vector; no child slots are in use afterwards.
+   */
+  @Override
+  public void init() {
+    super.init();
+    childCount = 0;
+  }
+
+  /**
+   * Reset the vector for the next batch; no child slots are in use afterwards.
+   */
+  @Override
+  public void reset() {
+    super.reset();
+    childCount = 0;
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/StructColumnVector.java
----------------------------------------------------------------------
diff --git a/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/StructColumnVector.java b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/StructColumnVector.java
new file mode 100644
index 0000000..cf07bca
--- /dev/null
+++ b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/StructColumnVector.java
@@ -0,0 +1,132 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+
+/**
+ * The representation of a vectorized column of struct objects.
+ *
+ * Each field is represented by a separate inner ColumnVector. Since this
+ * ColumnVector doesn't own any per row data other that the isNull flag, the
+ * isRepeating only covers the isNull array.
+ */
+public class StructColumnVector extends ColumnVector {
+
+  public ColumnVector[] fields;
+
+  public StructColumnVector() {
+    this(VectorizedRowBatch.DEFAULT_SIZE);
+  }
+
+  /**
+   * Constructor for StructColumnVector
+   *
+   * @param len Vector length
+   * @param fields the field column vectors
+   */
+  public StructColumnVector(int len, ColumnVector... fields) {
+    super(len);
+    this.fields = fields;
+  }
+
+  @Override
+  public void flatten(boolean selectedInUse, int[] sel, int size) {
+    flattenPush();
+    for(int i=0; i < fields.length; ++i) {
+      fields[i].flatten(selectedInUse, sel, size);
+    }
+    flattenNoNulls(selectedInUse, sel, size);
+  }
+
+  @Override
+  public void setElement(int outElementNum, int inputElementNum,
+                         ColumnVector inputVector) {
+    if (inputVector.isRepeating) {
+      inputElementNum = 0;
+    }
+    if (inputVector.noNulls || !inputVector.isNull[inputElementNum]) {
+      isNull[outElementNum] = false;
+      ColumnVector[] inputFields = ((StructColumnVector) inputVector).fields;
+      for (int i = 0; i < inputFields.length; ++i) {
+        fields[i].setElement(outElementNum, inputElementNum, inputFields[i]);
+      }
+    } else {
+      noNulls = false;
+      isNull[outElementNum] = true;
+    }
+  }
+
+  @Override
+  public void stringifyValue(StringBuilder buffer, int row) {
+    if (isRepeating) {
+      row = 0;
+    }
+    if (noNulls || !isNull[row]) {
+      buffer.append('[');
+      for(int i=0; i < fields.length; ++i) {
+        if (i != 0) {
+          buffer.append(", ");
+        }
+        fields[i].stringifyValue(buffer, row);
+      }
+      buffer.append(']');
+    } else {
+      buffer.append("null");
+    }
+  }
+
+  @Override
+  public void ensureSize(int size, boolean preserveData) {
+    super.ensureSize(size, preserveData);
+    for(int i=0; i < fields.length; ++i) {
+      fields[i].ensureSize(size, preserveData);
+    }
+  }
+
+  @Override
+  public void reset() {
+    super.reset();
+    for(int i =0; i < fields.length; ++i) {
+      fields[i].reset();
+    }
+  }
+
+  @Override
+  public void init() {
+    super.init();
+    for(int i =0; i < fields.length; ++i) {
+      fields[i].init();
+    }
+  }
+
+  @Override
+  public void unFlatten() {
+    super.unFlatten();
+    for(int i=0; i < fields.length; ++i) {
+      fields[i].unFlatten();
+    }
+  }
+
+  @Override
+  public void setRepeating(boolean isRepeating) {
+    super.setRepeating(isRepeating);
+    for(int i=0; i < fields.length; ++i) {
+      fields[i].setRepeating(isRepeating);
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java
----------------------------------------------------------------------
diff --git a/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java
new file mode 100644
index 0000000..228461a
--- /dev/null
+++ b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java
@@ -0,0 +1,400 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import java.sql.Timestamp;
+import java.util.Arrays;
+
+import org.apache.hadoop.io.Writable;
+
+/**
+ * This class represents a nullable timestamp column vector capable of handling a wide range of
+ * timestamp values.
+ *
+ * We store the 2 (value) fields of a Timestamp class in primitive arrays.
+ *
+ * We do this to avoid an array of Java Timestamp objects which would have poor storage
+ * and memory access characteristics.
+ *
+ * Generally, the caller will fill in a scratch timestamp object with values from a row, work
+ * using the scratch timestamp, and then perhaps update the column vector row with a result.
+ */
+public class TimestampColumnVector extends ColumnVector {
+
+  /*
+   * The storage arrays of this column vector correspond to the two value
+   * fields of a Timestamp; entry i of both arrays together describes row i.
+   */
+  public long[] time;
+      // The values from Timestamp.getTime().
+
+  public int[] nanos;
+      // The values from Timestamp.getNanos().
+
+  /*
+   * Scratch objects.
+   */
+  private final Timestamp scratchTimestamp;
+      // Reused by the methods of this class that need a Timestamp for a row.
+
+  private Writable scratchWritable;
+      // Supports keeping a TimestampWritable object without having to import that definition...
+
+  /**
+   * Create a vector of VectorizedRowBatch.DEFAULT_SIZE rows.
+   * This is the constructor to use by default, because column vectors
+   * should normally have the default size.
+   */
+  public TimestampColumnVector() {
+    this(VectorizedRowBatch.DEFAULT_SIZE);
+  }
+
+  /**
+   * Create a vector with an explicit number of rows.
+   * Don't use this except for testing purposes.
+   *
+   * @param len the number of rows
+   */
+  public TimestampColumnVector(int len) {
+    super(len);
+
+    // One slot per row in each of the two storage arrays.
+    this.time = new long[len];
+    this.nanos = new int[len];
+
+    this.scratchTimestamp = new Timestamp(0);
+
+    // The scratch writable is allocated by the caller, not here.
+    this.scratchWritable = null;
+  }
+
+  /**
+   * Return the number of rows this vector has storage for.
+   * @return the length of the time array
+   */
+  public int getLength() {
+    return this.time.length;
+  }
+
+  /**
+   * Return the stored Timestamp.getTime() value of a row.
+   * No null or repeating handling is done here; the caller is expected to
+   * have NULL checked the entry and adjusted the index for isRepeating.
+   * @param elementNum the row to read
+   * @return the time value stored for that row
+   */
+  public long getTime(int elementNum) {
+    return this.time[elementNum];
+  }
+
+  /**
+   * Return the stored Timestamp.getNanos() value of a row.
+   * No null or repeating handling is done here; the caller is expected to
+   * have NULL checked the entry and adjusted the index for isRepeating.
+   * @param elementNum the row to read
+   * @return the nanos value stored for that row
+   */
+  public int getNanos(int elementNum) {
+    return this.nanos[elementNum];
+  }
+
+  /**
+   * Load a row of the column into a caller-supplied Timestamp object.
+   * No null or repeating handling is done here; the caller is expected to
+   * have NULL checked the entry and adjusted the index for isRepeating.
+   * @param timestamp the object to overwrite with the row's value
+   * @param elementNum the row to read
+   */
+  public void timestampUpdate(Timestamp timestamp, int elementNum) {
+    // The time is applied before the nanos, in that order.
+    final long millis = time[elementNum];
+    timestamp.setTime(millis);
+    final int nanoPart = nanos[elementNum];
+    timestamp.setNanos(nanoPart);
+  }
+
+  /**
+   * Load a row into the shared scratch Timestamp and return that object.
+   * The same object is returned on every call, so its contents change with
+   * the next call. No null or repeating handling is done here; the caller is
+   * expected to have NULL checked the entry and adjusted the index for
+   * isRepeating.
+   * @param elementNum the row to read
+   * @return the scratch Timestamp, set to the row's value
+   */
+  public Timestamp asScratchTimestamp(int elementNum) {
+    final Timestamp scratch = this.scratchTimestamp;
+    scratch.setTime(time[elementNum]);
+    scratch.setNanos(nanos[elementNum]);
+    return scratch;
+  }
+
+  /**
+   * Return the shared scratch Timestamp without loading any row into it,
+   * so its contents are undefined.
+   * @return the scratch Timestamp object
+   */
+  public Timestamp getScratchTimestamp() {
+    return this.scratchTimestamp;
+  }
+
+  /**
+   * Return the long representation of a row, computed by
+   * getTimestampAsLong(Timestamp) from the row's value.
+   * The scratch Timestamp is overwritten by this call.
+   * @param elementNum the row to read
+   * @return the long representation of the row
+   */
+  public long getTimestampAsLong(int elementNum) {
+    final Timestamp scratch = this.scratchTimestamp;
+    scratch.setTime(time[elementNum]);
+    scratch.setNanos(nanos[elementNum]);
+    return getTimestampAsLong(scratch);
+  }
+
+  /**
+   * Return the long representation of a Timestamp: its getTime() value
+   * converted to whole seconds by millisToSeconds.
+   * @param timestamp the timestamp to convert
+   * @return the number of whole seconds
+   */
+  public static long getTimestampAsLong(Timestamp timestamp) {
+    final long millis = timestamp.getTime();
+    return millisToSeconds(millis);
+  }
+
+  // Copy of TimestampWritable.millisToSeconds
+  /**
+   * Convert milliseconds relative to the epoch to whole seconds, rounding
+   * down (toward negative infinity): 500 becomes 0 and -500 becomes -1.
+   */
+  private static long millisToSeconds(long millis) {
+    // Integer division truncates toward zero, so negative values are shifted
+    // by 999 first to make the result round down instead.
+    return (millis >= 0) ? millis / 1000 : (millis - 999) / 1000;
+  }
+
+  /**
+   * Return the double representation of a row, computed by
+   * getDouble(Timestamp) from the row's value.
+   * The scratch Timestamp is overwritten by this call.
+   * @param elementNum the row to read
+   * @return the double representation of the row
+   */
+  public double getDouble(int elementNum) {
+    final Timestamp scratch = this.scratchTimestamp;
+    scratch.setTime(time[elementNum]);
+    scratch.setNanos(nanos[elementNum]);
+    return getDouble(scratch);
+  }
+
+  /**
+   * Return the double representation of a Timestamp: its whole seconds
+   * (getTime() rounded down by millisToSeconds) plus its getNanos() value
+   * divided by one billion.
+   * @param timestamp the timestamp to convert
+   * @return seconds, with the nanos as the fractional part
+   */
+  public static double getDouble(Timestamp timestamp) {
+    // Same algorithm as TimestampWritable (not currently import-able here).
+    final double wholeSeconds = millisToSeconds(timestamp.getTime());
+    final double nanoPart = timestamp.getNanos();
+    return wholeSeconds + nanoPart / 1000000000;
+  }
+
+  /**
+   * Compare a row of this vector (left side) to a Timestamp (right side).
+   * The scratch Timestamp is overwritten by this call. No null or repeating
+   * handling is done here; the caller is expected to have NULL checked the
+   * entry and adjusted the index for isRepeating.
+   * @param elementNum the row to compare
+   * @param timestamp the value to compare the row against
+   * @return the result of Timestamp.compareTo with the row on the left
+   */
+  public int compareTo(int elementNum, Timestamp timestamp) {
+    final Timestamp rowValue = asScratchTimestamp(elementNum);
+    return rowValue.compareTo(timestamp);
+  }
+
+  /**
+   * Compare a Timestamp (left side) to a row of this vector (right side).
+   * The scratch Timestamp is overwritten by this call. No null or repeating
+   * handling is done here; the caller is expected to have NULL checked the
+   * entry and adjusted the index for isRepeating.
+   * @param timestamp the value on the left side of the comparison
+   * @param elementNum the row to compare against
+   * @return the result of Timestamp.compareTo with the row on the right
+   */
+  public int compareTo(Timestamp timestamp, int elementNum) {
+    final Timestamp rowValue = asScratchTimestamp(elementNum);
+    return timestamp.compareTo(rowValue);
+  }
+
+  /**
+   * Compare a row of this vector (left side) to a row of another
+   * TimestampColumnVector (right side).
+   * The scratch Timestamp of each vector involved is overwritten. No null or
+   * repeating handling is done here.
+   * @param elementNum1 the row of this vector
+   * @param timestampColVector2 the other vector; may be this vector itself
+   * @param elementNum2 the row of the other vector
+   * @return the result of Timestamp.compareTo with this vector's row on the left
+   */
+  public int compareTo(int elementNum1, TimestampColumnVector timestampColVector2,
+      int elementNum2) {
+    if (timestampColVector2 == this) {
+      // Both rows would be loaded into the same scratch object, which would
+      // then be compared with itself and always yield 0. Load the left row
+      // into a separate Timestamp instead.
+      Timestamp left = new Timestamp(0);
+      timestampUpdate(left, elementNum1);
+      return left.compareTo(asScratchTimestamp(elementNum2));
+    }
+    return asScratchTimestamp(elementNum1).compareTo(
+        timestampColVector2.asScratchTimestamp(elementNum2));
+  }
+
+  /**
+   * Compare a row of another TimestampColumnVector to a row of this vector.
+   * @param timestampColVector1 the vector supplying the left-hand operand
+   * @param elementNum1 index of the row in timestampColVector1
+   * @param elementNum2 index of the row in this vector (right-hand operand)
+   * @return the result of {@code Timestamp.compareTo} for (other row, this row)
+   */
+  public int compareTo(TimestampColumnVector timestampColVector1, int elementNum1,
+      int elementNum2) {
+    // NOTE(review): both operands are obtained through asScratchTimestamp. If that
+    // method returns a per-vector shared scratch object, passing this same vector as
+    // timestampColVector1 would compare the object with itself -- confirm.
+    final Timestamp left = timestampColVector1.asScratchTimestamp(elementNum1);
+    final Timestamp right = asScratchTimestamp(elementNum2);
+    return left.compareTo(right);
+  }
+
+  /**
+   * Copy the time and nanos of one row of another TimestampColumnVector into a row
+   * of this vector. The isNull, noNulls and isRepeating flags of either vector are
+   * neither read nor written here.
+   * @param outElementNum index of the destination row in this vector
+   * @param inputElementNum index of the source row in inputVector
+   * @param inputVector the source vector; it is cast to TimestampColumnVector
+   */
+  @Override
+  public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) {
+    final TimestampColumnVector source = (TimestampColumnVector) inputVector;
+    this.time[outElementNum] = source.time[inputElementNum];
+    this.nanos[outElementNum] = source.nanos[inputElementNum];
+  }
+
+  // Simplify the vector by brute-force flattening of noNulls and isRepeating.
+  // A repeating vector has its entry 0 written to each of the first 'size' rows, or to
+  // the rows listed in sel when selectedInUse is true, and isRepeating is cleared.
+  // This can be used to reduce combinatorial explosion of code paths in
+  // VectorExpressions with many arguments.
+  public void flatten(boolean selectedInUse, int[] sel, int size) {
+    flattenPush();
+    if (isRepeating) {
+      isRepeating = false;
+      final long repeatedTime = time[0];
+      final int repeatedNanos = nanos[0];
+      if (!selectedInUse) {
+        Arrays.fill(time, 0, size, repeatedTime);
+        Arrays.fill(nanos, 0, size, repeatedNanos);
+      } else {
+        for (int k = 0; k < size; k++) {
+          final int row = sel[k];
+          time[row] = repeatedTime;
+          nanos[row] = repeatedNanos;
+        }
+      }
+      flattenRepeatingNulls(selectedInUse, sel, size);
+    }
+    flattenNoNulls(selectedInUse, sel, size);
+  }
+
+  /**
+   * Set a row from a timestamp.
+   * A null timestamp marks the row as NULL and clears noNulls; otherwise only the
+   * time and nanos of the row are written (isNull is left as it was).
+   * We assume the entry has already been isRepeated adjusted.
+   * @param elementNum index of the row to set
+   * @param timestamp the value to store, or null to mark the row NULL
+   */
+  public void set(int elementNum, Timestamp timestamp) {
+    if (timestamp != null) {
+      time[elementNum] = timestamp.getTime();
+      nanos[elementNum] = timestamp.getNanos();
+      return;
+    }
+    noNulls = false;
+    isNull[elementNum] = true;
+  }
+
+  /**
+   * Set a row from the value currently held by the scratch timestamp.
+   * Only the time and nanos of the row are written.
+   * @param elementNum index of the row to set
+   */
+  public void setFromScratchTimestamp(int elementNum) {
+    final Timestamp current = scratchTimestamp;
+    time[elementNum] = current.getTime();
+    nanos[elementNum] = current.getNanos();
+  }
+
+  /**
+   * Set a row to the standard null value(s): time 0 and nanos 1.
+   * The isNull flag of the row is not touched here.
+   * We assume the entry has already been isRepeated adjusted.
+   * @param elementNum index of the row to overwrite
+   */
+  public void setNullValue(int elementNum) {
+    final long nullTime = 0;
+    final int nullNanos = 1;
+    this.time[elementNum] = nullTime;
+    this.nanos[elementNum] = nullNanos;
+  }
+
+  // Copy the contents of this vector into output. Only the selected entries are
+  // copied: the rows listed in sel when selectedInUse is true, otherwise the first
+  // 'size' rows. A repeating input is copied as a repeating output (entry 0 only).
+  public void copySelected(
+      boolean selectedInUse, int[] sel, int size, TimestampColumnVector output) {
+
+    // The output has nulls if and only if the input has nulls.
+    output.noNulls = noNulls;
+    output.isRepeating = false;
+
+    // Repeating case: entry 0 describes every row.
+    if (isRepeating) {
+      output.time[0] = time[0];
+      output.nanos[0] = nanos[0];
+      output.isNull[0] = isNull[0];
+      output.isRepeating = true;
+      return;
+    }
+
+    // Dense case: bulk-copy the leading 'size' entries, values first and then nulls.
+    if (!selectedInUse) {
+      System.arraycopy(time, 0, output.time, 0, size);
+      System.arraycopy(nanos, 0, output.nanos, 0, size);
+      if (!noNulls) {
+        System.arraycopy(isNull, 0, output.isNull, 0, size);
+      }
+      return;
+    }
+
+    // Selected case: copy values row by row through the selection vector ...
+    for (int k = 0; k < size; k++) {
+      final int row = sel[k];
+      output.time[row] = time[row];
+      output.nanos[row] = nanos[row];
+    }
+
+    // ... and then the null flags, but only when the input actually has nulls.
+    if (!noNulls) {
+      for (int k = 0; k < size; k++) {
+        final int row = sel[k];
+        output.isNull[row] = isNull[row];
+      }
+    }
+  }
+
+  /**
+   * Fill all the vector entries with a timestamp by marking the vector as
+   * repeating with no nulls and storing the value in entry 0.
+   * @param timestamp the value every row should take; must not be null
+   */
+  public void fill(Timestamp timestamp) {
+    this.noNulls = true;
+    this.isRepeating = true;
+    this.time[0] = timestamp.getTime();
+    this.nanos[0] = timestamp.getNanos();
+  }
+
+  /**
+   * Return a convenience writable object stored by this column vector.
+   * Supports keeping a TimestampWritable object without having to import that definition...
+   * @return the object last passed to {@link #setScratchWritable(Writable)}, as held in
+   *         the scratchWritable field
+   */
+  public Writable getScratchWritable() {
+    return scratchWritable;
+  }
+
+  /**
+   * Set the convenience writable object stored by this column vector.
+   * The reference is stored as-is; no copy is made.
+   * @param scratchWritable the writable to keep for later retrieval through
+   *        {@link #getScratchWritable()}
+   */
+  public void setScratchWritable(Writable scratchWritable) {
+    this.scratchWritable = scratchWritable;
+  }
+
+  /**
+   * Append the text form of a row to the buffer: "null" for a NULL row, otherwise
+   * the toString() of the row loaded into the scratch timestamp (which is
+   * overwritten). For a repeating vector, entry 0 is used whatever row is asked for.
+   * @param buffer the builder to append to
+   * @param row index of the row to render
+   */
+  @Override
+  public void stringifyValue(StringBuilder buffer, int row) {
+    final int index = isRepeating ? 0 : row;
+    if (!noNulls && isNull[index]) {
+      buffer.append("null");
+      return;
+    }
+    scratchTimestamp.setTime(time[index]);
+    scratchTimestamp.setNanos(nanos[index]);
+    buffer.append(scratchTimestamp.toString());
+  }
+}

http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/UnionColumnVector.java
----------------------------------------------------------------------
diff --git a/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/UnionColumnVector.java b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/UnionColumnVector.java
new file mode 100644
index 0000000..0c61243
--- /dev/null
+++ b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/UnionColumnVector.java
@@ -0,0 +1,140 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+
+/**
+ * The representation of a vectorized column of union objects.
+ *
+ * Each variant of the union is represented by a separate inner ColumnVector
+ * in {@link #fields}; the per-row entry in {@link #tags} selects which of
+ * those vectors holds the value of the row.
+ *
+ * NOTE(review): the previous comment (shared with the struct vector) stated
+ * that isRepeating only covers the isNull array. setElement and
+ * stringifyValue read tags[0] and row 0 of the child when isRepeating is
+ * set -- confirm the intended semantics.
+ */
+public class UnionColumnVector extends ColumnVector {
+
+  /** For each row, the index into {@link #fields} of the vector holding its value. */
+  public int[] tags;
+  /** One child vector per variant of the union. */
+  public ColumnVector[] fields;
+
+  /**
+   * Create a vector of the default batch size with no child vectors.
+   */
+  public UnionColumnVector() {
+    this(VectorizedRowBatch.DEFAULT_SIZE);
+  }
+
+  /**
+   * Constructor for UnionColumnVector
+   *
+   * @param len Vector length
+   * @param fields the field column vectors; the array is kept by reference
+   */
+  public UnionColumnVector(int len, ColumnVector... fields) {
+    super(len);
+    tags = new int[len];
+    this.fields = fields;
+  }
+
+  /**
+   * Flatten this vector and every child vector with the same selection.
+   */
+  @Override
+  public void flatten(boolean selectedInUse, int[] sel, int size) {
+    flattenPush();
+    for (ColumnVector field : fields) {
+      field.flatten(selectedInUse, sel, size);
+    }
+    flattenNoNulls(selectedInUse, sel, size);
+  }
+
+  /**
+   * Copy one row of another UnionColumnVector into a row of this vector.
+   * A repeating input is read at row 0. A NULL input row marks the output row
+   * NULL and clears noNulls; otherwise the tag is copied and the value is copied
+   * between the child vectors selected by that tag.
+   *
+   * @param outElementNum index of the destination row in this vector
+   * @param inputElementNum index of the source row in inputVector
+   * @param inputVector the source vector; cast to UnionColumnVector for non-NULL rows
+   */
+  @Override
+  public void setElement(int outElementNum, int inputElementNum,
+                         ColumnVector inputVector) {
+    if (inputVector.isRepeating) {
+      inputElementNum = 0;
+    }
+    if (inputVector.noNulls || !inputVector.isNull[inputElementNum]) {
+      isNull[outElementNum] = false;
+      UnionColumnVector input = (UnionColumnVector) inputVector;
+      int tag = input.tags[inputElementNum];
+      tags[outElementNum] = tag;
+      fields[tag].setElement(outElementNum, inputElementNum, input.fields[tag]);
+    } else {
+      noNulls = false;
+      isNull[outElementNum] = true;
+    }
+  }
+
+  /**
+   * Append a row as {"tag": t, "value": v}, or "null" for a NULL row.
+   * A repeating vector is always rendered from row 0.
+   */
+  @Override
+  public void stringifyValue(StringBuilder buffer, int row) {
+    if (isRepeating) {
+      row = 0;
+    }
+    if (noNulls || !isNull[row]) {
+      buffer.append("{\"tag\": ");
+      buffer.append(tags[row]);
+      buffer.append(", \"value\": ");
+      fields[tags[row]].stringifyValue(buffer, row);
+      buffer.append('}');
+    } else {
+      buffer.append("null");
+    }
+  }
+
+  /**
+   * Make sure this vector, its tags and all of its child vectors can hold at
+   * least the requested number of rows.
+   *
+   * @param size the number of rows that must fit
+   * @param preserveData whether existing tags (and child data) must be kept
+   */
+  @Override
+  public void ensureSize(int size, boolean preserveData) {
+    super.ensureSize(size, preserveData);
+    if (tags.length < size) {
+      int[] oldTags = tags;
+      tags = new int[size];
+      if (preserveData) {
+        System.arraycopy(oldTags, 0, tags, 0, oldTags.length);
+      }
+    }
+    // Grow the children regardless of whether tags had to grow: a child may have
+    // been created with a smaller capacity than the tags array.
+    for (ColumnVector field : fields) {
+      field.ensureSize(size, preserveData);
+    }
+  }
+
+  /**
+   * Reset this vector and every child vector.
+   */
+  @Override
+  public void reset() {
+    super.reset();
+    for (ColumnVector field : fields) {
+      field.reset();
+    }
+  }
+
+  /**
+   * Initialize this vector and every child vector.
+   */
+  @Override
+  public void init() {
+    super.init();
+    for (ColumnVector field : fields) {
+      field.init();
+    }
+  }
+
+  /**
+   * Undo a previous flatten on this vector and every child vector.
+   */
+  @Override
+  public void unFlatten() {
+    super.unFlatten();
+    for (ColumnVector field : fields) {
+      field.unFlatten();
+    }
+  }
+
+  /**
+   * Set the repeating flag on this vector and propagate it to every child vector.
+   */
+  @Override
+  public void setRepeating(boolean isRepeating) {
+    super.setRepeating(isRepeating);
+    for (ColumnVector field : fields) {
+      field.setRepeating(isRepeating);
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java
----------------------------------------------------------------------
diff --git a/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java
new file mode 100644
index 0000000..9c066e0
--- /dev/null
+++ b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java
@@ -0,0 +1,218 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Writable;
+
+/**
+ * A VectorizedRowBatch is a set of rows, organized with each column
+ * as a vector. It is the unit of query execution, organized to minimize
+ * the cost per row and achieve a low cycles-per-instruction count.
+ * The major fields are public by design to allow fast and convenient
+ * access by the vectorized query execution code.
+ */
+public class VectorizedRowBatch implements Writable {
+  public int numCols;           // number of columns
+  public ColumnVector[] cols;   // a vector for each column
+  public int size;              // number of rows that qualify (i.e. haven't been filtered out)
+  public int[] selected;        // array of positions of selected values
+  public int[] projectedColumns; // indexes into cols of the projected columns, in output order
+  public int projectionSize;     // number of valid entries in projectedColumns
+
+  // Both counts are -1 until setPartitionInfo is called.
+  private int dataColumnCount;
+  private int partitionColumnCount;
+
+
+  /*
+   * If no filtering has been applied yet, selectedInUse is false,
+   * meaning that all rows qualify. If it is true, then the selected[] array
+   * records the offsets of qualifying rows.
+   */
+  public boolean selectedInUse;
+
+  // If this is true, then there is no data in the batch -- we have hit the end of input.
+  public boolean endOfFile;
+
+  /*
+   * This number is carefully chosen to minimize overhead and typically allows
+   * one VectorizedRowBatch to fit in cache.
+   */
+  public static final int DEFAULT_SIZE = 1024;
+
+  /**
+   * Return a batch with the specified number of columns.
+   * This is the standard constructor -- all batches should be the same size
+   *
+   * @param numCols the number of columns to include in the batch
+   */
+  public VectorizedRowBatch(int numCols) {
+    this(numCols, DEFAULT_SIZE);
+  }
+
+  /**
+   * Return a batch with the specified number of columns and rows.
+   * Only call this constructor directly for testing purposes.
+   * Batch size should normally always be defaultSize.
+   * The column vectors themselves are not created here; every entry of
+   * cols starts out null.
+   *
+   * @param numCols the number of columns to include in the batch
+   * @param size  the number of rows to include in the batch
+   */
+  public VectorizedRowBatch(int numCols, int size) {
+    this.numCols = numCols;
+    this.size = size;
+    selected = new int[size];
+    selectedInUse = false;
+    this.cols = new ColumnVector[numCols];
+    projectedColumns = new int[numCols];
+
+    // Initially all columns are projected and in the same order
+    projectionSize = numCols;
+    for (int i = 0; i < numCols; i++) {
+      projectedColumns[i] = i;
+    }
+
+    dataColumnCount = -1;
+    partitionColumnCount = -1;
+  }
+
+  /**
+   * Record how many of the columns are data columns and how many are
+   * partition columns.
+   *
+   * @param dataColumnCount the number of data columns
+   * @param partitionColumnCount the number of partition columns
+   */
+  public void setPartitionInfo(int dataColumnCount, int partitionColumnCount) {
+    this.dataColumnCount = dataColumnCount;
+    this.partitionColumnCount = partitionColumnCount;
+  }
+
+  /**
+   * @return the data column count, or -1 if setPartitionInfo was never called
+   */
+  public int getDataColumnCount() {
+    return dataColumnCount;
+  }
+
+  /**
+   * @return the partition column count, or -1 if setPartitionInfo was never called
+   */
+  public int getPartitionColumnCount() {
+    return partitionColumnCount;
+  }
+
+  /**
+   * Returns the maximum size of the batch (number of rows it can hold),
+   * taken from the length of the selected array.
+   */
+  public int getMaxSize() {
+    return selected.length;
+  }
+
+  /**
+   * Return count of qualifying rows.
+   *
+   * @return number of rows that have not been filtered out
+   */
+  public long count() {
+    return size;
+  }
+
+  private static String toUTF8(Object o) {
+    if(o == null || o instanceof NullWritable) {
+      return "\\N"; /* as found in LazySimpleSerDe's nullSequence */
+    }
+    return o.toString();
+  }
+
+  /**
+   * Render the qualifying rows, one per line, each as a bracketed,
+   * comma-separated list of the projected column values. An empty batch
+   * renders as the empty string. Columns that have not been set (null)
+   * contribute no text.
+   */
+  @Override
+  public String toString() {
+    if (size == 0) {
+      return "";
+    }
+    StringBuilder b = new StringBuilder();
+    for (int j = 0; j < size; j++) {
+      // With a selection in use, j indexes the selection vector rather than the row.
+      int row = selectedInUse ? selected[j] : j;
+      appendRow(b, row);
+      if (j < size - 1) {
+        b.append('\n');
+      }
+    }
+    return b.toString();
+  }
+
+  // Appends one row as "[v0, v1, ...]" over the projected columns, skipping null columns.
+  private void appendRow(StringBuilder b, int row) {
+    b.append('[');
+    for (int k = 0; k < projectionSize; k++) {
+      ColumnVector cv = cols[projectedColumns[k]];
+      if (k > 0) {
+        b.append(", ");
+      }
+      if (cv != null) {
+        cv.stringifyValue(b, row);
+      }
+    }
+    b.append(']');
+  }
+
+  /**
+   * Not supported.
+   *
+   * @throws UnsupportedOperationException always
+   */
+  @Override
+  public void readFields(DataInput arg0) throws IOException {
+    throw new UnsupportedOperationException("Do you really need me?");
+  }
+
+  /**
+   * Not supported.
+   *
+   * @throws UnsupportedOperationException always
+   */
+  @Override
+  public void write(DataOutput arg0) throws IOException {
+    throw new UnsupportedOperationException("Don't call me");
+  }
+
+  /**
+   * Resets the row batch to default state
+   *  - sets selectedInUse to false
+   *  - sets size to 0
+   *  - sets endOfFile to false
+   *  - resets each column
+   *  - inits each column
+   * Columns that have not been set (null) are skipped.
+   */
+  public void reset() {
+    selectedInUse = false;
+    size = 0;
+    endOfFile = false;
+    for (ColumnVector vc : cols) {
+      if (vc != null) {
+        vc.reset();
+        vc.init();
+      }
+    }
+  }
+
+  /**
+   * Grow every column vector so that it can hold at least the given
+   * number of rows. Data is not preserved.
+   * Columns that have not been set (null) are skipped, as in reset().
+   * The selected array is not resized, so getMaxSize() is unaffected.
+   *
+   * @param rows the number of rows each column must be able to hold
+   */
+  public void ensureSize(int rows) {
+    for (ColumnVector vc : cols) {
+      if (vc != null) {
+        vc.ensureSize(rows, false);
+      }
+    }
+  }
+}