Posted to commits@hive.apache.org by ke...@apache.org on 2013/03/05 21:44:52 UTC

svn commit: r1452992 [3/8] - in /hive/trunk: ./ ivy/ ql/ ql/src/gen/protobuf/ ql/src/gen/protobuf/gen-java/ ql/src/gen/protobuf/gen-java/org/ ql/src/gen/protobuf/gen-java/org/apache/ ql/src/gen/protobuf/gen-java/org/apache/hadoop/ ql/src/gen/protobuf/g...

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldReader.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldReader.java?rev=1452992&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldReader.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldReader.java Tue Mar  5 20:44:50 2013
@@ -0,0 +1,88 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+import java.io.EOFException;
+import java.io.IOException;
+
+class BitFieldReader {
+  private RunLengthByteReader input;
+  private final int bitSize;
+  private int current;
+  private int bitsLeft;
+  private final int mask;
+
+  BitFieldReader(InStream input,
+                 int bitSize) throws IOException {
+    this.input = new RunLengthByteReader(input);
+    this.bitSize = bitSize;
+    mask = (1 << bitSize) - 1;
+  }
+
+  private void readByte() throws IOException {
+    if (input.hasNext()) {
+      current = 0xff & input.next();
+      bitsLeft = 8;
+    } else {
+      throw new EOFException("Read past end of bit field from " + input);
+    }
+  }
+
+  int next() throws IOException {
+    int result = 0;
+    int bitsLeftToRead = bitSize;
+    while (bitsLeftToRead > bitsLeft) {
+      result <<= bitsLeft;
+      result |= current & ((1 << bitsLeft) - 1);
+      bitsLeftToRead -= bitsLeft;
+      readByte();
+    }
+    if (bitsLeftToRead > 0) {
+      result <<= bitsLeftToRead;
+      bitsLeft -= bitsLeftToRead;
+      result |= (current >>> bitsLeft) & ((1 << bitsLeftToRead) - 1);
+    }
+    return result & mask;
+  }
+
+  void seek(PositionProvider index) throws IOException {
+    input.seek(index);
+    int consumed = (int) index.getNext();
+    if (consumed > 8) {
+      throw new IllegalArgumentException("Seek past end of byte at " +
+          consumed + " in " + input);
+    } else if (consumed != 0) {
+      readByte();
+      bitsLeft = 8 - consumed;
+    } else {
+      bitsLeft = 0;
+    }
+  }
+
+  void skip(long items) throws IOException {
+    long totalBits = bitSize * items;
+    if (bitsLeft >= totalBits) {
+      bitsLeft -= totalBits;
+    } else {
+      totalBits -= bitsLeft;
+      input.skip(totalBits / 8);
+      current = input.next();
+      bitsLeft = (int) (8 - (totalBits % 8));
+    }
+  }
+}

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldWriter.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldWriter.java?rev=1452992&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldWriter.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldWriter.java Tue Mar  5 20:44:50 2013
@@ -0,0 +1,69 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+import java.io.IOException;
+
+class BitFieldWriter {
+  private RunLengthByteWriter output;
+  private final int bitSize;
+  private byte current = 0;
+  private int bitsLeft = 8;
+
+  BitFieldWriter(PositionedOutputStream output,
+                 int bitSize) throws IOException {
+    this.output = new RunLengthByteWriter(output);
+    this.bitSize = bitSize;
+  }
+
+  private void writeByte() throws IOException {
+    output.write(current);
+    current = 0;
+    bitsLeft = 8;
+  }
+
+  void flush() throws IOException {
+    if (bitsLeft != 8) {
+      writeByte();
+    }
+    output.flush();
+  }
+
+  void write(int value) throws IOException {
+    int bitsToWrite = bitSize;
+    while (bitsToWrite > bitsLeft) {
+      // add the bits to the bottom of the current word
+      current |= value >>> (bitsToWrite - bitsLeft);
+      // subtract out the bits we just added
+      bitsToWrite -= bitsLeft;
+      // zero out the bits above bitsToWrite
+      value &= (1 << bitsToWrite) - 1;
+      writeByte();
+    }
+    bitsLeft -= bitsToWrite;
+    current |= value << bitsLeft;
+    if (bitsLeft == 0) {
+      writeByte();
+    }
+  }
+
+  void getPosition(PositionRecorder recorder) throws IOException {
+    output.getPosition(recorder);
+    recorder.addPosition(8 - bitsLeft);
+  }
+}
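
For illustration only (nothing below is part of the commit), the MSB-first bit
packing that BitFieldWriter.write() produces and BitFieldReader.next() consumes
can be exercised standalone, without the run-length byte layer underneath. The
class and variable names here are hypothetical:

  public class BitPackDemo {
    public static void main(String[] args) {
      final int bitSize = 3;                 // width of each value, e.g. 0..7
      int[] values = {5, 1, 7, 2, 6};
      byte[] packed = new byte[(values.length * bitSize + 7) / 8];

      // Pack: fill each byte from its most significant bit downwards,
      // mirroring BitFieldWriter.write().
      int pos = 0, current = 0, bitsLeft = 8;
      for (int v : values) {
        int bitsToWrite = bitSize;
        while (bitsToWrite > bitsLeft) {
          current |= v >>> (bitsToWrite - bitsLeft); // top bits of the value
          bitsToWrite -= bitsLeft;
          v &= (1 << bitsToWrite) - 1;               // keep the remaining bits
          packed[pos++] = (byte) current;
          current = 0;
          bitsLeft = 8;
        }
        bitsLeft -= bitsToWrite;
        current |= v << bitsLeft;
        if (bitsLeft == 0) {
          packed[pos++] = (byte) current;
          current = 0;
          bitsLeft = 8;
        }
      }
      if (bitsLeft != 8) {
        packed[pos] = (byte) current;                // flush the partial byte
      }

      // Unpack: the mirror image, as in BitFieldReader.next().
      int[] out = new int[values.length];
      int rpos = 0, cur = 0, left = 0;
      for (int i = 0; i < out.length; ++i) {
        int result = 0;
        int bitsToRead = bitSize;
        while (bitsToRead > left) {
          result = (result << left) | (cur & ((1 << left) - 1));
          bitsToRead -= left;
          cur = 0xff & packed[rpos++];
          left = 8;
        }
        left -= bitsToRead;
        out[i] = (result << bitsToRead) | ((cur >>> left) & ((1 << bitsToRead) - 1));
      }
      System.out.println(java.util.Arrays.toString(out)); // [5, 1, 7, 2, 6]
    }
  }

A round trip over {5, 1, 7, 2, 6} with bitSize 3 packs into two bytes
(0xA7, 0xAC) and reads back unchanged.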

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BooleanColumnStatistics.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BooleanColumnStatistics.java?rev=1452992&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BooleanColumnStatistics.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BooleanColumnStatistics.java Tue Mar  5 20:44:50 2013
@@ -0,0 +1,27 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+/**
+ * Statistics for boolean columns.
+ */
+public interface BooleanColumnStatistics extends ColumnStatistics {
+  long getFalseCount();
+
+  long getTrueCount();
+}

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatistics.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatistics.java?rev=1452992&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatistics.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatistics.java Tue Mar  5 20:44:50 2013
@@ -0,0 +1,30 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+/**
+ * Statistics that are available for all types of columns.
+ */
+public interface ColumnStatistics {
+  /**
+   * Get the number of values in this column. It will differ from the number
+   * of rows because of NULL values and repeated values.
+   * @return the number of values
+   */
+  long getNumberOfValues();
+}

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java?rev=1452992&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java Tue Mar  5 20:44:50 2013
@@ -0,0 +1,516 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+
+class ColumnStatisticsImpl implements ColumnStatistics {
+
+  private static final class BooleanStatisticsImpl extends ColumnStatisticsImpl
+      implements BooleanColumnStatistics {
+    private long trueCount = 0;
+
+    BooleanStatisticsImpl(OrcProto.ColumnStatistics stats) {
+      super(stats);
+      OrcProto.BucketStatistics bkt = stats.getBucketStatistics();
+      trueCount = bkt.getCount(0);
+    }
+
+    BooleanStatisticsImpl() {
+    }
+
+    @Override
+    void reset() {
+      super.reset();
+      trueCount = 0;
+    }
+
+    @Override
+    void updateBoolean(boolean value) {
+      if (value) {
+        trueCount += 1;
+      }
+    }
+
+    @Override
+    void merge(ColumnStatisticsImpl other) {
+      super.merge(other);
+      BooleanStatisticsImpl bkt = (BooleanStatisticsImpl) other;
+      trueCount += bkt.trueCount;
+    }
+
+    @Override
+    OrcProto.ColumnStatistics.Builder serialize() {
+      OrcProto.ColumnStatistics.Builder builder = super.serialize();
+      OrcProto.BucketStatistics.Builder bucket =
+        OrcProto.BucketStatistics.newBuilder();
+      bucket.addCount(trueCount);
+      builder.setBucketStatistics(bucket);
+      return builder;
+    }
+
+    @Override
+    public long getFalseCount() {
+      return getNumberOfValues() - trueCount;
+    }
+
+    @Override
+    public long getTrueCount() {
+      return trueCount;
+    }
+
+    @Override
+    public String toString() {
+      return super.toString() + " true: " + trueCount;
+    }
+  }
+
+  private static final class IntegerStatisticsImpl extends ColumnStatisticsImpl
+      implements IntegerColumnStatistics {
+
+    private long minimum = Long.MAX_VALUE;
+    private long maximum = Long.MIN_VALUE;
+    private long sum = 0;
+    private boolean hasMinimum = false;
+    private boolean overflow = false;
+
+    IntegerStatisticsImpl() {
+    }
+
+    IntegerStatisticsImpl(OrcProto.ColumnStatistics stats) {
+      super(stats);
+      OrcProto.IntegerStatistics intStat = stats.getIntStatistics();
+      if (intStat.hasMinimum()) {
+        hasMinimum = true;
+        minimum = intStat.getMinimum();
+      }
+      if (intStat.hasMaximum()) {
+        maximum = intStat.getMaximum();
+      }
+      if (intStat.hasSum()) {
+        sum = intStat.getSum();
+      } else {
+        overflow = true;
+      }
+    }
+
+    @Override
+    void reset() {
+      super.reset();
+      hasMinimum = false;
+      minimum = Long.MAX_VALUE;
+      maximum = Long.MIN_VALUE;
+      sum = 0;
+      overflow = false;
+    }
+
+    @Override
+    void updateInteger(long value) {
+      if (!hasMinimum) {
+        hasMinimum = true;
+        minimum = value;
+        maximum = value;
+      } else if (value < minimum) {
+        minimum = value;
+      } else if (value > maximum) {
+        maximum = value;
+      }
+      if (!overflow) {
+        boolean wasPositive = sum >= 0;
+        sum += value;
+        if ((value >= 0) == wasPositive) {
+          overflow = (sum >= 0) != wasPositive;
+        }
+      }
+    }
+
+    @Override
+    void merge(ColumnStatisticsImpl other) {
+      IntegerStatisticsImpl otherInt = (IntegerStatisticsImpl) other;
+      if (!hasMinimum) {
+        hasMinimum = otherInt.hasMinimum;
+        minimum = otherInt.minimum;
+        maximum = otherInt.maximum;
+      } else if (otherInt.hasMinimum) {
+        if (otherInt.minimum < minimum) {
+          minimum = otherInt.minimum;
+        }
+        if (otherInt.maximum > maximum) {
+          maximum = otherInt.maximum;
+        }
+      }
+      super.merge(other);
+      overflow |= otherInt.overflow;
+      if (!overflow) {
+        boolean wasPositive = sum >= 0;
+        sum += otherInt.sum;
+        if ((otherInt.sum >= 0) == wasPositive) {
+          overflow = (sum >= 0) != wasPositive;
+        }
+      }
+    }
+
+    @Override
+    OrcProto.ColumnStatistics.Builder serialize() {
+      OrcProto.ColumnStatistics.Builder builder = super.serialize();
+      OrcProto.IntegerStatistics.Builder intb =
+        OrcProto.IntegerStatistics.newBuilder();
+      if (hasMinimum) {
+        intb.setMinimum(minimum);
+        intb.setMaximum(maximum);
+      }
+      if (!overflow) {
+        intb.setSum(sum);
+      }
+      builder.setIntStatistics(intb);
+      return builder;
+    }
+
+    @Override
+    public long getMinimum() {
+      return minimum;
+    }
+
+    @Override
+    public long getMaximum() {
+      return maximum;
+    }
+
+    @Override
+    public boolean isSumDefined() {
+      return !overflow;
+    }
+
+    @Override
+    public long getSum() {
+      return sum;
+    }
+
+    @Override
+    public String toString() {
+      StringBuilder buf = new StringBuilder(super.toString());
+      if (hasMinimum) {
+        buf.append(" min: ");
+        buf.append(minimum);
+        buf.append(" max: ");
+        buf.append(maximum);
+      }
+      if (!overflow) {
+        buf.append(" sum: ");
+        buf.append(sum);
+      }
+      return buf.toString();
+    }
+  }
+
+  private static final class DoubleStatisticsImpl extends ColumnStatisticsImpl
+       implements DoubleColumnStatistics {
+    private boolean hasMinimum = false;
+    private double minimum = Double.MAX_VALUE;
+    private double maximum = Double.MIN_VALUE;
+    private double sum = 0;
+
+    DoubleStatisticsImpl() {
+    }
+
+    DoubleStatisticsImpl(OrcProto.ColumnStatistics stats) {
+      super(stats);
+      OrcProto.DoubleStatistics dbl = stats.getDoubleStatistics();
+      if (dbl.hasMinimum()) {
+        hasMinimum = true;
+        minimum = dbl.getMinimum();
+      }
+      if (dbl.hasMaximum()) {
+        maximum = dbl.getMaximum();
+      }
+      if (dbl.hasSum()) {
+        sum = dbl.getSum();
+      }
+    }
+
+    @Override
+    void reset() {
+      super.reset();
+      hasMinimum = false;
+      minimum = Double.MAX_VALUE;
+      maximum = Double.MIN_VALUE;
+      sum = 0;
+    }
+
+    @Override
+    void updateDouble(double value) {
+      if (!hasMinimum) {
+        hasMinimum = true;
+        minimum = value;
+        maximum = value;
+      } else if (value < minimum) {
+        minimum = value;
+      } else if (value > maximum) {
+        maximum = value;
+      }
+      sum += value;
+    }
+
+    @Override
+    void merge(ColumnStatisticsImpl other) {
+      super.merge(other);
+      DoubleStatisticsImpl dbl = (DoubleStatisticsImpl) other;
+      if (!hasMinimum) {
+        hasMinimum = dbl.hasMinimum;
+        minimum = dbl.minimum;
+        maximum = dbl.maximum;
+      } else if (dbl.hasMinimum) {
+        if (dbl.minimum < minimum) {
+          minimum = dbl.minimum;
+        }
+        if (dbl.maximum > maximum) {
+          maximum = dbl.maximum;
+        }
+      }
+      sum += dbl.sum;
+    }
+
+    @Override
+    OrcProto.ColumnStatistics.Builder serialize() {
+      OrcProto.ColumnStatistics.Builder builder = super.serialize();
+      OrcProto.DoubleStatistics.Builder dbl =
+        OrcProto.DoubleStatistics.newBuilder();
+      if (hasMinimum) {
+        dbl.setMinimum(minimum);
+        dbl.setMaximum(maximum);
+      }
+      dbl.setSum(sum);
+      builder.setDoubleStatistics(dbl);
+      return builder;
+    }
+
+    @Override
+    public double getMinimum() {
+      return minimum;
+    }
+
+    @Override
+    public double getMaximum() {
+      return maximum;
+    }
+
+    @Override
+    public double getSum() {
+      return sum;
+    }
+
+    @Override
+    public String toString() {
+      StringBuilder buf = new StringBuilder(super.toString());
+      if (hasMinimum) {
+        buf.append(" min: ");
+        buf.append(minimum);
+        buf.append(" max: ");
+        buf.append(maximum);
+      }
+      buf.append(" sum: ");
+      buf.append(sum);
+      return buf.toString();
+    }
+  }
+
+  private static final class StringStatisticsImpl extends ColumnStatisticsImpl
+      implements StringColumnStatistics {
+    private String minimum = null;
+    private String maximum = null;
+
+    StringStatisticsImpl() {
+    }
+
+    StringStatisticsImpl(OrcProto.ColumnStatistics stats) {
+      super(stats);
+      OrcProto.StringStatistics str = stats.getStringStatistics();
+      if (str.hasMaximum()) {
+        maximum = str.getMaximum();
+      }
+      if (str.hasMinimum()) {
+        minimum = str.getMinimum();
+      }
+    }
+
+    @Override
+    void reset() {
+      super.reset();
+      minimum = null;
+      maximum = null;
+    }
+
+    @Override
+    void updateString(String value) {
+      if (minimum == null) {
+        minimum = value;
+        maximum = value;
+      } else if (minimum.compareTo(value) > 0) {
+        minimum = value;
+      } else if (maximum.compareTo(value) < 0) {
+        maximum = value;
+      }
+    }
+
+    @Override
+    void merge(ColumnStatisticsImpl other) {
+      super.merge(other);
+      StringStatisticsImpl str = (StringStatisticsImpl) other;
+      if (minimum == null) {
+        minimum = str.minimum;
+        maximum = str.maximum;
+      } else if (str.minimum != null) {
+        if (minimum.compareTo(str.minimum) > 0) {
+          minimum = str.minimum;
+        }
+        if (maximum.compareTo(str.maximum) < 0) {
+          maximum = str.maximum;
+        }
+      }
+    }
+
+    @Override
+    OrcProto.ColumnStatistics.Builder serialize() {
+      OrcProto.ColumnStatistics.Builder result = super.serialize();
+      OrcProto.StringStatistics.Builder str =
+        OrcProto.StringStatistics.newBuilder();
+      if (getNumberOfValues() != 0) {
+        str.setMinimum(minimum);
+        str.setMaximum(maximum);
+      }
+      result.setStringStatistics(str);
+      return result;
+    }
+
+    @Override
+    public String getMinimum() {
+      return minimum;
+    }
+
+    @Override
+    public String getMaximum() {
+      return maximum;
+    }
+
+    @Override
+    public String toString() {
+      StringBuilder buf = new StringBuilder(super.toString());
+      if (getNumberOfValues() != 0) {
+        buf.append(" min: ");
+        buf.append(minimum);
+        buf.append(" max: ");
+        buf.append(maximum);
+      }
+      return buf.toString();
+    }
+  }
+
+  private long count = 0;
+
+  ColumnStatisticsImpl(OrcProto.ColumnStatistics stats) {
+    if (stats.hasNumberOfValues()) {
+      count = stats.getNumberOfValues();
+    }
+  }
+
+  ColumnStatisticsImpl() {
+  }
+
+  void increment() {
+    count += 1;
+  }
+
+  void updateBoolean(boolean value) {
+    throw new UnsupportedOperationException("Can't update boolean");
+  }
+
+  void updateInteger(long value) {
+    throw new UnsupportedOperationException("Can't update integer");
+  }
+
+  void updateDouble(double value) {
+    throw new UnsupportedOperationException("Can't update double");
+  }
+
+  void updateString(String value) {
+    throw new UnsupportedOperationException("Can't update string");
+  }
+
+  void merge(ColumnStatisticsImpl stats) {
+    count += stats.count;
+  }
+
+  void reset() {
+    count = 0;
+  }
+
+  @Override
+  public long getNumberOfValues() {
+    return count;
+  }
+
+  @Override
+  public String toString() {
+    return "count: " + count;
+  }
+
+  OrcProto.ColumnStatistics.Builder serialize() {
+    OrcProto.ColumnStatistics.Builder builder =
+      OrcProto.ColumnStatistics.newBuilder();
+    builder.setNumberOfValues(count);
+    return builder;
+  }
+
+  static ColumnStatisticsImpl create(ObjectInspector inspector) {
+    switch (inspector.getCategory()) {
+      case PRIMITIVE:
+        switch (((PrimitiveObjectInspector) inspector).getPrimitiveCategory()) {
+          case BOOLEAN:
+            return new BooleanStatisticsImpl();
+          case BYTE:
+          case SHORT:
+          case INT:
+          case LONG:
+            return new IntegerStatisticsImpl();
+          case FLOAT:
+          case DOUBLE:
+            return new DoubleStatisticsImpl();
+          case STRING:
+            return new StringStatisticsImpl();
+          default:
+            return new ColumnStatisticsImpl();
+        }
+      default:
+        return new ColumnStatisticsImpl();
+    }
+  }
+
+  static ColumnStatisticsImpl deserialize(OrcProto.ColumnStatistics stats) {
+    if (stats.hasBucketStatistics()) {
+      return new BooleanStatisticsImpl(stats);
+    } else if (stats.hasIntStatistics()) {
+      return new IntegerStatisticsImpl(stats);
+    } else if (stats.hasDoubleStatistics()) {
+      return new DoubleStatisticsImpl(stats);
+    } else if (stats.hasStringStatistics()) {
+      return new StringStatisticsImpl(stats);
+    } else {
+      return new ColumnStatisticsImpl(stats);
+    }
+  }
+}
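
As an aside (not part of the commit), the overflow test used in
IntegerStatisticsImpl.updateInteger() and merge() relies on the fact that
signed addition can only overflow when both operands have the same sign, and
that an overflow flips the sign of the result. A standalone sketch of the same
check, with hypothetical names:

  public class OverflowCheckDemo {
    static boolean addWouldOverflow(long sum, long value) {
      boolean wasNonNegative = sum >= 0;
      long newSum = sum + value;                    // may wrap around
      return ((value >= 0) == wasNonNegative)       // same-sign operands
          && ((newSum >= 0) != wasNonNegative);     // sign of result flipped
    }

    public static void main(String[] args) {
      System.out.println(addWouldOverflow(Long.MAX_VALUE, 1));   // true
      System.out.println(addWouldOverflow(Long.MIN_VALUE, -1));  // true
      System.out.println(addWouldOverflow(Long.MAX_VALUE, -1));  // false
    }
  }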

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionCodec.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionCodec.java?rev=1452992&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionCodec.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionCodec.java Tue Mar  5 20:44:50 2013
@@ -0,0 +1,42 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+interface CompressionCodec {
+  /**
+   * Compress the in buffer to the out buffer.
+   * @param in the bytes to compress
+   * @param out the compressed bytes
+   * @param overflow put any additional bytes here
+   * @return true if the output is smaller than the input
+   * @throws IOException
+   */
+  boolean compress(ByteBuffer in, ByteBuffer out, ByteBuffer overflow
+                  ) throws IOException;
+
+  /**
+   * Decompress the in buffer to the out buffer.
+   * @param in the bytes to decompress
+   * @param out the decompressed bytes
+   * @throws IOException
+   */
+  void decompress(ByteBuffer in, ByteBuffer out) throws IOException;
+}
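
To make the contract above concrete, here is a hypothetical pass-through
implementation (not part of the commit): compress() reports false when it
cannot produce smaller output, leaving the caller free to store the original
bytes, and decompress() simply copies its input. Whether decompress() should
flip the output buffer is an assumption of this sketch.

  package org.apache.hadoop.hive.ql.io.orc;

  import java.io.IOException;
  import java.nio.ByteBuffer;

  class IdentityCodec implements CompressionCodec {
    @Override
    public boolean compress(ByteBuffer in, ByteBuffer out,
                            ByteBuffer overflow) throws IOException {
      // A pass-through "codec" never shrinks the data, so it always reports
      // failure and leaves the input buffer untouched.
      return false;
    }

    @Override
    public void decompress(ByteBuffer in, ByteBuffer out) throws IOException {
      out.put(in);   // copy the bytes straight through
      out.flip();    // leave the result ready for the caller to read
    }
  }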

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionKind.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionKind.java?rev=1452992&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionKind.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionKind.java Tue Mar  5 20:44:50 2013
@@ -0,0 +1,27 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io.orc;
+
+/**
+ * An enumeration that lists the generic compression algorithms that
+ * can be applied to ORC files.
+ */
+public enum CompressionKind {
+  NONE, ZLIB, SNAPPY, LZO
+}

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DoubleColumnStatistics.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DoubleColumnStatistics.java?rev=1452992&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DoubleColumnStatistics.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DoubleColumnStatistics.java Tue Mar  5 20:44:50 2013
@@ -0,0 +1,44 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+/**
+ * Statistics for float and double columns.
+ */
+public interface DoubleColumnStatistics extends ColumnStatistics {
+
+  /**
+   * Get the smallest value in the column. Only defined if getNumberOfValues
+   * is non-zero.
+   * @return the minimum
+   */
+  double getMinimum();
+
+  /**
+   * Get the largest value in the column. Only defined if getNumberOfValues
+   * is non-zero.
+   * @return the maximum
+   */
+  double getMaximum();
+
+  /**
+   * Get the sum of the values in the column.
+   * @return the sum
+   */
+  double getSum();
+}

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicByteArray.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicByteArray.java?rev=1452992&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicByteArray.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicByteArray.java Tue Mar  5 20:44:50 2013
@@ -0,0 +1,270 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+import org.apache.hadoop.io.Text;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.nio.ByteBuffer;
+
+/**
+ * A class that is a growable array of bytes. Growth is managed in terms of
+ * chunks that are allocated when needed.
+ */
+final class DynamicByteArray {
+  static final int DEFAULT_CHUNKSIZE = 32 * 1024;
+  static final int DEFAULT_NUM_CHUNKS = 128;
+
+  private final int chunkSize;        // our allocation sizes
+  private byte[][] data;              // the real data
+  private int length;                 // max set element index +1
+  private int initializedChunks = 0;  // the number of chunks created
+
+  public DynamicByteArray() {
+    this(DEFAULT_NUM_CHUNKS, DEFAULT_CHUNKSIZE);
+  }
+
+  public DynamicByteArray(int numChunks, int chunkSize) {
+    if (chunkSize == 0) {
+      throw new IllegalArgumentException("bad chunksize");
+    }
+    this.chunkSize = chunkSize;
+    data = new byte[numChunks][];
+  }
+
+  /**
+   * Ensure that the given index is valid.
+   */
+  private void grow(int chunkIndex) {
+    if (chunkIndex >= initializedChunks) {
+      if (chunkIndex >= data.length) {
+        int newSize = Math.max(chunkIndex + 1, 2 * data.length);
+        byte[][] newChunk = new byte[newSize][];
+        System.arraycopy(data, 0, newChunk, 0, data.length);
+        data = newChunk;
+      }
+      for(int i=initializedChunks; i <= chunkIndex; ++i) {
+        data[i] = new byte[chunkSize];
+      }
+      initializedChunks = chunkIndex + 1;
+    }
+  }
+
+  public byte get(int index) {
+    if (index >= length) {
+      throw new IndexOutOfBoundsException("Index " + index +
+                                            " is outside of 0.." +
+                                            (length - 1));
+    }
+    int i = index / chunkSize;
+    int j = index % chunkSize;
+    return data[i][j];
+  }
+
+  public void set(int index, byte value) {
+    int i = index / chunkSize;
+    int j = index % chunkSize;
+    grow(i);
+    if (index >= length) {
+      length = index + 1;
+    }
+    data[i][j] = value;
+  }
+
+  public int add(byte value) {
+    int i = length / chunkSize;
+    int j = length % chunkSize;
+    grow(i);
+    data[i][j] = value;
+    int result = length;
+    length += 1;
+    return result;
+  }
+
+  /**
+   * Copy a slice of a byte array into our buffer.
+   * @param value the array to copy from
+   * @param valueOffset the first location to copy from value
+   * @param valueLength the number of bytes to copy from value
+   * @return the offset in this array where the copied bytes begin
+   */
+  public int add(byte[] value, int valueOffset, int valueLength) {
+    int i = length / chunkSize;
+    int j = length % chunkSize;
+    grow((length + valueLength) / chunkSize);
+    int remaining = valueLength;
+    while (remaining > 0) {
+      int size = Math.min(remaining, chunkSize - j);
+      System.arraycopy(value, valueOffset, data[i], j, size);
+      remaining -= size;
+      valueOffset += size;
+      i += 1;
+      j = 0;
+    }
+    int result = length;
+    length += valueLength;
+    return result;
+  }
+
+  /**
+   * Read the entire stream into this array.
+   * @param in the stream to read from
+   * @throws IOException
+   */
+  public void readAll(InputStream in) throws IOException {
+    int currentChunk = length / chunkSize;
+    int currentOffset = length % chunkSize;
+    grow(currentChunk);
+    int currentLength = in.read(data[currentChunk], currentOffset,
+      chunkSize - currentOffset);
+    while (currentLength > 0) {
+      length += currentLength;
+      currentOffset = length % chunkSize;
+      if (currentOffset == 0) {
+        currentChunk = length / chunkSize;
+        grow(currentChunk);
+      }
+      currentLength = in.read(data[currentChunk], currentOffset,
+        chunkSize - currentOffset);
+    }
+  }
+
+  /**
+   * Byte compare a set of bytes against the bytes in this dynamic array.
+   * @param other source of the other bytes
+   * @param otherOffset start offset in the other array
+   * @param otherLength number of bytes in the other array
+   * @param ourOffset the offset in our array
+   * @param ourLength the number of bytes in our array
+   * @return negative for less, 0 for equal, positive for greater
+   */
+  public int compare(byte[] other, int otherOffset, int otherLength,
+                     int ourOffset, int ourLength) {
+    int currentChunk = ourOffset / chunkSize;
+    int currentOffset = ourOffset % chunkSize;
+    int maxLength = Math.min(otherLength, ourLength);
+    while (maxLength > 0 &&
+      other[otherOffset] == data[currentChunk][currentOffset]) {
+      otherOffset += 1;
+      currentOffset += 1;
+      if (currentOffset == chunkSize) {
+        currentChunk += 1;
+        currentOffset = 0;
+      }
+      maxLength -= 1;
+    }
+    if (maxLength == 0) {
+      return otherLength - ourLength;
+    }
+    int otherByte = 0xff & other[otherOffset];
+    int ourByte = 0xff & data[currentChunk][currentOffset];
+    return otherByte > ourByte ? 1 : -1;
+  }
+
+  /**
+   * Get the size of the array.
+   * @return the number of bytes in the array
+   */
+  public int size() {
+    return length;
+  }
+
+  /**
+   * Clear the array to its original pristine state.
+   */
+  public void clear() {
+    length = 0;
+    for(int i=0; i < data.length; ++i) {
+      data[i] = null;
+    }
+    initializedChunks = 0;
+  }
+
+  /**
+   * Set a text value from the bytes in this dynamic array.
+   * @param result the value to set
+   * @param offset the start of the bytes to copy
+   * @param length the number of bytes to copy
+   */
+  public void setText(Text result, int offset, int length) {
+    result.clear();
+    int currentChunk = offset / chunkSize;
+    int currentOffset = offset % chunkSize;
+    int currentLength = Math.min(length, chunkSize - currentOffset);
+    while (length > 0) {
+      result.append(data[currentChunk], currentOffset, currentLength);
+      length -= currentLength;
+      currentChunk += 1;
+      currentOffset = 0;
+      currentLength = Math.min(length, chunkSize - currentOffset);
+    }
+  }
+
+  /**
+   * Write out a range of this dynamic array to an output stream.
+   * @param out the stream to write to
+   * @param offset the first offset to write
+   * @param length the number of bytes to write
+   * @throws IOException
+   */
+  public void write(OutputStream out, int offset,
+                    int length) throws IOException {
+    int currentChunk = offset / chunkSize;
+    int currentOffset = offset % chunkSize;
+    while (length > 0) {
+      int currentLength = Math.min(length, chunkSize - currentOffset);
+      out.write(data[currentChunk], currentOffset, currentLength);
+      length -= currentLength;
+      currentChunk += 1;
+      currentOffset = 0;
+    }
+  }
+
+  public String toString() {
+    int i;
+    StringBuilder sb = new StringBuilder(length * 3);
+
+    sb.append('{');
+    int l = length - 1;
+    for (i=0; i<l; i++) {
+      sb.append(Integer.toHexString(get(i)));
+      sb.append(',');
+    }
+    sb.append(get(i));
+    sb.append('}');
+
+    return sb.toString();
+  }
+
+  public void setByteBuffer(ByteBuffer result, int offset, int length) {
+    result.clear();
+    int currentChunk = offset / chunkSize;
+    int currentOffset = offset % chunkSize;
+    int currentLength = Math.min(length, chunkSize - currentOffset);
+    while (length > 0) {
+      result.put(data[currentChunk], currentOffset, currentLength);
+      length -= currentLength;
+      currentChunk += 1;
+      currentOffset = 0;
+      currentLength = Math.min(length, chunkSize - currentOffset);
+    }
+  }
+}
+
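
A hypothetical usage sketch (not part of the commit; it assumes the same
package, since the class is package-private): add() returns the offset at
which the bytes were appended, so callers can refer back to them later, for
example through setText().

  package org.apache.hadoop.hive.ql.io.orc;

  import java.nio.charset.StandardCharsets;
  import org.apache.hadoop.io.Text;

  class DynamicByteArrayDemo {
    public static void main(String[] args) {
      DynamicByteArray buffer = new DynamicByteArray();
      byte[] word = "orc".getBytes(StandardCharsets.UTF_8);
      int offset = buffer.add(word, 0, word.length); // offset where "orc" landed
      buffer.add((byte) '!');
      Text text = new Text();
      buffer.setText(text, offset, word.length);     // copies "orc" back out
      System.out.println(text + " size=" + buffer.size()); // prints: orc size=4
    }
  }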

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicIntArray.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicIntArray.java?rev=1452992&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicIntArray.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicIntArray.java Tue Mar  5 20:44:50 2013
@@ -0,0 +1,139 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+/**
+ * Dynamic int array that uses primitive types and chunks to avoid copying
+ * large numbers of integers when it resizes.
+ *
+ * The motivation for this class is memory optimization, i.e. space-efficient
+ * storage of potentially huge arrays without good a-priori size guesses.
+ *
+ * The API of this class is between a primitive array and an AbstractList. It's
+ * not a Collection implementation because it handles primitive types, but the
+ * API could be extended to support iterators and the like.
+ *
+ * NOTE: Like standard Collection implementations/arrays, this class is not
+ * synchronized.
+ */
+final class DynamicIntArray {
+  static final int DEFAULT_CHUNKSIZE = 8 * 1024;
+  static final int INIT_CHUNKS = 128;
+
+  private final int chunkSize;       // our allocation size
+  private int[][] data;              // the real data
+  private int length;                // max set element index +1
+  private int initializedChunks = 0; // the number of created chunks
+
+  public DynamicIntArray() {
+    this(DEFAULT_CHUNKSIZE);
+  }
+
+  public DynamicIntArray(int chunkSize) {
+    this.chunkSize = chunkSize;
+
+    data = new int[INIT_CHUNKS][];
+  }
+
+  /**
+   * Ensure that the given index is valid.
+   */
+  private void grow(int chunkIndex) {
+    if (chunkIndex >= initializedChunks) {
+      if (chunkIndex >= data.length) {
+        int newSize = Math.max(chunkIndex + 1, 2 * data.length);
+        int[][] newChunk = new int[newSize][];
+        System.arraycopy(data, 0, newChunk, 0, data.length);
+        data = newChunk;
+      }
+      for (int i=initializedChunks; i <= chunkIndex; ++i) {
+        data[i] = new int[chunkSize];
+      }
+      initializedChunks = chunkIndex + 1;
+    }
+  }
+
+  public int get(int index) {
+    if (index >= length) {
+      throw new IndexOutOfBoundsException("Index " + index +
+                                            " is outside of 0.." +
+                                            (length - 1));
+    }
+    int i = index / chunkSize;
+    int j = index % chunkSize;
+    return data[i][j];
+  }
+
+  public void set(int index, int value) {
+    int i = index / chunkSize;
+    int j = index % chunkSize;
+    grow(i);
+    if (index >= length) {
+      length = index + 1;
+    }
+    data[i][j] = value;
+  }
+
+  public void increment(int index, int value) {
+    int i = index / chunkSize;
+    int j = index % chunkSize;
+    grow(i);
+    if (index >= length) {
+      length = index + 1;
+    }
+    data[i][j] += value;
+  }
+
+  public void add(int value) {
+    int i = length / chunkSize;
+    int j = length % chunkSize;
+    grow(i);
+    data[i][j] = value;
+    length += 1;
+  }
+
+  public int size() {
+    return length;
+  }
+
+  public void clear() {
+    length = 0;
+    for(int i=0; i < data.length; ++i) {
+      data[i] = null;
+    }
+    initializedChunks = 0;
+  }
+
+  public String toString() {
+    int i;
+    StringBuilder sb = new StringBuilder(length * 4);
+
+    sb.append('{');
+    int l = length - 1;
+    for (i=0; i<l; i++) {
+      sb.append(get(i));
+      sb.append(',');
+    }
+    sb.append(get(i));
+    sb.append('}');
+
+    return sb.toString();
+  }
+
+}
+
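
A similar hypothetical sketch for the int variant (not part of the commit;
same-package assumption as above): set() and increment() grow the array on
demand, and untouched slots read back as zero.

  package org.apache.hadoop.hive.ql.io.orc;

  class DynamicIntArrayDemo {
    public static void main(String[] args) {
      DynamicIntArray counts = new DynamicIntArray();
      counts.add(10);           // index 0
      counts.add(20);           // index 1
      counts.increment(0, 5);   // counts[0] is now 15
      counts.set(5, 99);        // grows the array; indices 2..4 stay 0
      System.out.println(counts.get(0) + " " + counts.get(5) + " " + counts.size());
      // prints: 15 99 6
    }
  }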

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java?rev=1452992&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java Tue Mar  5 20:44:50 2013
@@ -0,0 +1,78 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+
+/**
+ * A tool for printing out the file structure of ORC files.
+ */
+public final class FileDump {
+
+  // not used
+  private FileDump() {}
+
+  public static void main(String[] args) throws Exception {
+    Configuration conf = new Configuration();
+    for(String filename: args) {
+      System.out.println("Structure for " + filename);
+      Path path = new Path(filename);
+      Reader reader = OrcFile.createReader(path.getFileSystem(conf), path);
+      RecordReaderImpl rows = (RecordReaderImpl) reader.rows(null);
+      System.out.println("Rows: " + reader.getNumberOfRows());
+      System.out.println("Compression: " + reader.getCompression());
+      if (reader.getCompression() != CompressionKind.NONE) {
+        System.out.println("Compression size: " + reader.getCompressionSize());
+      }
+      System.out.println("Type: " + reader.getObjectInspector().getTypeName());
+      ColumnStatistics[] stats = reader.getStatistics();
+      System.out.println("\nStatistics:");
+      for(int i=0; i < stats.length; ++i) {
+        System.out.println("  Column " + i + ": " + stats[i].toString());
+      }
+      System.out.println("\nStripes:");
+      for(StripeInformation stripe: reader.getStripes()) {
+        long stripeStart = stripe.getOffset();
+        System.out.println("  Stripe: " + stripe.toString());
+        OrcProto.StripeFooter footer = rows.readStripeFooter(stripe);
+        long sectionStart = stripeStart;
+        for(OrcProto.Stream section: footer.getStreamsList()) {
+          System.out.println("    Stream: column " + section.getColumn() +
+            " section " + section.getKind() + " start: " + sectionStart +
+            " length " + section.getLength());
+          sectionStart += section.getLength();
+        }
+        for(int i=0; i < footer.getColumnsCount(); ++i) {
+          OrcProto.ColumnEncoding encoding = footer.getColumns(i);
+          StringBuilder buf = new StringBuilder();
+          buf.append("    Encoding column ");
+          buf.append(i);
+          buf.append(": ");
+          buf.append(encoding.getKind());
+          if (encoding.getKind() == OrcProto.ColumnEncoding.Kind.DICTIONARY) {
+            buf.append("[");
+            buf.append(encoding.getDictionarySize());
+            buf.append("]");
+          }
+          System.out.println(buf);
+        }
+      }
+    }
+  }
+}
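
FileDump takes one or more ORC file paths as command-line arguments and prints
the structure of each in turn. A schematic invocation (the classpath is
illustrative only, not prescribed by the commit) would be:

  java -cp <hive-exec and hadoop jars> \
      org.apache.hadoop.hive.ql.io.orc.FileDump /path/to/file.orc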

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/InStream.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/InStream.java?rev=1452992&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/InStream.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/InStream.java Tue Mar  5 20:44:50 2013
@@ -0,0 +1,216 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.ByteBuffer;
+
+abstract class InStream extends InputStream {
+
+  private static class UncompressedStream extends InStream {
+    private final String name;
+    private byte[] array;
+    private int offset;
+    private final int base;
+    private final int limit;
+
+    public UncompressedStream(String name, ByteBuffer input) {
+      this.name = name;
+      this.array = input.array();
+      base = input.arrayOffset() + input.position();
+      offset = base;
+      limit = input.arrayOffset() + input.limit();
+    }
+
+    @Override
+    public int read() {
+      if (offset == limit) {
+        return -1;
+      }
+      return 0xff & array[offset++];
+    }
+
+    @Override
+    public int read(byte[] data, int offset, int length) {
+      if (this.offset == limit) {
+        return -1;
+      }
+      int actualLength = Math.min(length, limit - this.offset);
+      System.arraycopy(array, this.offset, data, offset, actualLength);
+      this.offset += actualLength;
+      return actualLength;
+    }
+
+    @Override
+    public int available() {
+      return limit - offset;
+    }
+
+    @Override
+    public void close() {
+      array = null;
+      offset = 0;
+    }
+
+    @Override
+    public void seek(PositionProvider index) throws IOException {
+      offset = base + (int) index.getNext();
+    }
+
+    @Override
+    public String toString() {
+      return "uncompressed stream " + name + " base: " + base +
+         " offset: " + offset + " limit: " + limit;
+    }
+  }
+
+  private static class CompressedStream extends InStream {
+    private final String name;
+    private byte[] array;
+    private final int bufferSize;
+    private ByteBuffer uncompressed = null;
+    private final CompressionCodec codec;
+    private int offset;
+    private final int base;
+    private final int limit;
+    private boolean isUncompressedOriginal;
+
+    public CompressedStream(String name, ByteBuffer input,
+                            CompressionCodec codec, int bufferSize
+                           ) {
+      this.array = input.array();
+      this.name = name;
+      this.codec = codec;
+      this.bufferSize = bufferSize;
+      base = input.arrayOffset() + input.position();
+      offset = base;
+      limit = input.arrayOffset() + input.limit();
+    }
+
+    private void readHeader() throws IOException {
+      if (limit - offset > OutStream.HEADER_SIZE) {
+        int chunkLength = ((0xff & array[offset + 2]) << 15) |
+          ((0xff & array[offset + 1]) << 7) | ((0xff & array[offset]) >> 1);
+        if (chunkLength > bufferSize) {
+          throw new IllegalArgumentException("Buffer size too small. size = " +
+              bufferSize + " needed = " + chunkLength);
+        }
+        boolean isOriginal = (array[offset] & 0x01) == 1;
+        offset += OutStream.HEADER_SIZE;
+        if (isOriginal) {
+          isUncompressedOriginal = true;
+          uncompressed = ByteBuffer.wrap(array, offset, chunkLength);
+        } else {
+          if (isUncompressedOriginal) {
+            uncompressed = ByteBuffer.allocate(bufferSize);
+            isUncompressedOriginal = false;
+          } else if (uncompressed == null) {
+            uncompressed = ByteBuffer.allocate(bufferSize);
+          } else {
+            uncompressed.clear();
+          }
+          codec.decompress(ByteBuffer.wrap(array, offset, chunkLength),
+            uncompressed);
+        }
+        offset += chunkLength;
+      } else {
+        throw new IllegalStateException("Can't read header");
+      }
+    }
+
+    @Override
+    public int read() throws IOException {
+      if (uncompressed == null || uncompressed.remaining() == 0) {
+        if (offset == limit) {
+          return -1;
+        }
+        readHeader();
+      }
+      return 0xff & uncompressed.get();
+    }
+
+    @Override
+    public int read(byte[] data, int offset, int length) throws IOException {
+      if (uncompressed == null || uncompressed.remaining() == 0) {
+        if (this.offset == this.limit) {
+          return -1;
+        }
+        readHeader();
+      }
+      int actualLength = Math.min(length, uncompressed.remaining());
+      System.arraycopy(uncompressed.array(),
+        uncompressed.arrayOffset() + uncompressed.position(), data,
+        offset, actualLength);
+      uncompressed.position(uncompressed.position() + actualLength);
+      return actualLength;
+    }
+
+    @Override
+    public int available() throws IOException {
+      if (uncompressed == null || uncompressed.remaining() == 0) {
+        if (offset == limit) {
+          return 0;
+        }
+        readHeader();
+      }
+      return uncompressed.remaining();
+    }
+
+    @Override
+    public void close() {
+      array = null;
+      uncompressed = null;
+      offset = 0;
+    }
+
+    @Override
+    public void seek(PositionProvider index) throws IOException {
+      offset = base + (int) index.getNext();
+      int uncompBytes = (int) index.getNext();
+      if (uncompBytes != 0) {
+        readHeader();
+        uncompressed.position(uncompressed.position() + uncompBytes);
+      } else if (uncompressed != null) {
+        uncompressed.position(uncompressed.limit());
+      }
+    }
+
+    @Override
+    public String toString() {
+      return "compressed stream " + name + " base: " + base +
+          " offset: " + offset + " limit: " + limit +
+          (uncompressed == null ? "" :
+              " uncompressed: " + uncompressed.position() + " to " +
+                  uncompressed.limit());
+    }
+  }
+
+  public abstract void seek(PositionProvider index) throws IOException;
+
+  public static InStream create(String name,
+                                ByteBuffer input,
+                                CompressionCodec codec,
+                                int bufferSize) throws IOException {
+    if (codec == null) {
+      return new UncompressedStream(name, input);
+    } else {
+      return new CompressedStream(name, input, codec, bufferSize);
+    }
+  }
+}
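
The three-byte chunk header decoded by CompressedStream.readHeader() holds a
23-bit chunk length shifted left by one, with the low bit marking an
"original" chunk that was stored uncompressed. A standalone sketch of the
encoding and the matching decode expression (the encoder here is inferred from
the decode above, not code from the commit):

  public class ChunkHeaderDemo {
    static byte[] encode(int chunkLength, boolean isOriginal) {
      int header = (chunkLength << 1) | (isOriginal ? 1 : 0);
      return new byte[] {
          (byte) header,          // low byte, bit 0 = original flag
          (byte) (header >> 8),
          (byte) (header >> 16)
      };
    }

    public static void main(String[] args) {
      byte[] h = encode(100000, false);
      // Same expression as CompressedStream.readHeader():
      int chunkLength = ((0xff & h[2]) << 15) | ((0xff & h[1]) << 7)
          | ((0xff & h[0]) >> 1);
      boolean isOriginal = (h[0] & 0x01) == 1;
      System.out.println(chunkLength + " " + isOriginal);  // 100000 false
    }
  }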

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/IntegerColumnStatistics.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/IntegerColumnStatistics.java?rev=1452992&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/IntegerColumnStatistics.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/IntegerColumnStatistics.java Tue Mar  5 20:44:50 2013
@@ -0,0 +1,50 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+/**
+ * Statistics for all of the integer columns, such as byte, short, int, and
+ * long.
+ */
+public interface IntegerColumnStatistics extends ColumnStatistics {
+  /**
+   * Get the smallest value in the column. Only defined if getNumberOfValues
+   * is non-zero.
+   * @return the minimum
+   */
+  long getMinimum();
+
+  /**
+   * Get the largest value in the column. Only defined if getNumberOfValues
+   * is non-zero.
+   * @return the maximum
+   */
+  long getMaximum();
+
+  /**
+   * Is the sum defined? If the sum overflowed the counter this will be false.
+   * @return is the sum available
+   */
+  boolean isSumDefined();
+
+  /**
+   * Get the sum of the column. Only valid if isSumDefined returns true.
+   * @return the sum of the column
+   */
+  long getSum();
+}

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java?rev=1452992&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java Tue Mar  5 20:44:50 2013
@@ -0,0 +1,82 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io.orc;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+
+import java.io.IOException;
+
+/**
+ * Contains factory methods to read or write ORC files.
+ */
+public final class OrcFile {
+
+  public static final String MAGIC = "ORC";
+  public static final String COMPRESSION = "orc.compress";
+  static final String DEFAULT_COMPRESSION = "ZLIB";
+  public static final String COMPRESSION_BLOCK_SIZE = "orc.compress.size";
+  static final String DEFAULT_COMPRESSION_BLOCK_SIZE = "262144";
+  public static final String STRIPE_SIZE = "orc.stripe.size";
+  static final String DEFAULT_STRIPE_SIZE = "268435456";
+  public static final String ROW_INDEX_STRIDE = "orc.row.index.stride";
+  static final String DEFAULT_ROW_INDEX_STRIDE = "10000";
+  public static final String ENABLE_INDEXES = "orc.create.index";
+
+  // prevent instantiation
+  private OrcFile() {}
+
+  /**
+   * Create an ORC file reader.
+   * @param fs file system
+   * @param path file name to read from
+   * @return a new ORC file reader.
+   * @throws IOException
+   */
+  public static Reader createReader(FileSystem fs, Path path
+                                    ) throws IOException {
+    return new ReaderImpl(fs, path);
+  }
+
+  /**
+   * Create an ORC file writer.
+   * @param fs file system
+   * @param path filename to write to
+   * @param inspector the ObjectInspector that inspects the rows
+   * @param stripeSize the number of bytes in a stripe
+   * @param compress how to compress the file
+   * @param bufferSize the number of bytes to compress at once
+   * @param rowIndexStride the number of rows between row index entries or
+   *                       0 to suppress all indexes
+   * @return a new ORC file writer
+   * @throws IOException
+   */
+  public static Writer createWriter(FileSystem fs,
+                                    Path path,
+                                    ObjectInspector inspector,
+                                    long stripeSize,
+                                    CompressionKind compress,
+                                    int bufferSize,
+                                    int rowIndexStride) throws IOException {
+    return new WriterImpl(fs, path, inspector, stripeSize, compress,
+      bufferSize, rowIndexStride);
+  }
+
+}
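
A minimal round-trip sketch using the two factory methods above. The MyRow class
and the reflection-based ObjectInspector are illustrative only; Writer, Reader,
RecordReader and CompressionKind are the types added elsewhere in this commit:

  package org.apache.hadoop.hive.ql.io.orc;

  import java.io.IOException;

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.FileSystem;
  import org.apache.hadoop.fs.Path;
  import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
  import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;

  class OrcFileSketch {
    // hypothetical row shape, used only for this sketch
    static class MyRow {
      int x;
      String y;
      MyRow(int x, String y) { this.x = x; this.y = y; }
    }

    static void roundTrip(Configuration conf, Path path) throws IOException {
      FileSystem fs = path.getFileSystem(conf);
      ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(
          MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
      // 64MB stripes, 256KB compression blocks, an index entry every 10,000 rows
      Writer writer = OrcFile.createWriter(fs, path, inspector, 64L * 1024 * 1024,
          CompressionKind.ZLIB, 256 * 1024, 10000);
      writer.addRow(new MyRow(1, "hello"));
      writer.close();

      // offset 0 and the full file length cover every stripe; a null include
      // array reads every column, matching OrcInputFormat's convention
      Reader reader = OrcFile.createReader(fs, path);
      RecordReader rows = reader.rows(0, fs.getFileStatus(path).getLen(), null);
      while (rows.hasNext()) {
        Object row = rows.next(null);
        // process row ...
      }
      rows.close();
    }
  }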

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java?rev=1452992&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java Tue Mar  5 20:44:50 2013
@@ -0,0 +1,192 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io.orc;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.io.InputFormatChecker;
+import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileSplit;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordReader;
+import org.apache.hadoop.mapred.Reporter;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * A MapReduce/Hive input format for ORC files.
+ */
+public class OrcInputFormat extends FileInputFormat<NullWritable, OrcStruct>
+  implements InputFormatChecker {
+
+  private static class OrcRecordReader
+      implements RecordReader<NullWritable, OrcStruct> {
+    private final org.apache.hadoop.hive.ql.io.orc.RecordReader reader;
+    private final long offset;
+    private final long length;
+    private final OrcStruct row;
+    private boolean firstRow = true;
+    private float progress = 0.0f;
+
+    OrcRecordReader(Reader file, Configuration conf,
+                    long offset, long length) throws IOException {
+      this.reader = file.rows(offset, length,
+          findIncludedColumns(file.getTypes(), conf));
+      this.offset = offset;
+      this.length = length;
+      if (reader.hasNext()) {
+        row = (OrcStruct) reader.next(null);
+      } else {
+        row = null;
+      }
+    }
+
+    @Override
+    public boolean next(NullWritable key, OrcStruct value) throws IOException {
+      if (firstRow) {
+        firstRow = false;
+        assert value == row: "User didn't pass our value back " + value;
+        return row != null;
+      } else if (reader.hasNext()) {
+        Object obj = reader.next(value);
+        progress = reader.getProgress();
+        assert obj == value : "Reader returned different object " + obj;
+        return true;
+      }
+      return false;
+    }
+
+    @Override
+    public NullWritable createKey() {
+      return NullWritable.get();
+    }
+
+    @Override
+    public OrcStruct createValue() {
+      return row;
+    }
+
+    @Override
+    public long getPos() throws IOException {
+      return offset + (long) (progress * length);
+    }
+
+    @Override
+    public void close() throws IOException {
+      reader.close();
+    }
+
+    @Override
+    public float getProgress() throws IOException {
+      return progress;
+    }
+  }
+
+  public OrcInputFormat() {
+    // just set a really small lower bound
+    setMinSplitSize(16 * 1024);
+  }
+
+  /**
+   * Recurse down into a type subtree, turning on all of its sub-columns.
+   * @param types the types of the file
+   * @param result the global view of columns that should be included
+   * @param typeId the root of tree to enable
+   */
+  private static void includeColumnRecursive(List<OrcProto.Type> types,
+                                             boolean[] result,
+                                             int typeId) {
+    result[typeId] = true;
+    OrcProto.Type type = types.get(typeId);
+    int children = type.getSubtypesCount();
+    for(int i=0; i < children; ++i) {
+      includeColumnRecursive(types, result, type.getSubtypes(i));
+    }
+  }
+
+  /**
+   * Take the configuration and figure out which columns we need to include.
+   * @param types the types of the file
+   * @param conf the configuration
+   * @return true for each column that should be included
+   */
+  private static boolean[] findIncludedColumns(List<OrcProto.Type> types,
+                                               Configuration conf) {
+    String includedStr =
+        conf.get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR);
+    if (includedStr == null) {
+      return null;
+    } else {
+      int numColumns = types.size();
+      boolean[] result = new boolean[numColumns];
+      result[0] = true;
+      OrcProto.Type root = types.get(0);
+      List<Integer> included = ColumnProjectionUtils.getReadColumnIDs(conf);
+      for(int i=0; i < root.getSubtypesCount(); ++i) {
+        if (included.contains(i)) {
+          includeColumnRecursive(types, result, root.getSubtypes(i));
+        }
+      }
+      // if at least one column was excluded, return the flags; null means read everything
+      for(boolean include: result) {
+        if (!include) {
+          return result;
+        }
+      }
+      return null;
+    }
+  }
+
+  @Override
+  public RecordReader<NullWritable, OrcStruct>
+      getRecordReader(InputSplit inputSplit, JobConf conf,
+                      Reporter reporter) throws IOException {
+    FileSplit fileSplit = (FileSplit) inputSplit;
+    Path path = fileSplit.getPath();
+    FileSystem fs = path.getFileSystem(conf);
+    reporter.setStatus(fileSplit.toString());
+    return new OrcRecordReader(OrcFile.createReader(fs, path), conf,
+                               fileSplit.getStart(), fileSplit.getLength());
+  }
+
+  @Override
+  public boolean validateInput(FileSystem fs, HiveConf conf,
+                               ArrayList<FileStatus> files
+                              ) throws IOException {
+    if (files.size() <= 0) {
+      return false;
+    }
+    for (FileStatus file : files) {
+      try {
+        OrcFile.createReader(fs, file.getPath());
+      } catch (IOException e) {
+        return false;
+      }
+    }
+    return true;
+  }
+}
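
A sketch of the job configuration that drives findIncludedColumns() above (the
input path is hypothetical):

  package org.apache.hadoop.hive.ql.io.orc;

  import org.apache.hadoop.fs.Path;
  import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
  import org.apache.hadoop.mapred.FileInputFormat;
  import org.apache.hadoop.mapred.JobConf;

  class OrcInputFormatSketch {
    static JobConf configure() {
      JobConf job = new JobConf();
      job.setInputFormat(OrcInputFormat.class);
      // hypothetical table location
      FileInputFormat.addInputPath(job, new Path("/user/hive/warehouse/t"));
      // project only top-level columns 0 and 2; includeColumnRecursive() turns
      // on each column's whole subtree, and findIncludedColumns() returns null
      // (read everything) when no column ends up excluded
      job.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0,2");
      return job;
    }
  }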

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java?rev=1452992&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java Tue Mar  5 20:44:50 2013
@@ -0,0 +1,144 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
+import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
+import org.apache.hadoop.hive.ql.io.orc.OrcSerde.OrcSerdeRow;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordWriter;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.util.Progressable;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Properties;
+
+/**
+ * A Hive OutputFormat for ORC files.
+ */
+public class OrcOutputFormat extends FileOutputFormat<NullWritable, OrcSerdeRow>
+                        implements HiveOutputFormat<NullWritable, OrcSerdeRow> {
+
+  private static class OrcRecordWriter
+      implements RecordWriter<NullWritable, OrcSerdeRow>,
+                 FileSinkOperator.RecordWriter {
+    private Writer writer = null;
+    private final FileSystem fs;
+    private final Path path;
+    private final Configuration conf;
+    private final long stripeSize;
+    private final int compressionSize;
+    private final CompressionKind compress;
+    private final int rowIndexStride;
+
+    OrcRecordWriter(FileSystem fs, Path path, Configuration conf,
+                    String stripeSize, String compress,
+                    String compressionSize, String rowIndexStride) {
+      this.fs = fs;
+      this.path = path;
+      this.conf = conf;
+      this.stripeSize = Long.valueOf(stripeSize);
+      this.compress = CompressionKind.valueOf(compress);
+      this.compressionSize = Integer.valueOf(compressionSize);
+      this.rowIndexStride = Integer.valueOf(rowIndexStride);
+    }
+
+    @Override
+    public void write(NullWritable nullWritable,
+                      OrcSerdeRow row) throws IOException {
+      if (writer == null) {
+        writer = OrcFile.createWriter(fs, path, row.getInspector(), stripeSize,
+          compress, compressionSize, rowIndexStride);
+      }
+      writer.addRow(row.getRow());
+    }
+
+    @Override
+    public void write(Writable row) throws IOException {
+      OrcSerdeRow serdeRow = (OrcSerdeRow) row;
+      if (writer == null) {
+        writer = OrcFile.createWriter(fs, path, serdeRow.getInspector(),
+            stripeSize, compress, compressionSize, rowIndexStride);
+      }
+      writer.addRow(serdeRow.getRow());
+    }
+
+    @Override
+    public void close(Reporter reporter) throws IOException {
+      close(true);
+    }
+
+    @Override
+    public void close(boolean b) throws IOException {
+      // if we haven't written any rows, we need to create a file with a
+      // generic schema.
+      if (writer == null) {
+        // a row with no columns
+        ObjectInspector inspector = ObjectInspectorFactory.
+            getStandardStructObjectInspector(new ArrayList<String>(),
+                new ArrayList<ObjectInspector>());
+        writer = OrcFile.createWriter(fs, path, inspector, stripeSize,
+            compress, compressionSize, rowIndexStride);
+      }
+      writer.close();
+    }
+  }
+
+  @Override
+  public RecordWriter<NullWritable, OrcSerdeRow>
+      getRecordWriter(FileSystem fileSystem, JobConf conf, String name,
+                      Progressable reporter) throws IOException {
+    return new OrcRecordWriter(fileSystem, new Path(name), conf,
+      OrcFile.DEFAULT_STRIPE_SIZE, OrcFile.DEFAULT_COMPRESSION,
+      OrcFile.DEFAULT_COMPRESSION_BLOCK_SIZE, OrcFile.DEFAULT_ROW_INDEX_STRIDE);
+  }
+
+  @Override
+  public FileSinkOperator.RecordWriter
+     getHiveRecordWriter(JobConf conf,
+                         Path path,
+                         Class<? extends Writable> valueClass,
+                         boolean isCompressed,
+                         Properties tableProperties,
+                         Progressable reporter) throws IOException {
+    String stripeSize = tableProperties.getProperty(OrcFile.STRIPE_SIZE,
+        OrcFile.DEFAULT_STRIPE_SIZE);
+    String compression = tableProperties.getProperty(OrcFile.COMPRESSION,
+        OrcFile.DEFAULT_COMPRESSION);
+    String compressionSize =
+      tableProperties.getProperty(OrcFile.COMPRESSION_BLOCK_SIZE,
+        OrcFile.DEFAULT_COMPRESSION_BLOCK_SIZE);
+    String rowIndexStride =
+        tableProperties.getProperty(OrcFile.ROW_INDEX_STRIDE,
+            OrcFile.DEFAULT_ROW_INDEX_STRIDE);
+    if ("false".equals(tableProperties.getProperty(OrcFile.ENABLE_INDEXES))) {
+      rowIndexStride = "0";
+    }
+    return new OrcRecordWriter(path.getFileSystem(conf), path, conf,
+      stripeSize, compression, compressionSize, rowIndexStride);
+  }
+}
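
The record writer above is configured entirely from table properties; an
illustrative set of overrides for the defaults declared in OrcFile:

  package org.apache.hadoop.hive.ql.io.orc;

  import java.util.Properties;

  class OrcOutputFormatSketch {
    static Properties tableProperties() {
      Properties props = new Properties();
      props.setProperty(OrcFile.COMPRESSION, "ZLIB");              // orc.compress
      props.setProperty(OrcFile.STRIPE_SIZE, "67108864");          // orc.stripe.size (64MB)
      props.setProperty(OrcFile.COMPRESSION_BLOCK_SIZE, "131072"); // orc.compress.size (128KB)
      props.setProperty(OrcFile.ROW_INDEX_STRIDE, "10000");        // orc.row.index.stride
      props.setProperty(OrcFile.ENABLE_INDEXES, "true");           // "false" forces stride 0
      return props;
    }
  }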

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSerde.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSerde.java?rev=1452992&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSerde.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSerde.java Tue Mar  5 20:44:50 2013
@@ -0,0 +1,132 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.serde2.SerDe;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.SerDeStats;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.io.Writable;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Properties;
+
+/**
+ * A serde class for ORC.
+ * It transparently passes the object to/from the ORC file reader/writer.
+ */
+public class OrcSerde implements SerDe {
+  private final OrcSerdeRow row = new OrcSerdeRow();
+  private ObjectInspector inspector = null;
+
+  final class OrcSerdeRow implements Writable {
+    private Object realRow;
+    private ObjectInspector inspector;
+
+    @Override
+    public void write(DataOutput dataOutput) throws IOException {
+      throw new UnsupportedOperationException("can't write the bundle");
+    }
+
+    @Override
+    public void readFields(DataInput dataInput) throws IOException {
+      throw new UnsupportedOperationException("can't read the bundle");
+    }
+
+    ObjectInspector getInspector() {
+      return inspector;
+    }
+
+    Object getRow() {
+      return realRow;
+    }
+  }
+
+  @Override
+  public void initialize(Configuration conf, Properties table) {
+    // Read the configuration parameters
+    String columnNameProperty = table.getProperty("columns");
+    // NOTE: if "columns.types" is missing, all columns will be of String type
+    String columnTypeProperty = table.getProperty("columns.types");
+
+    // Parse the configuration parameters
+    ArrayList<String> columnNames = new ArrayList<String>();
+    if (columnNameProperty != null && columnNameProperty.length() > 0) {
+      for(String name: columnNameProperty.split(",")) {
+        columnNames.add(name);
+      }
+    }
+    if (columnTypeProperty == null) {
+      // Default type: all string
+      StringBuilder sb = new StringBuilder();
+      for (int i = 0; i < columnNames.size(); i++) {
+        if (i > 0) {
+          sb.append(":");
+        }
+        sb.append("string");
+      }
+      columnTypeProperty = sb.toString();
+    }
+
+    ArrayList<TypeInfo> fieldTypes =
+      TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
+    StructTypeInfo rootType = new StructTypeInfo();
+    rootType.setAllStructFieldNames(columnNames);
+    rootType.setAllStructFieldTypeInfos(fieldTypes);
+    inspector = OrcStruct.createObjectInspector(rootType);
+  }
+
+  @Override
+  public Class<? extends Writable> getSerializedClass() {
+    return OrcSerdeRow.class;
+  }
+
+  @Override
+  public Writable serialize(Object realRow, ObjectInspector inspector) {
+    row.realRow = realRow;
+    row.inspector = inspector;
+    return row;
+  }
+
+  @Override
+  public Object deserialize(Writable writable) throws SerDeException {
+    return writable;
+  }
+
+  @Override
+  public ObjectInspector getObjectInspector() throws SerDeException {
+    return inspector;
+  }
+
+  /**
+   * Always returns null, since serialized size doesn't make sense in the
+   * context of ORC files.
+   * @return null
+   */
+  @Override
+  public SerDeStats getSerDeStats() {
+    return null;
+  }
+}
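
A short sketch of the serde in isolation, assuming a hypothetical two-column
table; serialize() does not copy or encode the row, it only bundles the row with
its inspector so OrcOutputFormat can hand both to the ORC Writer:

  package org.apache.hadoop.hive.ql.io.orc;

  import java.util.Properties;

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.hive.serde2.SerDeException;
  import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
  import org.apache.hadoop.io.Writable;

  class OrcSerdeSketch {
    static Writable wrap(Object hiveRow) throws SerDeException {
      Properties table = new Properties();
      table.setProperty("columns", "id,name");          // hypothetical column names
      table.setProperty("columns.types", "int:string"); // and their types
      OrcSerde serde = new OrcSerde();
      serde.initialize(new Configuration(), table);
      ObjectInspector inspector = serde.getObjectInspector();
      // the returned OrcSerdeRow is the same reused instance on every call
      return serde.serialize(hiveRow, inspector);
    }
  }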