Posted to commits@hive.apache.org by om...@apache.org on 2017/07/19 16:58:57 UTC

[34/37] hive git commit: HIVE-17118. Move the hive-orc source files to make the package names unique.

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/BinaryColumnStatistics.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/BinaryColumnStatistics.java b/orc/src/java/org/apache/hive/orc/BinaryColumnStatistics.java
new file mode 100644
index 0000000..92ddfbe
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/BinaryColumnStatistics.java
@@ -0,0 +1,25 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc;
+
+/**
+ * Statistics for binary columns.
+ */
+public interface BinaryColumnStatistics extends ColumnStatistics {
+  long getSum();
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/BloomFilterIO.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/BloomFilterIO.java b/orc/src/java/org/apache/hive/orc/BloomFilterIO.java
new file mode 100644
index 0000000..a6e6408
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/BloomFilterIO.java
@@ -0,0 +1,43 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc;
+
+import org.apache.hive.common.util.BloomFilter;
+
+import com.google.common.primitives.Longs;
+
+public class BloomFilterIO extends BloomFilter {
+
+  public BloomFilterIO(long expectedEntries) {
+    super(expectedEntries, DEFAULT_FPP);
+  }
+
+  public BloomFilterIO(long expectedEntries, double fpp) {
+    super(expectedEntries, fpp);
+  }
+
+  /**
+   * Initializes the BloomFilter from the given Orc BloomFilter.
+   */
+  public BloomFilterIO(OrcProto.BloomFilter bloomFilter) {
+    this.bitSet = new BitSet(Longs.toArray(bloomFilter.getBitsetList()));
+    this.numHashFunctions = bloomFilter.getNumHashFunctions();
+    this.numBits = (int) this.bitSet.bitSize();
+  }
+}
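
For context, a minimal usage sketch (not part of this diff); addLong/testLong are assumed to be inherited from the hive-common BloomFilter:

    import org.apache.hive.orc.BloomFilterIO;

    public class BloomFilterIOExample {
      public static void main(String[] args) {
        // Size the filter for ~10000 entries with a 1% false positive rate.
        BloomFilterIO bloom = new BloomFilterIO(10000, 0.01);
        bloom.addLong(42L);                       // assumed inherited method
        System.out.println(bloom.testLong(42L));  // true
        System.out.println(bloom.testLong(43L));  // false (with high probability)
      }
    }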

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/BooleanColumnStatistics.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/BooleanColumnStatistics.java b/orc/src/java/org/apache/hive/orc/BooleanColumnStatistics.java
new file mode 100644
index 0000000..14fc6cf
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/BooleanColumnStatistics.java
@@ -0,0 +1,27 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc;
+
+/**
+ * Statistics for boolean columns.
+ */
+public interface BooleanColumnStatistics extends ColumnStatistics {
+  long getFalseCount();
+
+  long getTrueCount();
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/ColumnStatistics.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/ColumnStatistics.java b/orc/src/java/org/apache/hive/orc/ColumnStatistics.java
new file mode 100644
index 0000000..5ab8f55
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/ColumnStatistics.java
@@ -0,0 +1,36 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc;
+
+/**
+ * Statistics that are available for all types of columns.
+ */
+public interface ColumnStatistics {
+  /**
+   * Get the number of values in this column. It will differ from the number
+   * of rows because of NULL values and repeated values.
+   * @return the number of values
+   */
+  long getNumberOfValues();
+
+  /**
+   * Returns true if there are nulls in the scope of column statistics.
+   * @return true if null present else false
+   */
+  boolean hasNull();
+}
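
For context, a sketch of reading these statistics (not part of this diff); it assumes Reader exposes a getStatistics() method returning one entry per flattened column, as in the standalone ORC reader:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hive.orc.ColumnStatistics;
    import org.apache.hive.orc.OrcFile;
    import org.apache.hive.orc.Reader;

    public class ColumnStatsExample {
      public static void main(String[] args) throws Exception {
        Reader reader = OrcFile.createReader(new Path("/tmp/example.orc"),
            OrcFile.readerOptions(new Configuration()));
        ColumnStatistics[] stats = reader.getStatistics();  // assumed API
        for (int i = 0; i < stats.length; ++i) {
          // Entry 0 is the root struct; counts differ from the row count
          // because of NULLs and repeated values.
          System.out.println(i + ": " + stats[i].getNumberOfValues()
              + " values, hasNull=" + stats[i].hasNull());
        }
      }
    }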

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/CompressionCodec.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/CompressionCodec.java b/orc/src/java/org/apache/hive/orc/CompressionCodec.java
new file mode 100644
index 0000000..eee8dc3
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/CompressionCodec.java
@@ -0,0 +1,69 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.EnumSet;
+
+import javax.annotation.Nullable;
+
+public interface CompressionCodec {
+
+  enum Modifier {
+    /* speed/compression tradeoffs */
+    FASTEST,
+    FAST,
+    DEFAULT,
+    /* data sensitivity modifiers */
+    TEXT,
+    BINARY
+  }
+
+  /**
+   * Compress the in buffer to the out buffer.
+   * @param in the bytes to compress
+   * @param out the compressed bytes
+   * @param overflow put any additional bytes here
+   * @return true if the output is smaller than input
+   * @throws IOException
+   */
+  boolean compress(ByteBuffer in, ByteBuffer out, ByteBuffer overflow
+                  ) throws IOException;
+
+  /**
+   * Decompress the in buffer to the out buffer.
+   * @param in the bytes to decompress
+   * @param out the decompressed bytes
+   * @throws IOException
+   */
+  void decompress(ByteBuffer in, ByteBuffer out) throws IOException;
+
+  /**
+   * Produce a modified compression codec if the underlying algorithm allows
+   * modification.
+   *
+   * This does not modify the current object, but returns a new object if
+   * modifications are possible. Returns the same object if no modifications
+   * are possible.
+   * @param modifiers compression modifiers
+   * @return codec for use after optional modification
+   */
+  CompressionCodec modify(@Nullable EnumSet<Modifier> modifiers);
+
+}
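
For context, a sketch of the modify() contract described above (not part of this diff); the codec instance itself would come from the reader/writer internals:

    import java.util.EnumSet;
    import org.apache.hive.orc.CompressionCodec;

    public class CodecModifyExample {
      // Ask for the fastest setting tuned for binary data. The call never
      // mutates 'codec'; it returns a new object, or the same object when
      // the underlying algorithm allows no modification.
      static CompressionCodec tuneForBinary(CompressionCodec codec) {
        return codec.modify(EnumSet.of(CompressionCodec.Modifier.FASTEST,
                                       CompressionCodec.Modifier.BINARY));
      }
    }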

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/CompressionKind.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/CompressionKind.java b/orc/src/java/org/apache/hive/orc/CompressionKind.java
new file mode 100644
index 0000000..0d78642
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/CompressionKind.java
@@ -0,0 +1,27 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc;
+
+/**
+ * An enumeration that lists the generic compression algorithms that
+ * can be applied to ORC files.
+ */
+public enum CompressionKind {
+  NONE, ZLIB, SNAPPY, LZO
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/DataReader.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/DataReader.java b/orc/src/java/org/apache/hive/orc/DataReader.java
new file mode 100644
index 0000000..091a5b9
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/DataReader.java
@@ -0,0 +1,76 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+import org.apache.hadoop.hive.common.io.DiskRangeList;
+import org.apache.hive.orc.impl.OrcIndex;
+
+/** An abstract data reader that IO formats can use to read bytes from underlying storage. */
+public interface DataReader extends AutoCloseable {
+
+  /** Opens the DataReader, making it ready to use. */
+  void open() throws IOException;
+
+  OrcIndex readRowIndex(StripeInformation stripe,
+                        OrcProto.StripeFooter footer,
+                        boolean[] included, OrcProto.RowIndex[] indexes,
+                        boolean[] sargColumns,
+                        OrcProto.BloomFilterIndex[] bloomFilterIndices
+                        ) throws IOException;
+
+  OrcProto.StripeFooter readStripeFooter(StripeInformation stripe) throws IOException;
+
+  /** Reads the data.
+   *
+   * Note that for cases such as zero-copy reads, the caller must release the disk ranges
+   * produced once done with them. Call isTrackingDiskRanges to find out if this is needed.
+   * @param range List of disk ranges to read. Ranges with data will be ignored.
+   * @param baseOffset Base offset from the start of the file of the ranges in disk range list.
+   * @param doForceDirect Whether the data should be read into direct buffers.
+   * @return New or modified list of DiskRange-s, where all the ranges are filled with data.
+   */
+  DiskRangeList readFileData(
+      DiskRangeList range, long baseOffset, boolean doForceDirect) throws IOException;
+
+
+  /**
+   * Whether the user should release buffers created by readFileData. See readFileData javadoc.
+   */
+  boolean isTrackingDiskRanges();
+
+  /**
+   * Releases buffers created by readFileData. See readFileData javadoc.
+   * @param toRelease The buffer to release.
+   */
+  void releaseBuffer(ByteBuffer toRelease);
+
+  /**
+   * Clone the entire state of the DataReader with the assumption that the
+   * clone will be closed at a different time. Thus, any file handles in the
+   * implementation need to be cloned.
+   * @return a new instance
+   */
+  DataReader clone();
+
+  @Override
+  public void close() throws IOException;
+}
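
For context, a sketch of the buffer-ownership contract described above (not part of this diff); the range list and its consumption are left abstract:

    import java.io.IOException;
    import org.apache.hadoop.hive.common.io.DiskRangeList;
    import org.apache.hive.orc.DataReader;

    public class DataReaderContractExample {
      static DiskRangeList readAll(DataReader reader, DiskRangeList ranges,
                                   long baseOffset) throws IOException {
        reader.open();
        // Ranges that already carry data are ignored by readFileData.
        DiskRangeList filled = reader.readFileData(ranges, baseOffset, false);
        if (reader.isTrackingDiskRanges()) {
          // Zero-copy style reader: the caller owns the buffers and must
          // return each one via reader.releaseBuffer(buffer) once consumed.
        }
        return filled;
      }
    }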

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/DateColumnStatistics.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/DateColumnStatistics.java b/orc/src/java/org/apache/hive/orc/DateColumnStatistics.java
new file mode 100644
index 0000000..b03dcec
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/DateColumnStatistics.java
@@ -0,0 +1,37 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc;
+
+import java.util.Date;
+
+/**
+ * Statistics for DATE columns.
+ */
+public interface DateColumnStatistics extends ColumnStatistics {
+  /**
+   * Get the minimum value for the column.
+   * @return minimum value
+   */
+  Date getMinimum();
+
+  /**
+   * Get the maximum value for the column.
+   * @return maximum value
+   */
+  Date getMaximum();
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/DecimalColumnStatistics.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/DecimalColumnStatistics.java b/orc/src/java/org/apache/hive/orc/DecimalColumnStatistics.java
new file mode 100644
index 0000000..4dbbc12
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/DecimalColumnStatistics.java
@@ -0,0 +1,45 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc;
+
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+
+/**
+ * Statistics for decimal columns.
+ */
+public interface DecimalColumnStatistics extends ColumnStatistics {
+
+  /**
+   * Get the minimum value for the column.
+   * @return the minimum value
+   */
+  HiveDecimal getMinimum();
+
+  /**
+   * Get the maximum value for the column.
+   * @return the maximum value
+   */
+  HiveDecimal getMaximum();
+
+  /**
+   * Get the sum of the values of the column.
+   * @return the sum
+   */
+  HiveDecimal getSum();
+
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/DoubleColumnStatistics.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/DoubleColumnStatistics.java b/orc/src/java/org/apache/hive/orc/DoubleColumnStatistics.java
new file mode 100644
index 0000000..5f2d426
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/DoubleColumnStatistics.java
@@ -0,0 +1,44 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc;
+
+/**
+ * Statistics for float and double columns.
+ */
+public interface DoubleColumnStatistics extends ColumnStatistics {
+
+  /**
+   * Get the smallest value in the column. Only defined if getNumberOfValues
+   * is non-zero.
+   * @return the minimum
+   */
+  double getMinimum();
+
+  /**
+   * Get the largest value in the column. Only defined if getNumberOfValues
+   * is non-zero.
+   * @return the maximum
+   */
+  double getMaximum();
+
+  /**
+   * Get the sum of the values in the column.
+   * @return the sum
+   */
+  double getSum();
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/FileFormatException.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/FileFormatException.java b/orc/src/java/org/apache/hive/orc/FileFormatException.java
new file mode 100644
index 0000000..30356c3
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/FileFormatException.java
@@ -0,0 +1,30 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc;
+
+import java.io.IOException;
+
+/**
+ * Thrown when an invalid file format is encountered.
+ */
+public class FileFormatException extends IOException {
+
+  public FileFormatException(String errMsg) {
+    super(errMsg);
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/FileMetadata.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/FileMetadata.java b/orc/src/java/org/apache/hive/orc/FileMetadata.java
new file mode 100644
index 0000000..acb8a78
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/FileMetadata.java
@@ -0,0 +1,60 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc;
+
+import java.util.List;
+
+/**
+ * Cached file metadata. Right now, it caches everything; we don't actually have to store all
+ * the protobuf structs, we could store just what we need, but that would require that ORC
+ * stop depending on them too. Luckily, they shouldn't be very big.
+ */
+public interface FileMetadata {
+  boolean isOriginalFormat();
+
+  List<StripeInformation> getStripes();
+
+  CompressionKind getCompressionKind();
+
+  int getCompressionBufferSize();
+
+  int getRowIndexStride();
+
+  int getColumnCount();
+
+  int getFlattenedColumnCount();
+
+  Object getFileKey();
+
+  List<Integer> getVersionList();
+
+  int getMetadataSize();
+
+  int getWriterVersionNum();
+
+  List<OrcProto.Type> getTypes();
+
+  List<OrcProto.StripeStatistics> getStripeStats();
+
+  long getContentLength();
+
+  long getNumberOfRows();
+
+  List<OrcProto.ColumnStatistics> getFileStats();
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/IntegerColumnStatistics.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/IntegerColumnStatistics.java b/orc/src/java/org/apache/hive/orc/IntegerColumnStatistics.java
new file mode 100644
index 0000000..00d17eb
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/IntegerColumnStatistics.java
@@ -0,0 +1,50 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc;
+
+/**
+ * Statistics for all of the integer columns, such as byte, short, int, and
+ * long.
+ */
+public interface IntegerColumnStatistics extends ColumnStatistics {
+  /**
+   * Get the smallest value in the column. Only defined if getNumberOfValues
+   * is non-zero.
+   * @return the minimum
+   */
+  long getMinimum();
+
+  /**
+   * Get the largest value in the column. Only defined if getNumberOfValues
+   * is non-zero.
+   * @return the maximum
+   */
+  long getMaximum();
+
+  /**
+   * Is the sum defined? If the sum overflowed the counter this will be false.
+   * @return is the sum available
+   */
+  boolean isSumDefined();
+
+  /**
+   * Get the sum of the column. Only valid if isSumDefined returns true.
+   * @return the sum of the column
+   */
+  long getSum();
+}
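
For context, a sketch of guarding the sum as the javadoc above requires (not part of this diff):

    import org.apache.hive.orc.IntegerColumnStatistics;

    public class IntegerStatsExample {
      // getSum() is only valid when the sum did not overflow the counter,
      // so check isSumDefined() first.
      static Long safeSum(IntegerColumnStatistics stats) {
        return stats.isSumDefined() ? stats.getSum() : null;
      }
    }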

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/OrcConf.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/OrcConf.java b/orc/src/java/org/apache/hive/orc/OrcConf.java
new file mode 100644
index 0000000..dc2f865
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/OrcConf.java
@@ -0,0 +1,193 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc;
+
+import org.apache.hadoop.conf.Configuration;
+
+import java.util.Properties;
+
+/**
+ * Define the configuration properties that Orc understands.
+ */
+public enum OrcConf {
+  STRIPE_SIZE("orc.stripe.size", "hive.exec.orc.default.stripe.size",
+      64L * 1024 * 1024,
+      "Define the default ORC stripe size, in bytes."),
+  BLOCK_SIZE("orc.block.size", "hive.exec.orc.default.block.size",
+      256L * 1024 * 1024,
+      "Define the default file system block size for ORC files."),
+  ENABLE_INDEXES("orc.create.index", "orc.create.index", true,
+      "Should the ORC writer create indexes as part of the file."),
+  ROW_INDEX_STRIDE("orc.row.index.stride",
+      "hive.exec.orc.default.row.index.stride", 10000,
+      "Define the default ORC index stride in number of rows. (Stride is the\n"+
+          " number of rows n index entry represents.)"),
+  BUFFER_SIZE("orc.compress.size", "hive.exec.orc.default.buffer.size",
+      256 * 1024, "Define the default ORC buffer size, in bytes."),
+  BASE_DELTA_RATIO("orc.base.delta.ratio", "hive.exec.orc.base.delta.ratio", 8,
+      "The ratio of base writer and delta writer in terms of STRIPE_SIZE and BUFFER_SIZE."),
+  BLOCK_PADDING("orc.block.padding", "hive.exec.orc.default.block.padding",
+      true,
+      "Define whether stripes should be padded to the HDFS block boundaries."),
+  COMPRESS("orc.compress", "hive.exec.orc.default.compress", "ZLIB",
+      "Define the default compression codec for ORC file"),
+  WRITE_FORMAT("orc.write.format", "hive.exec.orc.write.format", "0.12",
+      "Define the version of the file to write. Possible values are 0.11 and\n"+
+          " 0.12. If this parameter is not defined, ORC will use the run\n" +
+          " length encoding (RLE) introduced in Hive 0.12."),
+  ENCODING_STRATEGY("orc.encoding.strategy", "hive.exec.orc.encoding.strategy",
+      "SPEED",
+      "Define the encoding strategy to use while writing data. Changing this\n"+
+          "will only affect the light weight encoding for integers. This\n" +
+          "flag will not change the compression level of higher level\n" +
+          "compression codec (like ZLIB)."),
+  COMPRESSION_STRATEGY("orc.compression.strategy",
+      "hive.exec.orc.compression.strategy", "SPEED",
+      "Define the compression strategy to use while writing data.\n" +
+          "This changes the compression level of higher level compression\n" +
+          "codec (like ZLIB)."),
+  BLOCK_PADDING_TOLERANCE("orc.block.padding.tolerance",
+      "hive.exec.orc.block.padding.tolerance", 0.05,
+      "Define the tolerance for block padding as a decimal fraction of\n" +
+          "stripe size (for example, the default value 0.05 is 5% of the\n" +
+          "stripe size). For the defaults of 64Mb ORC stripe and 256Mb HDFS\n" +
+          "blocks, the default block padding tolerance of 5% will\n" +
+          "reserve a maximum of 3.2Mb for padding within the 256Mb block.\n" +
+          "In that case, if the available size within the block is more than\n"+
+          "3.2Mb, a new smaller stripe will be inserted to fit within that\n" +
+          "space. This will make sure that no stripe written will block\n" +
+          " boundaries and cause remote reads within a node local task."),
+  BLOOM_FILTER_FPP("orc.bloom.filter.fpp", "orc.default.bloom.fpp", 0.05,
+      "Define the default false positive probability for bloom filters."),
+  USE_ZEROCOPY("orc.use.zerocopy", "hive.exec.orc.zerocopy", false,
+      "Use zerocopy reads with ORC. (This requires Hadoop 2.3 or later.)"),
+  SKIP_CORRUPT_DATA("orc.skip.corrupt.data", "hive.exec.orc.skip.corrupt.data",
+      false,
+      "If ORC reader encounters corrupt data, this value will be used to\n" +
+          "determine whether to skip the corrupt data or throw exception.\n" +
+          "The default behavior is to throw exception."),
+  MEMORY_POOL("orc.memory.pool", "hive.exec.orc.memory.pool", 0.5,
+      "Maximum fraction of heap that can be used by ORC file writers"),
+  DICTIONARY_KEY_SIZE_THRESHOLD("orc.dictionary.key.threshold",
+      "hive.exec.orc.dictionary.key.size.threshold",
+      0.8,
+      "If the number of distinct keys in a dictionary is greater than this\n" +
+          "fraction of the total number of non-null rows, turn off \n" +
+          "dictionary encoding.  Use 1 to always use dictionary encoding."),
+  ROW_INDEX_STRIDE_DICTIONARY_CHECK("orc.dictionary.early.check",
+      "hive.orc.row.index.stride.dictionary.check",
+      true,
+      "If enabled dictionary check will happen after first row index stride\n" +
+          "(default 10000 rows) else dictionary check will happen before\n" +
+          "writing first stripe. In both cases, the decision to use\n" +
+          "dictionary or not will be retained thereafter."),
+  BLOOM_FILTER_COLUMNS("orc.bloom.filter.columns", "orc.bloom.filter.columns",
+      "", "List of columns to create bloom filters for when writing.")
+  ;
+
+  private final String attribute;
+  private final String hiveConfName;
+  private final Object defaultValue;
+  private final String description;
+
+  OrcConf(String attribute,
+          String hiveConfName,
+          Object defaultValue,
+          String description) {
+    this.attribute = attribute;
+    this.hiveConfName = hiveConfName;
+    this.defaultValue = defaultValue;
+    this.description = description;
+  }
+
+  public String getAttribute() {
+    return attribute;
+  }
+
+  public String getHiveConfName() {
+    return hiveConfName;
+  }
+
+  public Object getDefaultValue() {
+    return defaultValue;
+  }
+
+  public String getDescription() {
+    return description;
+  }
+
+  private String lookupValue(Properties tbl, Configuration conf) {
+    String result = null;
+    if (tbl != null) {
+      result = tbl.getProperty(attribute);
+    }
+    if (result == null && conf != null) {
+      result = conf.get(attribute);
+      if (result == null) {
+        result = conf.get(hiveConfName);
+      }
+    }
+    return result;
+  }
+
+  public long getLong(Properties tbl, Configuration conf) {
+    String value = lookupValue(tbl, conf);
+    if (value != null) {
+      return Long.parseLong(value);
+    }
+    return ((Number) defaultValue).longValue();
+  }
+
+  public long getLong(Configuration conf) {
+    return getLong(null, conf);
+  }
+
+  public String getString(Properties tbl, Configuration conf) {
+    String value = lookupValue(tbl, conf);
+    return value == null ? (String) defaultValue : value;
+  }
+
+  public String getString(Configuration conf) {
+    return getString(null, conf);
+  }
+
+  public boolean getBoolean(Properties tbl, Configuration conf) {
+    String value = lookupValue(tbl, conf);
+    if (value != null) {
+      return Boolean.parseBoolean(value);
+    }
+    return (Boolean) defaultValue;
+  }
+
+  public boolean getBoolean(Configuration conf) {
+    return getBoolean(null, conf);
+  }
+
+  public double getDouble(Properties tbl, Configuration conf) {
+    String value = lookupValue(tbl, conf);
+    if (value != null) {
+      return Double.parseDouble(value);
+    }
+    return ((Number) defaultValue).doubleValue();
+  }
+
+  public double getDouble(Configuration conf) {
+    return getDouble(null, conf);
+  }
+}
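
For context, a sketch of the lookup order implemented above (not part of this diff): table properties first, then the ORC key in the Configuration, then the legacy Hive key, then the built-in default:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hive.orc.OrcConf;

    public class OrcConfExample {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.setLong("orc.stripe.size", 128L * 1024 * 1024);
        // Reads "orc.stripe.size"; would fall back to
        // "hive.exec.orc.default.stripe.size", then to the 64MB default.
        long stripeSize = OrcConf.STRIPE_SIZE.getLong(conf);
        String codec = OrcConf.COMPRESS.getString(conf);  // "ZLIB" by default
        System.out.println(stripeSize + " " + codec);
      }
    }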

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/OrcFile.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/OrcFile.java b/orc/src/java/org/apache/hive/orc/OrcFile.java
new file mode 100644
index 0000000..5670a61
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/OrcFile.java
@@ -0,0 +1,574 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc;
+
+import java.io.IOException;
+import java.util.Properties;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hive.orc.impl.MemoryManager;
+import org.apache.hive.orc.impl.OrcTail;
+import org.apache.hive.orc.impl.ReaderImpl;
+import org.apache.hive.orc.impl.WriterImpl;
+
+/**
+ * Contains factory methods to read or write ORC files.
+ */
+public class OrcFile {
+  public static final String MAGIC = "ORC";
+
+  /**
+   * Create a version number for the ORC file format, so that we can add
+   * non-forward compatible changes in the future. To make it easier for users
+   * to understand the version numbers, we use the Hive release number that
+   * first wrote that version of ORC files.
+   *
+   * Thus, if you add new encodings or other non-forward compatible changes
+   * to ORC files, which prevent the old reader from reading the new format,
+   * you should change this version to reflect the next Hive release number.
+   * Non-forward compatible changes should never be added in patch releases.
+   *
+   * Do not make any changes that break backwards compatibility, which would
+   * prevent the new reader from reading ORC files generated by any released
+   * version of Hive.
+   */
+  public enum Version {
+    V_0_11("0.11", 0, 11),
+    V_0_12("0.12", 0, 12);
+
+    public static final Version CURRENT = V_0_12;
+
+    private final String name;
+    private final int major;
+    private final int minor;
+
+    Version(String name, int major, int minor) {
+      this.name = name;
+      this.major = major;
+      this.minor = minor;
+    }
+
+    public static Version byName(String name) {
+      for(Version version: values()) {
+        if (version.name.equals(name)) {
+          return version;
+        }
+      }
+      throw new IllegalArgumentException("Unknown ORC version " + name);
+    }
+
+    /**
+     * Get the human readable name for the version.
+     */
+    public String getName() {
+      return name;
+    }
+
+    /**
+     * Get the major version number.
+     */
+    public int getMajor() {
+      return major;
+    }
+
+    /**
+     * Get the minor version number.
+     */
+    public int getMinor() {
+      return minor;
+    }
+  }
+
+  /**
+   * Records the version of the writer in terms of which bugs have been fixed.
+   * For bugs in the writer where old readers can still read the new data
+   * correctly, bump this version instead of the file format Version.
+   */
+  public enum WriterVersion {
+    ORIGINAL(0),
+    HIVE_8732(1), // corrupted stripe/file maximum column statistics
+    HIVE_4243(2), // use real column names from Hive tables
+    HIVE_12055(3), // vectorized writer
+    HIVE_13083(4), // decimal writer updating present stream wrongly
+
+    // Don't use any magic numbers here except for the below:
+    FUTURE(Integer.MAX_VALUE); // a version from a future writer
+
+    private final int id;
+
+    public int getId() {
+      return id;
+    }
+
+    WriterVersion(int id) {
+      this.id = id;
+    }
+
+    private static final WriterVersion[] values;
+    static {
+      // Assumes few non-negative values close to zero.
+      int max = Integer.MIN_VALUE;
+      for (WriterVersion v : WriterVersion.values()) {
+        if (v.id < 0) throw new AssertionError();
+        if (v.id > max && FUTURE.id != v.id) {
+          max = v.id;
+        }
+      }
+      values = new WriterVersion[max + 1];
+      for (WriterVersion v : WriterVersion.values()) {
+        if (v.id < values.length) {
+          values[v.id] = v;
+        }
+      }
+    }
+
+    /**
+     * Convert the integer from OrcProto.PostScript.writerVersion
+     * to the enumeration with unknown versions being mapped to FUTURE.
+     * @param val the serialized writer version
+     * @return the corresponding enumeration value
+     */
+    public static WriterVersion from(int val) {
+      if (val >= values.length) {
+        return FUTURE;
+      }
+      return values[val];
+    }
+  }
+  public static final WriterVersion CURRENT_WRITER = WriterVersion.HIVE_13083;
+
+  public enum EncodingStrategy {
+    SPEED, COMPRESSION
+  }
+
+  public enum CompressionStrategy {
+    SPEED, COMPRESSION
+  }
+
+  // unused
+  protected OrcFile() {}
+
+  public static class ReaderOptions {
+    private final Configuration conf;
+    private FileSystem filesystem;
+    private long maxLength = Long.MAX_VALUE;
+    private OrcTail orcTail;
+    // TODO: We can generalize the FileMetadata interface. Make OrcTail implement the
+    // FileMetadata interface and remove this class altogether. Both footer caching and
+    // LLAP caching just need OrcTail. For now, keeping this around to avoid complex surgery.
+    private FileMetadata fileMetadata;
+
+    public ReaderOptions(Configuration conf) {
+      this.conf = conf;
+    }
+
+    public ReaderOptions filesystem(FileSystem fs) {
+      this.filesystem = fs;
+      return this;
+    }
+
+    public ReaderOptions maxLength(long val) {
+      maxLength = val;
+      return this;
+    }
+
+    public ReaderOptions orcTail(OrcTail tail) {
+      this.orcTail = tail;
+      return this;
+    }
+
+    public Configuration getConfiguration() {
+      return conf;
+    }
+
+    public FileSystem getFilesystem() {
+      return filesystem;
+    }
+
+    public long getMaxLength() {
+      return maxLength;
+    }
+
+    public OrcTail getOrcTail() {
+      return orcTail;
+    }
+
+    public ReaderOptions fileMetadata(final FileMetadata metadata) {
+      fileMetadata = metadata;
+      return this;
+    }
+
+    public FileMetadata getFileMetadata() {
+      return fileMetadata;
+    }
+  }
+
+  public static ReaderOptions readerOptions(Configuration conf) {
+    return new ReaderOptions(conf);
+  }
+
+  public static Reader createReader(Path path,
+                                    ReaderOptions options) throws IOException {
+    return new ReaderImpl(path, options);
+  }
+
+  public interface WriterContext {
+    Writer getWriter();
+  }
+
+  public interface WriterCallback {
+    void preStripeWrite(WriterContext context) throws IOException;
+    void preFooterWrite(WriterContext context) throws IOException;
+  }
+
+  /**
+   * Options for creating ORC file writers.
+   */
+  public static class WriterOptions {
+    private final Configuration configuration;
+    private FileSystem fileSystemValue = null;
+    private TypeDescription schema = null;
+    private long stripeSizeValue;
+    private long blockSizeValue;
+    private int rowIndexStrideValue;
+    private int bufferSizeValue;
+    private boolean enforceBufferSize = false;
+    private boolean blockPaddingValue;
+    private CompressionKind compressValue;
+    private MemoryManager memoryManagerValue;
+    private Version versionValue;
+    private WriterCallback callback;
+    private EncodingStrategy encodingStrategy;
+    private CompressionStrategy compressionStrategy;
+    private double paddingTolerance;
+    private String bloomFilterColumns;
+    private double bloomFilterFpp;
+
+    protected WriterOptions(Properties tableProperties, Configuration conf) {
+      configuration = conf;
+      memoryManagerValue = getStaticMemoryManager(conf);
+      stripeSizeValue = OrcConf.STRIPE_SIZE.getLong(tableProperties, conf);
+      blockSizeValue = OrcConf.BLOCK_SIZE.getLong(tableProperties, conf);
+      rowIndexStrideValue =
+          (int) OrcConf.ROW_INDEX_STRIDE.getLong(tableProperties, conf);
+      bufferSizeValue = (int) OrcConf.BUFFER_SIZE.getLong(tableProperties,
+          conf);
+      blockPaddingValue =
+          OrcConf.BLOCK_PADDING.getBoolean(tableProperties, conf);
+      compressValue =
+          CompressionKind.valueOf(OrcConf.COMPRESS.getString(tableProperties,
+              conf).toUpperCase());
+      String versionName = OrcConf.WRITE_FORMAT.getString(tableProperties,
+          conf);
+      versionValue = Version.byName(versionName);
+      String enString = OrcConf.ENCODING_STRATEGY.getString(tableProperties,
+          conf);
+      encodingStrategy = EncodingStrategy.valueOf(enString);
+
+      String compString =
+          OrcConf.COMPRESSION_STRATEGY.getString(tableProperties, conf);
+      compressionStrategy = CompressionStrategy.valueOf(compString);
+
+      paddingTolerance =
+          OrcConf.BLOCK_PADDING_TOLERANCE.getDouble(tableProperties, conf);
+
+      bloomFilterColumns = OrcConf.BLOOM_FILTER_COLUMNS.getString(tableProperties,
+          conf);
+      bloomFilterFpp = OrcConf.BLOOM_FILTER_FPP.getDouble(tableProperties,
+          conf);
+    }
+
+    /**
+     * Provide the filesystem for the path, if the client has it available.
+     * If it is not provided, it will be found from the path.
+     */
+    public WriterOptions fileSystem(FileSystem value) {
+      fileSystemValue = value;
+      return this;
+    }
+
+    /**
+     * Set the stripe size for the file. The writer stores the contents of the
+     * stripe in memory until this memory limit is reached and the stripe
+     * is flushed to the HDFS file and the next stripe started.
+     */
+    public WriterOptions stripeSize(long value) {
+      stripeSizeValue = value;
+      return this;
+    }
+
+    /**
+     * Set the file system block size for the file. For optimal performance,
+     * set the block size to be a multiple of the stripe size.
+     */
+    public WriterOptions blockSize(long value) {
+      blockSizeValue = value;
+      return this;
+    }
+
+    /**
+     * Set the distance between entries in the row index. The minimum value is
+     * 1000 to prevent the index from overwhelming the data. If the stride is
+     * set to 0, no indexes will be included in the file.
+     */
+    public WriterOptions rowIndexStride(int value) {
+      rowIndexStrideValue = value;
+      return this;
+    }
+
+    /**
+     * The size of the memory buffers used for compressing and storing the
+     * stripe in memory. NOTE: ORC writer may choose to use smaller buffer
+     * size based on stripe size and number of columns for efficient stripe
+     * writing and memory utilization. To force the writer to use the requested
+     * buffer size, use enforceBufferSize().
+     */
+    public WriterOptions bufferSize(int value) {
+      bufferSizeValue = value;
+      return this;
+    }
+
+    /**
+     * Force the writer to use the requested buffer size instead of estimating
+     * a buffer size based on stripe size and number of columns.
+     * See bufferSize() method for more info.
+     * Default: false
+     */
+    public WriterOptions enforceBufferSize() {
+      enforceBufferSize = true;
+      return this;
+    }
+
+    /**
+     * Sets whether the HDFS blocks are padded to prevent stripes from
+     * straddling blocks. Padding improves locality and thus the speed of
+     * reading, but costs space.
+     */
+    public WriterOptions blockPadding(boolean value) {
+      blockPaddingValue = value;
+      return this;
+    }
+
+    /**
+     * Sets the encoding strategy that is used to encode the data.
+     */
+    public WriterOptions encodingStrategy(EncodingStrategy strategy) {
+      encodingStrategy = strategy;
+      return this;
+    }
+
+    /**
+     * Sets the tolerance for block padding as a percentage of stripe size.
+     */
+    public WriterOptions paddingTolerance(double value) {
+      paddingTolerance = value;
+      return this;
+    }
+
+    /**
+     * Comma-separated list of column names for which bloom filters are to be created.
+     */
+    public WriterOptions bloomFilterColumns(String columns) {
+      bloomFilterColumns = columns;
+      return this;
+    }
+
+    /**
+     * Specify the false positive probability for the bloom filters.
+     * @param fpp - false positive probability
+     * @return this
+     */
+    public WriterOptions bloomFilterFpp(double fpp) {
+      bloomFilterFpp = fpp;
+      return this;
+    }
+
+    /**
+     * Sets the generic compression that is used to compress the data.
+     */
+    public WriterOptions compress(CompressionKind value) {
+      compressValue = value;
+      return this;
+    }
+
+    /**
+     * Set the schema for the file. This is a required parameter.
+     * @param schema the schema for the file.
+     * @return this
+     */
+    public WriterOptions setSchema(TypeDescription schema) {
+      this.schema = schema;
+      return this;
+    }
+
+    /**
+     * Sets the version of the file that will be written.
+     */
+    public WriterOptions version(Version value) {
+      versionValue = value;
+      return this;
+    }
+
+    /**
+     * Add a listener for when the stripe and file are about to be closed.
+     * @param callback the object to be called when the stripe is closed
+     * @return this
+     */
+    public WriterOptions callback(WriterCallback callback) {
+      this.callback = callback;
+      return this;
+    }
+
+    /**
+     * A package local option to set the memory manager.
+     */
+    protected WriterOptions memory(MemoryManager value) {
+      memoryManagerValue = value;
+      return this;
+    }
+
+    public boolean getBlockPadding() {
+      return blockPaddingValue;
+    }
+
+    public long getBlockSize() {
+      return blockSizeValue;
+    }
+
+    public String getBloomFilterColumns() {
+      return bloomFilterColumns;
+    }
+
+    public FileSystem getFileSystem() {
+      return fileSystemValue;
+    }
+
+    public Configuration getConfiguration() {
+      return configuration;
+    }
+
+    public TypeDescription getSchema() {
+      return schema;
+    }
+
+    public long getStripeSize() {
+      return stripeSizeValue;
+    }
+
+    public CompressionKind getCompress() {
+      return compressValue;
+    }
+
+    public WriterCallback getCallback() {
+      return callback;
+    }
+
+    public Version getVersion() {
+      return versionValue;
+    }
+
+    public MemoryManager getMemoryManager() {
+      return memoryManagerValue;
+    }
+
+    public int getBufferSize() {
+      return bufferSizeValue;
+    }
+
+    public boolean isEnforceBufferSize() {
+      return enforceBufferSize;
+    }
+
+    public int getRowIndexStride() {
+      return rowIndexStrideValue;
+    }
+
+    public CompressionStrategy getCompressionStrategy() {
+      return compressionStrategy;
+    }
+
+    public EncodingStrategy getEncodingStrategy() {
+      return encodingStrategy;
+    }
+
+    public double getPaddingTolerance() {
+      return paddingTolerance;
+    }
+
+    public double getBloomFilterFpp() {
+      return bloomFilterFpp;
+    }
+  }
+
+  /**
+   * Create a set of writer options based on a configuration.
+   * @param conf the configuration to use for values
+   * @return A WriterOptions object that can be modified
+   */
+  public static WriterOptions writerOptions(Configuration conf) {
+    return new WriterOptions(null, conf);
+  }
+
+  /**
+   * Create a set of write options based on a set of table properties and
+   * configuration.
+   * @param tableProperties the properties of the table
+   * @param conf the configuration of the query
+   * @return a WriterOptions object that can be modified
+   */
+  public static WriterOptions writerOptions(Properties tableProperties,
+                                            Configuration conf) {
+    return new WriterOptions(tableProperties, conf);
+  }
+
+  private static ThreadLocal<MemoryManager> memoryManager = null;
+
+  private static synchronized MemoryManager getStaticMemoryManager(
+      final Configuration conf) {
+    if (memoryManager == null) {
+      memoryManager = new ThreadLocal<MemoryManager>() {
+        @Override
+        protected MemoryManager initialValue() {
+          return new MemoryManager(conf);
+        }
+      };
+    }
+    return memoryManager.get();
+  }
+
+  /**
+   * Create an ORC file writer. This is the public interface for creating
+   * writers going forward and new options will only be added to this method.
+   * @param path filename to write to
+   * @param opts the options
+   * @return a new ORC file writer
+   * @throws IOException
+   */
+  public static Writer createWriter(Path path,
+                                    WriterOptions opts
+                                    ) throws IOException {
+    FileSystem fs = opts.getFileSystem() == null ?
+        path.getFileSystem(opts.getConfiguration()) : opts.getFileSystem();
+
+    return new WriterImpl(fs, path, opts);
+  }
+
+}
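
For context, a sketch of creating a writer through these options (not part of this diff); TypeDescription.fromString and the Closeable-style writer.close() are assumed to match the standalone ORC API:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hive.orc.CompressionKind;
    import org.apache.hive.orc.OrcFile;
    import org.apache.hive.orc.TypeDescription;
    import org.apache.hive.orc.Writer;

    public class OrcWriterExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        TypeDescription schema =
            TypeDescription.fromString("struct<x:int,y:string>");  // assumed helper
        Writer writer = OrcFile.createWriter(new Path("/tmp/example.orc"),
            OrcFile.writerOptions(conf)
                .setSchema(schema)
                .compress(CompressionKind.SNAPPY)
                .stripeSize(64L * 1024 * 1024));
        // ... append rows through the Writer API, then:
        writer.close();  // assumed to flush the final stripe and footer
      }
    }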

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/OrcUtils.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/OrcUtils.java b/orc/src/java/org/apache/hive/orc/OrcUtils.java
new file mode 100644
index 0000000..12cb1f7
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/OrcUtils.java
@@ -0,0 +1,623 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.hive.orc.impl.ReaderImpl;
+
+import com.google.common.collect.Lists;
+
+public class OrcUtils {
+
+  /**
+   * Returns the selected columns as a boolean array, with true set for the specified column names.
+   * The result contains a number of elements equal to the flattened number of columns.
+   * For example:
+   * selectedColumns - a,b,c
+   * allColumns - a,b,c,d
+   * If column c is a complex type, say list<string>, and the other types are primitives,
+   * then the result will be [false, true, true, true, true, false].
+   * Index 0 is the root element of the struct, which is set to false by default; indexes 1 and 2
+   * correspond to columns a and b. Indexes 3 and 4 correspond to column c, which is list<string>,
+   * and index 5 corresponds to column d. After flattening, list<string> takes 2 columns.
+   *
+   * @param selectedColumns - comma separated list of selected column names
+   * @param schema       - object schema
+   * @return - boolean array with true value set for the specified column names
+   */
+  public static boolean[] includeColumns(String selectedColumns,
+                                         TypeDescription schema) {
+    int numFlattenedCols = schema.getMaximumId();
+    boolean[] results = new boolean[numFlattenedCols + 1];
+    if ("*".equals(selectedColumns)) {
+      Arrays.fill(results, true);
+      return results;
+    }
+    if (selectedColumns != null &&
+        schema.getCategory() == TypeDescription.Category.STRUCT) {
+      List<String> fieldNames = schema.getFieldNames();
+      List<TypeDescription> fields = schema.getChildren();
+      for (String column: selectedColumns.split((","))) {
+        TypeDescription col = findColumn(column, fieldNames, fields);
+        if (col != null) {
+          for(int i=col.getId(); i <= col.getMaximumId(); ++i) {
+            results[i] = true;
+          }
+        }
+      }
+    }
+    return results;
+  }
+
+  private static TypeDescription findColumn(String columnName,
+                                            List<String> fieldNames,
+                                            List<TypeDescription> fields) {
+    int i = 0;
+    for(String fieldName: fieldNames) {
+      if (fieldName.equalsIgnoreCase(columnName)) {
+        return fields.get(i);
+      } else {
+        i += 1;
+      }
+    }
+    return null;
+  }
+
+  public static List<OrcProto.Type> getOrcTypes(TypeDescription typeDescr) {
+    List<OrcProto.Type> result = Lists.newArrayList();
+    appendOrcTypes(result, typeDescr);
+    return result;
+  }
+
+  private static void appendOrcTypes(List<OrcProto.Type> result, TypeDescription typeDescr) {
+    OrcProto.Type.Builder type = OrcProto.Type.newBuilder();
+    List<TypeDescription> children = typeDescr.getChildren();
+    switch (typeDescr.getCategory()) {
+    case BOOLEAN:
+      type.setKind(OrcProto.Type.Kind.BOOLEAN);
+      break;
+    case BYTE:
+      type.setKind(OrcProto.Type.Kind.BYTE);
+      break;
+    case SHORT:
+      type.setKind(OrcProto.Type.Kind.SHORT);
+      break;
+    case INT:
+      type.setKind(OrcProto.Type.Kind.INT);
+      break;
+    case LONG:
+      type.setKind(OrcProto.Type.Kind.LONG);
+      break;
+    case FLOAT:
+      type.setKind(OrcProto.Type.Kind.FLOAT);
+      break;
+    case DOUBLE:
+      type.setKind(OrcProto.Type.Kind.DOUBLE);
+      break;
+    case STRING:
+      type.setKind(OrcProto.Type.Kind.STRING);
+      break;
+    case CHAR:
+      type.setKind(OrcProto.Type.Kind.CHAR);
+      type.setMaximumLength(typeDescr.getMaxLength());
+      break;
+    case VARCHAR:
+      type.setKind(OrcProto.Type.Kind.VARCHAR);
+      type.setMaximumLength(typeDescr.getMaxLength());
+      break;
+    case BINARY:
+      type.setKind(OrcProto.Type.Kind.BINARY);
+      break;
+    case TIMESTAMP:
+      type.setKind(OrcProto.Type.Kind.TIMESTAMP);
+      break;
+    case DATE:
+      type.setKind(OrcProto.Type.Kind.DATE);
+      break;
+    case DECIMAL:
+      type.setKind(OrcProto.Type.Kind.DECIMAL);
+      type.setPrecision(typeDescr.getPrecision());
+      type.setScale(typeDescr.getScale());
+      break;
+    case LIST:
+      type.setKind(OrcProto.Type.Kind.LIST);
+      type.addSubtypes(children.get(0).getId());
+      break;
+    case MAP:
+      type.setKind(OrcProto.Type.Kind.MAP);
+      for(TypeDescription t: children) {
+        type.addSubtypes(t.getId());
+      }
+      break;
+    case STRUCT:
+      type.setKind(OrcProto.Type.Kind.STRUCT);
+      for(TypeDescription t: children) {
+        type.addSubtypes(t.getId());
+      }
+      for(String field: typeDescr.getFieldNames()) {
+        type.addFieldNames(field);
+      }
+      break;
+    case UNION:
+      type.setKind(OrcProto.Type.Kind.UNION);
+      for(TypeDescription t: children) {
+        type.addSubtypes(t.getId());
+      }
+      break;
+    default:
+      throw new IllegalArgumentException("Unknown category: " +
+          typeDescr.getCategory());
+    }
+    result.add(type.build());
+    if (children != null) {
+      for(TypeDescription child: children) {
+        appendOrcTypes(result, child);
+      }
+    }
+  }
+
+  /**
+   * NOTE: This method ignores the subtype numbers in the TypeDescription and rebuilds them
+   * based on the current length of the result list being appended to.
+   *
+   * @param result the list of ORC types to append to
+   * @param typeDescr the type description to convert
+   */
+  public static void appendOrcTypesRebuildSubtypes(List<OrcProto.Type> result,
+      TypeDescription typeDescr) {
+
+    int subtype = result.size();
+    OrcProto.Type.Builder type = OrcProto.Type.newBuilder();
+    boolean needsAdd = true;
+    List<TypeDescription> children = typeDescr.getChildren();
+    switch (typeDescr.getCategory()) {
+    case BOOLEAN:
+      type.setKind(OrcProto.Type.Kind.BOOLEAN);
+      break;
+    case BYTE:
+      type.setKind(OrcProto.Type.Kind.BYTE);
+      break;
+    case SHORT:
+      type.setKind(OrcProto.Type.Kind.SHORT);
+      break;
+    case INT:
+      type.setKind(OrcProto.Type.Kind.INT);
+      break;
+    case LONG:
+      type.setKind(OrcProto.Type.Kind.LONG);
+      break;
+    case FLOAT:
+      type.setKind(OrcProto.Type.Kind.FLOAT);
+      break;
+    case DOUBLE:
+      type.setKind(OrcProto.Type.Kind.DOUBLE);
+      break;
+    case STRING:
+      type.setKind(OrcProto.Type.Kind.STRING);
+      break;
+    case CHAR:
+      type.setKind(OrcProto.Type.Kind.CHAR);
+      type.setMaximumLength(typeDescr.getMaxLength());
+      break;
+    case VARCHAR:
+      type.setKind(OrcProto.Type.Kind.VARCHAR);
+      type.setMaximumLength(typeDescr.getMaxLength());
+      break;
+    case BINARY:
+      type.setKind(OrcProto.Type.Kind.BINARY);
+      break;
+    case TIMESTAMP:
+      type.setKind(OrcProto.Type.Kind.TIMESTAMP);
+      break;
+    case DATE:
+      type.setKind(OrcProto.Type.Kind.DATE);
+      break;
+    case DECIMAL:
+      type.setKind(OrcProto.Type.Kind.DECIMAL);
+      type.setPrecision(typeDescr.getPrecision());
+      type.setScale(typeDescr.getScale());
+      break;
+    case LIST:
+      type.setKind(OrcProto.Type.Kind.LIST);
+      type.addSubtypes(++subtype);
+      result.add(type.build());
+      needsAdd = false;
+      appendOrcTypesRebuildSubtypes(result, children.get(0));
+      break;
+    case MAP:
+      {
+        // Make room for MAP type.
+        result.add(null);
+
+        // Add MAP type pair in order to determine their subtype values.
+        appendOrcTypesRebuildSubtypes(result, children.get(0));
+        int subtype2 = result.size();
+        appendOrcTypesRebuildSubtypes(result, children.get(1));
+        type.setKind(OrcProto.Type.Kind.MAP);
+        type.addSubtypes(subtype + 1);
+        type.addSubtypes(subtype2);
+        result.set(subtype, type.build());
+        needsAdd = false;
+      }
+      break;
+    case STRUCT:
+      {
+        List<String> fieldNames = typeDescr.getFieldNames();
+
+        // Make room for STRUCT type.
+        result.add(null);
+
+        List<Integer> fieldSubtypes = new ArrayList<Integer>(fieldNames.size());
+        for(TypeDescription child: children) {
+          int fieldSubtype = result.size();
+          fieldSubtypes.add(fieldSubtype);
+          appendOrcTypesRebuildSubtypes(result, child);
+        }
+
+        type.setKind(OrcProto.Type.Kind.STRUCT);
+
+        for (int i = 0 ; i < fieldNames.size(); i++) {
+          type.addSubtypes(fieldSubtypes.get(i));
+          type.addFieldNames(fieldNames.get(i));
+        }
+        result.set(subtype, type.build());
+        needsAdd = false;
+      }
+      break;
+    case UNION:
+      {
+        // Make room for UNION type.
+        result.add(null);
+
+        List<Integer> unionSubtypes = new ArrayList<Integer>(children.size());
+        for(TypeDescription child: children) {
+          int unionSubtype = result.size();
+          unionSubtypes.add(unionSubtype);
+          appendOrcTypesRebuildSubtypes(result, child);
+        }
+
+        type.setKind(OrcProto.Type.Kind.UNION);
+        for (int i = 0 ; i < children.size(); i++) {
+          type.addSubtypes(unionSubtypes.get(i));
+        }
+        result.set(subtype, type.build());
+        needsAdd = false;
+      }
+      break;
+    default:
+      throw new IllegalArgumentException("Unknown category: " + typeDescr.getCategory());
+    }
+    if (needsAdd) {
+      result.add(type.build());
+    }
+  }
+
+  /**
+   * NOTE: This method ignores the subtype numbers in the OrcProto.Type and rebuilds them
+   * based on the current length of the result list being appended to.
+   *
+   * @param result the list of ORC types to append to
+   * @param types the flattened source types
+   * @param columnId the id of the root column to convert
+   * @return the column id following the converted subtree
+   */
+  public static int appendOrcTypesRebuildSubtypes(List<OrcProto.Type> result,
+      List<OrcProto.Type> types, int columnId) {
+
+    OrcProto.Type oldType = types.get(columnId++);
+
+    int subtype = result.size();
+    OrcProto.Type.Builder builder = OrcProto.Type.newBuilder();
+    boolean needsAdd = true;
+    switch (oldType.getKind()) {
+    case BOOLEAN:
+      builder.setKind(OrcProto.Type.Kind.BOOLEAN);
+      break;
+    case BYTE:
+      builder.setKind(OrcProto.Type.Kind.BYTE);
+      break;
+    case SHORT:
+      builder.setKind(OrcProto.Type.Kind.SHORT);
+      break;
+    case INT:
+      builder.setKind(OrcProto.Type.Kind.INT);
+      break;
+    case LONG:
+      builder.setKind(OrcProto.Type.Kind.LONG);
+      break;
+    case FLOAT:
+      builder.setKind(OrcProto.Type.Kind.FLOAT);
+      break;
+    case DOUBLE:
+      builder.setKind(OrcProto.Type.Kind.DOUBLE);
+      break;
+    case STRING:
+      builder.setKind(OrcProto.Type.Kind.STRING);
+      break;
+    case CHAR:
+      builder.setKind(OrcProto.Type.Kind.CHAR);
+      builder.setMaximumLength(oldType.getMaximumLength());
+      break;
+    case VARCHAR:
+      builder.setKind(OrcProto.Type.Kind.VARCHAR);
+      builder.setMaximumLength(oldType.getMaximumLength());
+      break;
+    case BINARY:
+      builder.setKind(OrcProto.Type.Kind.BINARY);
+      break;
+    case TIMESTAMP:
+      builder.setKind(OrcProto.Type.Kind.TIMESTAMP);
+      break;
+    case DATE:
+      builder.setKind(OrcProto.Type.Kind.DATE);
+      break;
+    case DECIMAL:
+      builder.setKind(OrcProto.Type.Kind.DECIMAL);
+      builder.setPrecision(oldType.getPrecision());
+      builder.setScale(oldType.getScale());
+      break;
+    case LIST:
+      builder.setKind(OrcProto.Type.Kind.LIST);
+      builder.addSubtypes(++subtype);
+      result.add(builder.build());
+      needsAdd = false;
+      columnId = appendOrcTypesRebuildSubtypes(result, types, columnId);
+      break;
+    case MAP:
+      {
+        // Make room for MAP type.
+        result.add(null);
+
+        // Add MAP type pair in order to determine their subtype values.
+        columnId = appendOrcTypesRebuildSubtypes(result, types, columnId);
+        int subtype2 = result.size();
+        columnId = appendOrcTypesRebuildSubtypes(result, types, columnId);
+        builder.setKind(OrcProto.Type.Kind.MAP);
+        builder.addSubtypes(subtype + 1);
+        builder.addSubtypes(subtype2);
+        result.set(subtype, builder.build());
+        needsAdd = false;
+      }
+      break;
+    case STRUCT:
+      {
+        List<String> fieldNames = oldType.getFieldNamesList();
+
+        // Make room for STRUCT type.
+        result.add(null);
+
+        List<Integer> fieldSubtypes = new ArrayList<Integer>(fieldNames.size());
+        for(int i = 0 ; i < fieldNames.size(); i++) {
+          int fieldSubtype = result.size();
+          fieldSubtypes.add(fieldSubtype);
+          columnId = appendOrcTypesRebuildSubtypes(result, types, columnId);
+        }
+
+        builder.setKind(OrcProto.Type.Kind.STRUCT);
+
+        for (int i = 0 ; i < fieldNames.size(); i++) {
+          builder.addSubtypes(fieldSubtypes.get(i));
+          builder.addFieldNames(fieldNames.get(i));
+        }
+        result.set(subtype, builder.build());
+        needsAdd = false;
+      }
+      break;
+    case UNION:
+      {
+        int subtypeCount = oldType.getSubtypesCount();
+
+        // Make room for UNION type.
+        result.add(null);
+
+        List<Integer> unionSubtypes = new ArrayList<Integer>(subtypeCount);
+        for(int i = 0 ; i < subtypeCount; i++) {
+          int unionSubtype = result.size();
+          unionSubtypes.add(unionSubtype);
+          columnId = appendOrcTypesRebuildSubtypes(result, types, columnId);
+        }
+
+        builder.setKind(OrcProto.Type.Kind.UNION);
+        for (int i = 0 ; i < subtypeCount; i++) {
+          builder.addSubtypes(unionSubtypes.get(i));
+        }
+        result.set(subtype, builder.build());
+        needsAdd = false;
+      }
+      break;
+    default:
+      throw new IllegalArgumentException("Unknown category: " + oldType.getKind());
+    }
+    if (needsAdd) {
+      result.add(builder.build());
+    }
+    return columnId;
+  }
+
+  /**
+   * Translate the given rootColumn from the list of types to a TypeDescription.
+   * @param types all of the types
+   * @param rootColumn translate this type
+   * @return a new TypeDescription that matches the given rootColumn
+   */
+  public static
+        TypeDescription convertTypeFromProtobuf(List<OrcProto.Type> types,
+                                                int rootColumn) {
+    OrcProto.Type type = types.get(rootColumn);
+    switch (type.getKind()) {
+      case BOOLEAN:
+        return TypeDescription.createBoolean();
+      case BYTE:
+        return TypeDescription.createByte();
+      case SHORT:
+        return TypeDescription.createShort();
+      case INT:
+        return TypeDescription.createInt();
+      case LONG:
+        return TypeDescription.createLong();
+      case FLOAT:
+        return TypeDescription.createFloat();
+      case DOUBLE:
+        return TypeDescription.createDouble();
+      case STRING:
+        return TypeDescription.createString();
+      case CHAR:
+      case VARCHAR: {
+        TypeDescription result = type.getKind() == OrcProto.Type.Kind.CHAR ?
+            TypeDescription.createChar() : TypeDescription.createVarchar();
+        if (type.hasMaximumLength()) {
+          result.withMaxLength(type.getMaximumLength());
+        }
+        return result;
+      }
+      case BINARY:
+        return TypeDescription.createBinary();
+      case TIMESTAMP:
+        return TypeDescription.createTimestamp();
+      case DATE:
+        return TypeDescription.createDate();
+      case DECIMAL: {
+        TypeDescription result = TypeDescription.createDecimal();
+        if (type.hasScale()) {
+          result.withScale(type.getScale());
+        }
+        if (type.hasPrecision()) {
+          result.withPrecision(type.getPrecision());
+        }
+        return result;
+      }
+      case LIST:
+        return TypeDescription.createList(
+            convertTypeFromProtobuf(types, type.getSubtypes(0)));
+      case MAP:
+        return TypeDescription.createMap(
+            convertTypeFromProtobuf(types, type.getSubtypes(0)),
+            convertTypeFromProtobuf(types, type.getSubtypes(1)));
+      case STRUCT: {
+        TypeDescription result = TypeDescription.createStruct();
+        for(int f=0; f < type.getSubtypesCount(); ++f) {
+          result.addField(type.getFieldNames(f),
+              convertTypeFromProtobuf(types, type.getSubtypes(f)));
+        }
+        return result;
+      }
+      case UNION: {
+        TypeDescription result = TypeDescription.createUnion();
+        for(int f=0; f < type.getSubtypesCount(); ++f) {
+          result.addUnionChild(
+              convertTypeFromProtobuf(types, type.getSubtypes(f)));
+        }
+        return result;
+      }
+    }
+    throw new IllegalArgumentException("Unknown ORC type " + type.getKind());
+  }
+
+  public static List<StripeInformation> convertProtoStripesToStripes(
+      List<OrcProto.StripeInformation> stripes) {
+    List<StripeInformation> result = new ArrayList<StripeInformation>(stripes.size());
+    for (OrcProto.StripeInformation info : stripes) {
+      result.add(new ReaderImpl.StripeInformationImpl(info));
+    }
+    return result;
+  }
+
+  public static List<TypeDescription> setTypeBuilderFromSchema(
+      OrcProto.Type.Builder type, TypeDescription schema) {
+    List<TypeDescription> children = schema.getChildren();
+    switch (schema.getCategory()) {
+      case BOOLEAN:
+        type.setKind(OrcProto.Type.Kind.BOOLEAN);
+        break;
+      case BYTE:
+        type.setKind(OrcProto.Type.Kind.BYTE);
+        break;
+      case SHORT:
+        type.setKind(OrcProto.Type.Kind.SHORT);
+        break;
+      case INT:
+        type.setKind(OrcProto.Type.Kind.INT);
+        break;
+      case LONG:
+        type.setKind(OrcProto.Type.Kind.LONG);
+        break;
+      case FLOAT:
+        type.setKind(OrcProto.Type.Kind.FLOAT);
+        break;
+      case DOUBLE:
+        type.setKind(OrcProto.Type.Kind.DOUBLE);
+        break;
+      case STRING:
+        type.setKind(OrcProto.Type.Kind.STRING);
+        break;
+      case CHAR:
+        type.setKind(OrcProto.Type.Kind.CHAR);
+        type.setMaximumLength(schema.getMaxLength());
+        break;
+      case VARCHAR:
+        type.setKind(OrcProto.Type.Kind.VARCHAR);
+        type.setMaximumLength(schema.getMaxLength());
+        break;
+      case BINARY:
+        type.setKind(OrcProto.Type.Kind.BINARY);
+        break;
+      case TIMESTAMP:
+        type.setKind(OrcProto.Type.Kind.TIMESTAMP);
+        break;
+      case DATE:
+        type.setKind(OrcProto.Type.Kind.DATE);
+        break;
+      case DECIMAL:
+        type.setKind(OrcProto.Type.Kind.DECIMAL);
+        type.setPrecision(schema.getPrecision());
+        type.setScale(schema.getScale());
+        break;
+      case LIST:
+        type.setKind(OrcProto.Type.Kind.LIST);
+        type.addSubtypes(children.get(0).getId());
+        break;
+      case MAP:
+        type.setKind(OrcProto.Type.Kind.MAP);
+        for(TypeDescription t: children) {
+          type.addSubtypes(t.getId());
+        }
+        break;
+      case STRUCT:
+        type.setKind(OrcProto.Type.Kind.STRUCT);
+        for(TypeDescription t: children) {
+          type.addSubtypes(t.getId());
+        }
+        for(String field: schema.getFieldNames()) {
+          type.addFieldNames(field);
+        }
+        break;
+      case UNION:
+        type.setKind(OrcProto.Type.Kind.UNION);
+        for(TypeDescription t: children) {
+          type.addSubtypes(t.getId());
+        }
+        break;
+      default:
+        throw new IllegalArgumentException("Unknown category: " +
+          schema.getCategory());
+    }
+    return children;
+  }
+}
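
A usage sketch (not part of this patch) of the flattening described in the
includeColumns javadoc; the schema is hypothetical and is built with the same
TypeDescription factories the code above already calls, assuming they chain:

    // struct<a:int,b:int,c:array<string>,d:int>; ids: 0=root, 1=a, 2=b,
    // 3=c (list), 4=c's element (string), 5=d.
    TypeDescription schema = TypeDescription.createStruct()
        .addField("a", TypeDescription.createInt())
        .addField("b", TypeDescription.createInt())
        .addField("c", TypeDescription.createList(TypeDescription.createString()))
        .addField("d", TypeDescription.createInt());

    boolean[] included = OrcUtils.includeColumns("a,b,c", schema);
    // included -> [false, true, true, true, true, false]

    // Flatten to protobuf types and round-trip back; the root is element 0.
    List<OrcProto.Type> types = OrcUtils.getOrcTypes(schema);
    TypeDescription copy = OrcUtils.convertTypeFromProtobuf(types, 0);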

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/Reader.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/Reader.java b/orc/src/java/org/apache/hive/orc/Reader.java
new file mode 100644
index 0000000..61ce186
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/Reader.java
@@ -0,0 +1,375 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
+
+/**
+ * The interface for reading ORC files.
+ *
+ * One Reader can support multiple concurrent RecordReaders.
+ */
+public interface Reader {
+
+  /**
+   * Get the number of rows in the file.
+   * @return the number of rows
+   */
+  long getNumberOfRows();
+
+  /**
+   * Get the deserialized data size of the file
+   * @return raw data size
+   */
+  long getRawDataSize();
+
+  /**
+   * Get the deserialized data size of the specified columns.
+   * @param colNames the names of the columns to measure
+   * @return raw data size of columns
+   */
+  long getRawDataSizeOfColumns(List<String> colNames);
+
+  /**
+   * Get the deserialized data size of the specified column ids.
+   * @param colIds - internal column ids (check orcfiledump for column ids)
+   * @return raw data size of columns
+   */
+  long getRawDataSizeFromColIndices(List<Integer> colIds);
+
+  /**
+   * Get the user metadata keys.
+   * @return the set of metadata keys
+   */
+  List<String> getMetadataKeys();
+
+  /**
+   * Get a user metadata value.
+   * @param key a key given by the user
+   * @return the bytes associated with the given key
+   */
+  ByteBuffer getMetadataValue(String key);
+
+  /**
+   * Did the user set the given metadata value.
+   * @param key the key to check
+   * @return true if the metadata value was set
+   */
+  boolean hasMetadataValue(String key);
+
+  /**
+   * Get the compression kind.
+   * @return the kind of compression in the file
+   */
+  CompressionKind getCompressionKind();
+
+  /**
+   * Get the buffer size for the compression.
+   * @return number of bytes to buffer for the compression codec.
+   */
+  int getCompressionSize();
+
+  /**
+   * Get the number of rows per entry in the row index.
+   * @return the number of rows per entry in the row index, or 0 if there
+   * is no row index.
+   */
+  int getRowIndexStride();
+
+  /**
+   * Get the list of stripes.
+   * @return the information about the stripes in order
+   */
+  List<StripeInformation> getStripes();
+
+  /**
+   * Get the length of the file.
+   * @return the number of bytes in the file
+   */
+  long getContentLength();
+
+  /**
+   * Get the statistics about the columns in the file.
+   * @return the information about the column
+   */
+  ColumnStatistics[] getStatistics();
+
+  /**
+   * Get the type of rows in this ORC file.
+   */
+  TypeDescription getSchema();
+
+  /**
+   * Get the list of types contained in the file. The root type is the first
+   * type in the list.
+   * @return the list of flattened types
+   * @deprecated use getSchema instead
+   */
+  List<OrcProto.Type> getTypes();
+
+  /**
+   * Get the file format version.
+   */
+  OrcFile.Version getFileVersion();
+
+  /**
+   * Get the version of the writer of this file.
+   */
+  OrcFile.WriterVersion getWriterVersion();
+
+  /**
+   * Get the file tail (footer + postscript)
+   *
+   * @return - file tail
+   */
+  OrcProto.FileTail getFileTail();
+
+  /**
+   * Options for creating a RecordReader.
+   */
+  public static class Options {
+    private boolean[] include;
+    private long offset = 0;
+    private long length = Long.MAX_VALUE;
+    private SearchArgument sarg = null;
+    private String[] columnNames = null;
+    private Boolean useZeroCopy = null;
+    private Boolean skipCorruptRecords = null;
+    private TypeDescription schema = null;
+    private DataReader dataReader = null;
+
+    /**
+     * Set the list of columns to read.
+     * @param include a list of columns to read
+     * @return this
+     */
+    public Options include(boolean[] include) {
+      this.include = include;
+      return this;
+    }
+
+    /**
+     * Set the range of bytes to read
+     * @param offset the starting byte offset
+     * @param length the number of bytes to read
+     * @return this
+     */
+    public Options range(long offset, long length) {
+      this.offset = offset;
+      this.length = length;
+      return this;
+    }
+
+    /**
+     * Set the schema-on-read type description.
+     */
+    public Options schema(TypeDescription schema) {
+      this.schema = schema;
+      return this;
+    }
+
+    /**
+     * Set search argument for predicate push down.
+     * @param sarg the search argument
+     * @param columnNames the column names referenced by the search argument
+     * @return this
+     */
+    public Options searchArgument(SearchArgument sarg, String[] columnNames) {
+      this.sarg = sarg;
+      this.columnNames = columnNames;
+      return this;
+    }
+
+    /**
+     * Set whether to use zero copy from HDFS.
+     * @param value the new zero copy flag
+     * @return this
+     */
+    public Options useZeroCopy(boolean value) {
+      this.useZeroCopy = value;
+      return this;
+    }
+
+    public Options dataReader(DataReader value) {
+      this.dataReader = value;
+      return this;
+    }
+
+    /**
+     * Set whether to skip corrupt records.
+     * @param value the new skip corrupt records flag
+     * @return this
+     */
+    public Options skipCorruptRecords(boolean value) {
+      this.skipCorruptRecords = value;
+      return this;
+    }
+
+    public boolean[] getInclude() {
+      return include;
+    }
+
+    public long getOffset() {
+      return offset;
+    }
+
+    public long getLength() {
+      return length;
+    }
+
+    public TypeDescription getSchema() {
+      return schema;
+    }
+
+    public SearchArgument getSearchArgument() {
+      return sarg;
+    }
+
+    public String[] getColumnNames() {
+      return columnNames;
+    }
+
+    public long getMaxOffset() {
+      long result = offset + length;
+      if (result < 0) {
+        result = Long.MAX_VALUE;
+      }
+      return result;
+    }
+
+    public Boolean getUseZeroCopy() {
+      return useZeroCopy;
+    }
+
+    public Boolean getSkipCorruptRecords() {
+      return skipCorruptRecords;
+    }
+
+    public DataReader getDataReader() {
+      return dataReader;
+    }
+
+    public Options clone() {
+      Options result = new Options();
+      result.include = include;
+      result.offset = offset;
+      result.length = length;
+      result.sarg = sarg;
+      result.schema = schema;
+      result.columnNames = columnNames;
+      result.useZeroCopy = useZeroCopy;
+      result.skipCorruptRecords = skipCorruptRecords;
+      result.dataReader = dataReader == null ? null : dataReader.clone();
+      return result;
+    }
+
+    @Override
+    public String toString() {
+      StringBuilder buffer = new StringBuilder();
+      buffer.append("{include: ");
+      if (include == null) {
+        buffer.append("null");
+      } else {
+        buffer.append("[");
+        for(int i=0; i < include.length; ++i) {
+          if (i != 0) {
+            buffer.append(", ");
+          }
+          buffer.append(include[i]);
+        }
+        buffer.append("]");
+      }
+      buffer.append(", offset: ");
+      buffer.append(offset);
+      buffer.append(", length: ");
+      buffer.append(length);
+      if (sarg != null) {
+        buffer.append(", sarg: ");
+        buffer.append(sarg.toString());
+        buffer.append(", columns: [");
+        for(int i=0; i < columnNames.length; ++i) {
+          if (i != 0) {
+            buffer.append(", ");
+          }
+          buffer.append("'");
+          buffer.append(columnNames[i]);
+          buffer.append("'");
+        }
+        buffer.append("]");
+      }
+      if (schema != null) {
+        buffer.append(", schema: ");
+        schema.printToBuffer(buffer);
+      }
+      buffer.append("}");
+      return buffer.toString();
+    }
+  }
+
+  /**
+   * Create a RecordReader that reads everything with the default options.
+   * @return a new RecordReader
+   * @throws IOException
+   */
+  RecordReader rows() throws IOException;
+
+  /**
+   * Create a RecordReader that uses the options given.
+   * This method can't be named rows, because many callers used rows(null)
+   * before the rows() method was introduced.
+   * @param options the options to read with
+   * @return a new RecordReader
+   * @throws IOException
+   */
+  RecordReader rows(Options options) throws IOException;
+
+  /**
+   * @return List of integers representing the version of the file, in order from major to minor.
+   */
+  List<Integer> getVersionList();
+
+  /**
+   * @return the size of the file metadata, in bytes.
+   */
+  int getMetadataSize();
+
+  /**
+   * @return Stripe statistics, in original protobuf form.
+   */
+  List<OrcProto.StripeStatistics> getOrcProtoStripeStatistics();
+
+  /**
+   * @return Stripe statistics.
+   */
+  List<StripeStatistics> getStripeStatistics() throws IOException;
+
+  /**
+   * @return File statistics, in original protobuf form.
+   */
+  List<OrcProto.ColumnStatistics> getOrcProtoFileStatistics();
+
+  /**
+   * @return Serialized file metadata read from disk for the purposes of caching, etc.
+   */
+  ByteBuffer getSerializedFileFooter();
+}
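
A sketch of wiring Options into a read; the OrcFile factory and the file path
are assumptions here (the factory lives in OrcFile.java elsewhere in this
patch series), and only methods declared above are used on the Reader itself:

    Configuration conf = new Configuration();
    Reader reader = OrcFile.createReader(new Path("/tmp/example.orc"),
        OrcFile.readerOptions(conf));

    // Project columns a and c and read the whole file.
    boolean[] include = OrcUtils.includeColumns("a,c", reader.getSchema());
    Reader.Options options = new Reader.Options()
        .include(include)
        .range(0, reader.getContentLength());
    RecordReader rows = reader.rows(options);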

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/RecordReader.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/RecordReader.java b/orc/src/java/org/apache/hive/orc/RecordReader.java
new file mode 100644
index 0000000..f86fa0e
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/RecordReader.java
@@ -0,0 +1,64 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc;
+
+import java.io.IOException;
+
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+/**
+ * A row-by-row iterator for ORC files.
+ */
+public interface RecordReader {
+  /**
+   * Read the next row batch. The size of the batch to read cannot be
+   * controlled by the caller. Callers need to look at
+   * VectorizedRowBatch.size of the returned object to know the batch
+   * size that was read.
+   * @param batch a row batch object to read into
+   * @return whether more rows were available to read
+   * @throws java.io.IOException
+   */
+  boolean nextBatch(VectorizedRowBatch batch) throws IOException;
+
+  /**
+   * Get the row number of the first row that will be returned by the
+   * next call to nextBatch().
+   * @return the row number from 0 to the number of rows in the file
+   * @throws java.io.IOException
+   */
+  long getRowNumber() throws IOException;
+
+  /**
+   * Get the progress of the reader through the rows.
+   * @return a fraction between 0.0 and 1.0 of rows read
+   * @throws java.io.IOException
+   */
+  float getProgress() throws IOException;
+
+  /**
+   * Release the resources associated with the given reader.
+   * @throws java.io.IOException
+   */
+  void close() throws IOException;
+
+  /**
+   * Seek to a particular row number.
+   */
+  void seekToRow(long rowCount) throws IOException;
+}
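
A minimal drain loop over this interface, checking VectorizedRowBatch.size as
the nextBatch javadoc requires; createRowBatch and LongColumnVector come from
the storage-api (org.apache.hadoop.hive.ql.exec.vector), and the long column
at vector index 0 is an assumption of the sketch:

    TypeDescription schema = reader.getSchema();
    VectorizedRowBatch batch = schema.createRowBatch();
    RecordReader rows = reader.rows();
    while (rows.nextBatch(batch)) {
      LongColumnVector col0 = (LongColumnVector) batch.cols[0];
      for (int r = 0; r < batch.size; ++r) {
        long value = col0.vector[r];   // process one row's value
      }
    }
    rows.close();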

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/java/org/apache/hive/orc/StringColumnStatistics.java
----------------------------------------------------------------------
diff --git a/orc/src/java/org/apache/hive/orc/StringColumnStatistics.java b/orc/src/java/org/apache/hive/orc/StringColumnStatistics.java
new file mode 100644
index 0000000..8a81413
--- /dev/null
+++ b/orc/src/java/org/apache/hive/orc/StringColumnStatistics.java
@@ -0,0 +1,41 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc;
+
+/**
+ * Statistics for string columns.
+ */
+public interface StringColumnStatistics extends ColumnStatistics {
+  /**
+   * Get the minimum string.
+   * @return the minimum
+   */
+  String getMinimum();
+
+  /**
+   * Get the maximum string.
+   * @return the maximum
+   */
+  String getMaximum();
+
+  /**
+   * Get the total length of all strings in the column.
+   * @return the sum of the string lengths
+   */
+  long getSum();
+}
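
A sketch of reading these statistics back off a Reader; getStatistics is
declared on the Reader interface earlier in this patch, the array is indexed
by flattened column id, and the instanceof check is needed because it returns
the base ColumnStatistics type:

    ColumnStatistics[] stats = reader.getStatistics();
    for (int i = 0; i < stats.length; ++i) {
      if (stats[i] instanceof StringColumnStatistics) {
        StringColumnStatistics s = (StringColumnStatistics) stats[i];
        System.out.println("column " + i + ": min=" + s.getMinimum()
            + " max=" + s.getMaximum() + " totalLength=" + s.getSum());
      }
    }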