You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@tajo.apache.org by ji...@apache.org on 2016/03/23 02:41:46 UTC

[1/7] tajo git commit: TAJO-2102: Migrate to Apache Orc from Presto's one.

Repository: tajo
Updated Branches:
  refs/heads/master 9fcc9fd3a -> 682635852


http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/WriterImpl.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/WriterImpl.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/WriterImpl.java
index 833d102..e0ad3d7 100644
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/WriterImpl.java
+++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/WriterImpl.java
@@ -19,7 +19,6 @@
 package org.apache.tajo.storage.thirdparty.orc;
 
 import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Joiner;
 import com.google.common.collect.Lists;
 import com.google.common.primitives.Longs;
 import com.google.protobuf.ByteString;
@@ -30,21 +29,20 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.ql.io.IOConstants;
-import org.apache.hadoop.hive.shims.ShimLoader;
-import org.apache.tajo.datum.*;
-import org.apache.tajo.storage.Tuple;
-import org.apache.tajo.storage.thirdparty.orc.CompressionCodec.Modifier;
-import org.apache.tajo.storage.thirdparty.orc.OrcProto.RowIndexEntry;
-import org.apache.tajo.storage.thirdparty.orc.OrcProto.StripeStatistics;
-import org.apache.tajo.storage.thirdparty.orc.OrcProto.Type;
-import org.apache.tajo.storage.thirdparty.orc.OrcProto.UserMetadataItem;
 import org.apache.hadoop.hive.ql.util.JavaDataModel;
-import org.apache.hadoop.hive.serde2.objectinspector.*;
-import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
 import org.apache.hadoop.io.Text;
+import org.apache.orc.*;
+import org.apache.orc.CompressionCodec.Modifier;
+import org.apache.orc.OrcProto.RowIndexEntry;
+import org.apache.orc.OrcUtils;
+import org.apache.orc.impl.*;
+import org.apache.tajo.datum.Datum;
+import org.apache.tajo.datum.Inet4Datum;
+import org.apache.tajo.datum.Int4Datum;
+import org.apache.tajo.datum.Int8Datum;
+import org.apache.tajo.storage.Tuple;
+import org.apache.tajo.storage.thirdparty.orc.OrcFile.*;
+import org.apache.tajo.util.datetime.DateTimeConstants;
 import org.apache.tajo.util.datetime.DateTimeUtil;
 
 import java.io.IOException;
@@ -94,10 +92,11 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
   private final boolean addBlockPadding;
   private final int bufferSize;
   private final long blockSize;
-  private final float paddingTolerance;
+  private final double paddingTolerance;
+  private final TypeDescription schema;
+
   // the streams that make up the current stripe
-  private final Map<StreamName, BufferedStream> streams =
-          new TreeMap<>();
+  private final Map<StreamName, BufferedStream> streams = new TreeMap<>();
 
   private FSDataOutputStream rawWriter = null;
   // the compressed metadata information outStream
@@ -111,47 +110,32 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
   private long rawDataSize = 0;
   private int rowsInIndex = 0;
   private int stripesAtLastFlush = -1;
-  private final List<OrcProto.StripeInformation> stripes =
-          new ArrayList<>();
-  private final Map<String, ByteString> userMetadata =
-          new TreeMap<>();
+  private final List<OrcProto.StripeInformation> stripes = new ArrayList<>();
+  private final Map<String, ByteString> userMetadata = new TreeMap<>();
+  private final StreamFactory streamFactory = new StreamFactory();
   private final TreeWriter treeWriter;
   private final boolean buildIndex;
   private final MemoryManager memoryManager;
-  private final OrcFile.Version version;
+  private final Version version;
   private final Configuration conf;
-  private final OrcFile.WriterCallback callback;
-  private final OrcFile.WriterContext callbackContext;
-  private final OrcFile.EncodingStrategy encodingStrategy;
-  private final OrcFile.CompressionStrategy compressionStrategy;
+  private final WriterCallback callback;
+  private final WriterContext callbackContext;
+  private final EncodingStrategy encodingStrategy;
+  private final CompressionStrategy compressionStrategy;
   private final boolean[] bloomFilterColumns;
   private final double bloomFilterFpp;
   private boolean writeTimeZone;
   private TimeZone timeZone;
 
-  WriterImpl(FileSystem fs,
-      Path path,
-      Configuration conf,
-      ObjectInspector inspector,
-      long stripeSize,
-      CompressionKind compress,
-      int bufferSize,
-      int rowIndexStride,
-      MemoryManager memoryManager,
-      boolean addBlockPadding,
-      OrcFile.Version version,
-      OrcFile.WriterCallback callback,
-      OrcFile.EncodingStrategy encodingStrategy,
-      OrcFile.CompressionStrategy compressionStrategy,
-      float paddingTolerance,
-      long blockSizeValue,
-      String bloomFilterColumnNames,
-      double bloomFilterFpp,
-      TimeZone timeZone) throws IOException {
+  public WriterImpl(FileSystem fs,
+                    Path path,
+                    OrcFile.WriterOptions opts,
+                    TimeZone timeZone) throws IOException {
     this.fs = fs;
     this.path = path;
-    this.conf = conf;
-    this.callback = callback;
+    this.conf = opts.getConfiguration();
+    this.callback = opts.getCallback();
+    this.schema = opts.getSchema();
     if (callback != null) {
       callbackContext = new OrcFile.WriterContext(){
 
@@ -163,100 +147,60 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
     } else {
       callbackContext = null;
     }
-    this.adjustedStripeSize = stripeSize;
-    this.defaultStripeSize = stripeSize;
-    this.version = version;
-    this.encodingStrategy = encodingStrategy;
-    this.compressionStrategy = compressionStrategy;
-    this.addBlockPadding = addBlockPadding;
-    this.blockSize = blockSizeValue;
-    this.paddingTolerance = paddingTolerance;
-    this.compress = compress;
-    this.rowIndexStride = rowIndexStride;
-    this.memoryManager = memoryManager;
-    this.timeZone = timeZone;
+    this.adjustedStripeSize = opts.getStripeSize();
+    this.defaultStripeSize = opts.getStripeSize();
+    this.version = opts.getVersion();
+    this.encodingStrategy = opts.getEncodingStrategy();
+    this.compressionStrategy = opts.getCompressionStrategy();
+    this.addBlockPadding = opts.getBlockPadding();
+    this.blockSize = opts.getBlockSize();
+    this.paddingTolerance = opts.getPaddingTolerance();
+    this.compress = opts.getCompress();
+    this.rowIndexStride = opts.getRowIndexStride();
+    this.memoryManager = opts.getMemoryManager();
     buildIndex = rowIndexStride > 0;
     codec = createCodec(compress);
-    String allColumns = conf.get(IOConstants.COLUMNS);
-    if (allColumns == null) {
-      allColumns = getColumnNamesFromInspector(inspector);
-    }
-    this.bufferSize = getEstimatedBufferSize(allColumns, bufferSize);
+    int numColumns = schema.getMaximumId() + 1;
+    this.bufferSize = getEstimatedBufferSize(defaultStripeSize,
+        numColumns, opts.getBufferSize());
     if (version == OrcFile.Version.V_0_11) {
       /* do not write bloom filters for ORC v11 */
-      this.bloomFilterColumns =
-          OrcUtils.includeColumns(null, allColumns, inspector);
+      this.bloomFilterColumns = new boolean[schema.getMaximumId() + 1];
     } else {
       this.bloomFilterColumns =
-          OrcUtils.includeColumns(bloomFilterColumnNames, allColumns, inspector);
+          OrcUtils.includeColumns(opts.getBloomFilterColumns(), schema);
     }
-    this.bloomFilterFpp = bloomFilterFpp;
-    treeWriter = createTreeWriter(inspector, new StreamFactory(), false);
+    this.bloomFilterFpp = opts.getBloomFilterFpp();
+    this.timeZone = timeZone;
+    treeWriter = createTreeWriter(schema, streamFactory, false);
     if (buildIndex && rowIndexStride < MIN_ROW_INDEX_STRIDE) {
       throw new IllegalArgumentException("Row stride must be at least " +
           MIN_ROW_INDEX_STRIDE);
     }
 
     // ensure that we are able to handle callbacks before we register ourselves
-    memoryManager.addWriter(path, stripeSize, this);
-  }
-
-  private String getColumnNamesFromInspector(ObjectInspector inspector) {
-    List<String> fieldNames = Lists.newArrayList();
-    Joiner joiner = Joiner.on(",");
-    if (inspector instanceof StructObjectInspector) {
-      StructObjectInspector soi = (StructObjectInspector) inspector;
-      List<? extends StructField> fields = soi.getAllStructFieldRefs();
-      for(StructField sf : fields) {
-        fieldNames.add(sf.getFieldName());
-      }
-    }
-    return joiner.join(fieldNames);
+    memoryManager.addWriter(path, opts.getStripeSize(), this);
   }
 
   @VisibleForTesting
-  int getEstimatedBufferSize(int bs) {
-      return getEstimatedBufferSize(conf.get(IOConstants.COLUMNS), bs);
-  }
-
-  int getEstimatedBufferSize(String colNames, int bs) {
-    long availableMem = getMemoryAvailableForORC();
-    if (colNames != null) {
-      final int numCols = colNames.split(",").length;
-      if (numCols > COLUMN_COUNT_THRESHOLD) {
-        // In BufferedStream, there are 3 outstream buffers (compressed,
-        // uncompressed and overflow) and list of previously compressed buffers.
-        // Since overflow buffer is rarely used, lets consider only 2 allocation.
-        // Also, initially, the list of compression buffers will be empty.
-        final int outStreamBuffers = codec == null ? 1 : 2;
-
-        // max possible streams per column is 5. For string columns, there is
-        // ROW_INDEX, PRESENT, DATA, LENGTH, DICTIONARY_DATA streams.
-        final int maxStreams = 5;
-
-        // Lets assume 10% memory for holding dictionary in memory and other
-        // object allocations
-        final long miscAllocation = (long) (0.1f * availableMem);
-
-        // compute the available memory
-        final long remainingMem = availableMem - miscAllocation;
-
-        int estBufferSize = (int) (remainingMem /
-            (maxStreams * outStreamBuffers * numCols));
-        estBufferSize = getClosestBufferSize(estBufferSize, bs);
-        if (estBufferSize > bs) {
-          estBufferSize = bs;
-        }
-
-        LOG.info("WIDE TABLE - Number of columns: " + numCols +
-            " Chosen compression buffer size: " + estBufferSize);
-        return estBufferSize;
-      }
+  public static int getEstimatedBufferSize(long stripeSize, int numColumns,
+                                           int bs) {
+    // The worst case is that there are 2 big streams per column and
+    // we want to guarantee that each stream gets ~10 buffers.
+    // This keeps buffers small enough that we don't get really small stripe
+    // sizes.
+    int estBufferSize = (int) (stripeSize / (20 * numColumns));
+    estBufferSize = getClosestBufferSize(estBufferSize);
+    if (estBufferSize > bs) {
+      estBufferSize = bs;
+    } else {
+      LOG.info("WIDE TABLE - Number of columns: " + numColumns +
+          " Chosen compression buffer size: " + estBufferSize);
     }
-    return bs;
+    return estBufferSize;
   }
 
-  private int getClosestBufferSize(int estBufferSize, int bs) {
+  private static int getClosestBufferSize(int estBufferSize) {
     final int kb4 = 4 * 1024;
     final int kb8 = 8 * 1024;
     final int kb16 = 16 * 1024;
@@ -616,8 +560,7 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
    */
   private abstract static class TreeWriter {
     protected final int id;
-    protected final ObjectInspector inspector;
-    private final BitFieldWriter isPresent;
+    protected final BitFieldWriter isPresent;
     private final boolean isCompressed;
     protected final ColumnStatisticsImpl indexStatistics;
     protected final ColumnStatisticsImpl stripeColStatistics;
@@ -634,24 +577,24 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
     private final OrcProto.BloomFilter.Builder bloomFilterEntry;
     private boolean foundNulls;
     private OutStream isPresentOutStream;
-    private final List<StripeStatistics.Builder> stripeStatsBuilders;
+    private final List<OrcProto.StripeStatistics.Builder> stripeStatsBuilders;
     private final StreamFactory streamFactory;
 
     /**
      * Create a tree writer.
      * @param columnId the column id of the column to write
-     * @param inspector the object inspector to use
+     * @param schema the row schema
      * @param streamFactory limited access to the Writer's data.
      * @param nullable can the value be null?
      * @throws IOException
      */
-    TreeWriter(int columnId, ObjectInspector inspector,
+    TreeWriter(int columnId,
+               TypeDescription schema,
                StreamFactory streamFactory,
                boolean nullable) throws IOException {
       this.streamFactory = streamFactory;
       this.isCompressed = streamFactory.isCompressed();
       this.id = columnId;
-      this.inspector = inspector;
       if (nullable) {
         isPresentOutStream = streamFactory.createStream(id,
             OrcProto.Stream.Kind.PRESENT);
@@ -661,9 +604,9 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
       }
       this.foundNulls = false;
       createBloomFilter = streamFactory.getBloomFilterColumns()[columnId];
-      indexStatistics = ColumnStatisticsImpl.create(inspector);
-      stripeColStatistics = ColumnStatisticsImpl.create(inspector);
-      fileStatistics = ColumnStatisticsImpl.create(inspector);
+      indexStatistics = ColumnStatisticsImpl.create(schema);
+      stripeColStatistics = ColumnStatisticsImpl.create(schema);
+      fileStatistics = ColumnStatisticsImpl.create(schema);
       childrenWriters = new TreeWriter[0];
       rowIndex = OrcProto.RowIndex.newBuilder();
       rowIndexEntry = OrcProto.RowIndexEntry.newBuilder();
@@ -912,10 +855,10 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
     private final BitFieldWriter writer;
 
     BooleanTreeWriter(int columnId,
-                      ObjectInspector inspector,
+                      TypeDescription schema,
                       StreamFactory writer,
                       boolean nullable) throws IOException {
-      super(columnId, inspector, writer, nullable);
+      super(columnId, schema, writer, nullable);
       PositionedOutputStream out = writer.createStream(id,
           OrcProto.Stream.Kind.DATA);
       this.writer = new BitFieldWriter(out, 1);
@@ -927,7 +870,7 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
       super.write(datum);
       if (datum != null && datum.isNotNull()) {
         boolean val = datum.asBool();
-        indexStatistics.updateBoolean(val);
+        indexStatistics.updateBoolean(val, 1);
         writer.write(val ? 1 : 0);
       }
     }
@@ -951,10 +894,10 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
     private final RunLengthByteWriter writer;
 
     ByteTreeWriter(int columnId,
-                      ObjectInspector inspector,
-                      StreamFactory writer,
-                      boolean nullable) throws IOException {
-      super(columnId, inspector, writer, nullable);
+                   TypeDescription schema,
+                   StreamFactory writer,
+                   boolean nullable) throws IOException {
+      super(columnId, schema, writer, nullable);
       this.writer = new RunLengthByteWriter(writer.createStream(id,
           OrcProto.Stream.Kind.DATA));
       recordPosition(rowIndexPosition);
@@ -965,7 +908,7 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
       super.write(datum);
       if (datum != null && datum.isNotNull()) {
         byte val = datum.asByte();
-        indexStatistics.updateInteger(val);
+        indexStatistics.updateInteger(val, 1);
         if (createBloomFilter) {
           bloomFilter.addLong(val);
         }
@@ -993,10 +936,10 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
     private boolean isDirectV2 = true;
 
     IntegerTreeWriter(int columnId,
-                      ObjectInspector inspector,
+                      TypeDescription schema,
                       StreamFactory writer,
                       boolean nullable) throws IOException {
-      super(columnId, inspector, writer, nullable);
+      super(columnId, schema, writer, nullable);
       OutStream out = writer.createStream(id,
           OrcProto.Stream.Kind.DATA);
       this.isDirectV2 = isNewWriteFormat(writer);
@@ -1026,7 +969,7 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
         } else {
           val = datum.asInt2();
         }
-        indexStatistics.updateInteger(val);
+        indexStatistics.updateInteger(val, 1);
         if (createBloomFilter) {
           // integers are converted to longs in column statistics and during SARG evaluation
           bloomFilter.addLong(val);
@@ -1055,10 +998,10 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
     private final SerializationUtils utils;
 
     FloatTreeWriter(int columnId,
-                      ObjectInspector inspector,
-                      StreamFactory writer,
-                      boolean nullable) throws IOException {
-      super(columnId, inspector, writer, nullable);
+                    TypeDescription schema,
+                    StreamFactory writer,
+                    boolean nullable) throws IOException {
+      super(columnId, schema, writer, nullable);
       this.stream = writer.createStream(id,
           OrcProto.Stream.Kind.DATA);
       this.utils = new SerializationUtils();
@@ -1099,10 +1042,10 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
     private final SerializationUtils utils;
 
     DoubleTreeWriter(int columnId,
-                    ObjectInspector inspector,
-                    StreamFactory writer,
-                    boolean nullable) throws IOException {
-      super(columnId, inspector, writer, nullable);
+                     TypeDescription schema,
+                     StreamFactory writer,
+                     boolean nullable) throws IOException {
+      super(columnId, schema, writer, nullable);
       this.stream = writer.createStream(id,
           OrcProto.Stream.Kind.DATA);
       this.utils = new SerializationUtils();
@@ -1137,33 +1080,33 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
     }
   }
 
-  private static class StringTreeWriter extends TreeWriter {
+  private static abstract class StringBaseTreeWriter extends TreeWriter {
     private static final int INITIAL_DICTIONARY_SIZE = 4096;
     private final OutStream stringOutput;
     private final IntegerWriter lengthOutput;
     private final IntegerWriter rowOutput;
-    private final StringRedBlackTree dictionary =
+    protected final StringRedBlackTree dictionary =
         new StringRedBlackTree(INITIAL_DICTIONARY_SIZE);
-    private final DynamicIntArray rows = new DynamicIntArray();
-    private final PositionedOutputStream directStreamOutput;
-    private final IntegerWriter directLengthOutput;
-    private final List<RowIndexEntry> savedRowIndex =
-            new ArrayList<>();
+    protected final DynamicIntArray rows = new DynamicIntArray();
+    protected final PositionedOutputStream directStreamOutput;
+    protected final IntegerWriter directLengthOutput;
+    private final List<OrcProto.RowIndexEntry> savedRowIndex =
+        new ArrayList<OrcProto.RowIndexEntry>();
     private final boolean buildIndex;
-    private final List<Long> rowIndexValueCount = new ArrayList<>();
+    private final List<Long> rowIndexValueCount = new ArrayList<Long>();
     // If the number of keys in a dictionary is greater than this fraction of
     //the total number of non-null rows, turn off dictionary encoding
-    private final float dictionaryKeySizeThreshold;
-    private boolean useDictionaryEncoding = true;
+    private final double dictionaryKeySizeThreshold;
+    protected boolean useDictionaryEncoding = true;
     private boolean isDirectV2 = true;
     private boolean doneDictionaryCheck;
-    private final boolean strideDictionaryCheck;
+    protected final boolean strideDictionaryCheck;
 
-    StringTreeWriter(int columnId,
-                     ObjectInspector inspector,
-                     StreamFactory writer,
-                     boolean nullable) throws IOException {
-      super(columnId, inspector, writer, nullable);
+    StringBaseTreeWriter(int columnId,
+                         TypeDescription schema,
+                         StreamFactory writer,
+                         boolean nullable) throws IOException {
+      super(columnId, schema, writer, nullable);
       this.isDirectV2 = isNewWriteFormat(writer);
       stringOutput = writer.createStream(id,
           OrcProto.Stream.Kind.DICTIONARY_DATA);
@@ -1177,33 +1120,14 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
       directStreamOutput = writer.createStream(id, OrcProto.Stream.Kind.DATA);
       directLengthOutput = createIntegerWriter(writer.createStream(id,
           OrcProto.Stream.Kind.LENGTH), false, isDirectV2, writer);
-      dictionaryKeySizeThreshold = writer.getConfiguration().getFloat(
-          OrcConf.ConfVars.HIVE_ORC_DICTIONARY_KEY_SIZE_THRESHOLD.varname,
-          OrcConf.ConfVars.HIVE_ORC_DICTIONARY_KEY_SIZE_THRESHOLD.defaultFloatVal);
-      strideDictionaryCheck = writer.getConfiguration().getBoolean(
-          OrcConf.ConfVars.HIVE_ORC_ROW_INDEX_STRIDE_DICTIONARY_CHECK.varname,
-          OrcConf.ConfVars.HIVE_ORC_ROW_INDEX_STRIDE_DICTIONARY_CHECK.defaultBoolVal);
+      Configuration conf = writer.getConfiguration();
+      dictionaryKeySizeThreshold =
+          org.apache.orc.OrcConf.DICTIONARY_KEY_SIZE_THRESHOLD.getDouble(conf);
+      strideDictionaryCheck =
+          org.apache.orc.OrcConf.ROW_INDEX_STRIDE_DICTIONARY_CHECK.getBoolean(conf);
       doneDictionaryCheck = false;
     }
 
-    @Override
-    void write(Datum datum) throws IOException {
-      super.write(datum);
-      if (datum != null && datum.isNotNull()) {
-        if (useDictionaryEncoding || !strideDictionaryCheck) {
-          rows.add(dictionary.add(datum.toString()));
-        } else {
-          // write data and length
-          directStreamOutput.write(datum.asByteArray(), 0, datum.size());
-          directLengthOutput.write(datum.size());
-        }
-        indexStatistics.updateString(datum.toString());
-        if (createBloomFilter) {
-          bloomFilter.addBytes(datum.asByteArray(), datum.size());
-        }
-      }
-    }
-
     private boolean checkDictionaryEncoding() {
       if (!doneDictionaryCheck) {
         // Set the flag indicating whether or not to use dictionary encoding
@@ -1269,7 +1193,7 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
           private int currentId = 0;
           @Override
           public void visit(StringRedBlackTree.VisitorContext context
-                           ) throws IOException {
+          ) throws IOException {
             context.writeBytes(stringOutput);
             lengthOutput.write(context.getLength());
             dumpOrder[context.getOriginalPosition()] = currentId++;
@@ -1383,29 +1307,76 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
     }
   }
 
+  private static class StringTreeWriter extends StringBaseTreeWriter {
+    StringTreeWriter(int columnId,
+                     TypeDescription schema,
+                     StreamFactory writer,
+                     boolean nullable) throws IOException {
+      super(columnId, schema, writer, nullable);
+    }
+
+    @Override
+    void write(Datum datum) throws IOException {
+      super.write(datum);
+      if (datum != null && datum.isNotNull()) {
+        if (useDictionaryEncoding || !strideDictionaryCheck) {
+          rows.add(dictionary.add(datum.toString()));
+        } else {
+          // write data and length
+          directStreamOutput.write(datum.asByteArray(), 0, datum.size());
+          directLengthOutput.write(datum.size());
+        }
+        byte[] buf = datum.asByteArray();
+        indexStatistics.updateString(buf, 0, buf.length, 1);
+        if (createBloomFilter) {
+          bloomFilter.addBytes(buf, 0, buf.length);
+        }
+      }
+    }
+  }
+
   /**
    * Under the covers, char is written to ORC the same way as string.
    */
   private static class CharTreeWriter extends StringTreeWriter {
+    private final int itemLength;
+    private final byte[] padding;
 
     CharTreeWriter(int columnId,
-        ObjectInspector inspector,
-        StreamFactory writer,
-        boolean nullable) throws IOException {
-      super(columnId, inspector, writer, nullable);
+                   TypeDescription schema,
+                   StreamFactory writer,
+                   boolean nullable) throws IOException {
+      super(columnId, schema, writer, nullable);
+      itemLength = schema.getMaxLength();
+      padding = new byte[itemLength];
     }
-  }
 
-  /**
-   * Under the covers, varchar is written to ORC the same way as string.
-   */
-  private static class VarcharTreeWriter extends StringTreeWriter {
+    @Override
+    void write(Datum datum) throws IOException {
+      super.write(datum);
+      if (datum != null && datum.isNotNull()) {
+        byte[] ptr;
+        byte[] buf = datum.asByteArray();
+        if (buf.length >= itemLength) {
+          ptr = buf;
+        } else {
+          ptr = padding;
+          System.arraycopy(buf, 0, ptr, 0, buf.length);
+          Arrays.fill(ptr, buf.length, itemLength, (byte) ' ');
+        }
+        if (useDictionaryEncoding || !strideDictionaryCheck) {
+          rows.add(dictionary.add(ptr, 0, itemLength));
+        } else {
+          // write data and length
+          directStreamOutput.write(ptr, 0, itemLength);
+          directLengthOutput.write(itemLength);
+        }
 
-    VarcharTreeWriter(int columnId,
-        ObjectInspector inspector,
-        StreamFactory writer,
-        boolean nullable) throws IOException {
-      super(columnId, inspector, writer, nullable);
+        indexStatistics.updateString(ptr, 0, ptr.length, 1);
+        if (createBloomFilter) {
+          bloomFilter.addBytes(ptr, 0, ptr.length);
+        }
+      }
     }
   }
 
@@ -1415,10 +1386,10 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
     private boolean isDirectV2 = true;
 
     BinaryTreeWriter(int columnId,
-                     ObjectInspector inspector,
+                     TypeDescription schema,
                      StreamFactory writer,
                      boolean nullable) throws IOException {
-      super(columnId, inspector, writer, nullable);
+      super(columnId, schema, writer, nullable);
       this.stream = writer.createStream(id,
           OrcProto.Stream.Kind.DATA);
       this.isDirectV2 = isNewWriteFormat(writer);
@@ -1441,11 +1412,12 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
     void write(Datum datum) throws IOException {
       super.write(datum);
       if (datum != null && datum.isNotNull()) {
-        stream.write(datum.asByteArray(), 0, datum.size());
+        byte[] buf = datum.asByteArray();
+        stream.write(buf, 0, buf.length);
         length.write(datum.size());
-        indexStatistics.updateBinary(datum);
+        indexStatistics.updateBinary(buf, 0, buf.length, 1);
         if (createBloomFilter) {
-          bloomFilter.addBytes(datum.asByteArray(), datum.size());
+          bloomFilter.addBytes(buf, 0, buf.length);
         }
       }
     }
@@ -1467,7 +1439,6 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
     }
   }
 
-  static final int MILLIS_PER_SECOND = 1000;
   static final String BASE_TIMESTAMP_STRING = "2015-01-01 00:00:00";
 
   private static class TimestampTreeWriter extends TreeWriter {
@@ -1478,10 +1449,10 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
     private TimeZone timeZone;
 
     TimestampTreeWriter(int columnId,
-                     ObjectInspector inspector,
-                     StreamFactory writer,
-                     boolean nullable) throws IOException {
-      super(columnId, inspector, writer, nullable);
+                        TypeDescription schema,
+                        StreamFactory writer,
+                        boolean nullable) throws IOException {
+      super(columnId, schema, writer, nullable);
       this.isDirectV2 = isNewWriteFormat(writer);
       this.seconds = createIntegerWriter(writer.createStream(id,
           OrcProto.Stream.Kind.DATA), true, isDirectV2, writer);
@@ -1489,7 +1460,7 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
           OrcProto.Stream.Kind.SECONDARY), false, isDirectV2, writer);
       recordPosition(rowIndexPosition);
       // for unit tests to set different time zones
-      this.base_timestamp = Timestamp.valueOf(BASE_TIMESTAMP_STRING).getTime() / MILLIS_PER_SECOND;
+      this.base_timestamp = Timestamp.valueOf(BASE_TIMESTAMP_STRING).getTime() / DateTimeConstants.MSECS_PER_SEC;
       writer.useWriterTimeZone(true);
       timeZone = writer.getTimeZone();
     }
@@ -1515,7 +1486,7 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
 
         Timestamp val = new Timestamp(javaTimestamp);
         indexStatistics.updateTimestamp(val);
-        seconds.write((val.getTime() / MILLIS_PER_SECOND) - base_timestamp);
+        seconds.write((val.getTime() / DateTimeConstants.MSECS_PER_SEC) - base_timestamp);
         nanos.write(formatNanos(val.getNanos()));
         if (createBloomFilter) {
           bloomFilter.addLong(val.getTime());
@@ -1561,12 +1532,12 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
     private final boolean isDirectV2;
 
     DateTreeWriter(int columnId,
-                   ObjectInspector inspector,
+                   TypeDescription schema,
                    StreamFactory writer,
                    boolean nullable) throws IOException {
-      super(columnId, inspector, writer, nullable);
+      super(columnId, schema, writer, nullable);
       OutStream out = writer.createStream(id,
-        OrcProto.Stream.Kind.DATA);
+          OrcProto.Stream.Kind.DATA);
       this.isDirectV2 = isNewWriteFormat(writer);
       this.writer = createIntegerWriter(out, true, isDirectV2, writer);
       recordPosition(rowIndexPosition);
@@ -1612,19 +1583,17 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
   }
 
   private static class StructTreeWriter extends TreeWriter {
-    private final List<? extends StructField> fields;
     StructTreeWriter(int columnId,
-                     ObjectInspector inspector,
+                     TypeDescription schema,
                      StreamFactory writer,
                      boolean nullable) throws IOException {
-      super(columnId, inspector, writer, nullable);
-      StructObjectInspector structObjectInspector =
-        (StructObjectInspector) inspector;
-      fields = structObjectInspector.getAllStructFieldRefs();
-      childrenWriters = new TreeWriter[fields.size()];
+      super(columnId, schema, writer, nullable);
+      List<TypeDescription> children = schema.getChildren();
+      childrenWriters = new TreeWriter[children.size()];
       for(int i=0; i < childrenWriters.length; ++i) {
         childrenWriters[i] = createTreeWriter(
-          fields.get(i).getFieldObjectInspector(), writer, true);
+            children.get(i), writer,
+            true);
       }
       recordPosition(rowIndexPosition);
     }
@@ -1636,9 +1605,8 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
     void writeTuple(Tuple tuple) throws IOException {
       super.write(tuple);
       if (tuple != null) {
-        for(int i = 0; i < fields.size(); ++i) {
-          TreeWriter writer = childrenWriters[i];
-          writer.write(tuple.asDatum(i));
+        for(int i = 0; i < childrenWriters.length; ++i) {
+          childrenWriters[i].write(tuple.asDatum(i));
         }
       }
     }
@@ -1654,159 +1622,136 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
     }
   }
 
-  private static TreeWriter createTreeWriter(ObjectInspector inspector,
+  private static TreeWriter createTreeWriter(TypeDescription schema,
                                              StreamFactory streamFactory,
                                              boolean nullable) throws IOException {
-    switch (inspector.getCategory()) {
-      case PRIMITIVE:
-        switch (((PrimitiveObjectInspector) inspector).getPrimitiveCategory()) {
-          case BOOLEAN:
-          case VOID:
-            return new BooleanTreeWriter(streamFactory.getNextColumnId(),
-                inspector, streamFactory, nullable);
-          case BYTE:
-            return new ByteTreeWriter(streamFactory.getNextColumnId(),
-                inspector, streamFactory, nullable);
-          case SHORT:
-          case INT:
-          case LONG:
-            return new IntegerTreeWriter(streamFactory.getNextColumnId(),
-                inspector, streamFactory, nullable);
-          case FLOAT:
-            return new FloatTreeWriter(streamFactory.getNextColumnId(),
-                inspector, streamFactory, nullable);
-          case DOUBLE:
-            return new DoubleTreeWriter(streamFactory.getNextColumnId(),
-                inspector, streamFactory, nullable);
-          case STRING:
-            return new StringTreeWriter(streamFactory.getNextColumnId(),
-                inspector, streamFactory, nullable);
-          case CHAR:
-            return new CharTreeWriter(streamFactory.getNextColumnId(),
-                inspector, streamFactory, nullable);
-          case VARCHAR:
-            return new VarcharTreeWriter(streamFactory.getNextColumnId(),
-                inspector, streamFactory, nullable);
-          case BINARY:
-            return new BinaryTreeWriter(streamFactory.getNextColumnId(),
-                inspector, streamFactory, nullable);
-          case TIMESTAMP:
-            return new TimestampTreeWriter(streamFactory.getNextColumnId(),
-                inspector, streamFactory, nullable);
-          case DATE:
-            return new DateTreeWriter(streamFactory.getNextColumnId(),
-              inspector, streamFactory, nullable);
-          default:
-            throw new IllegalArgumentException("Bad primitive category " +
-              ((PrimitiveObjectInspector) inspector).getPrimitiveCategory());
-        }
+    switch (schema.getCategory()) {
+      case BOOLEAN:
+        return new BooleanTreeWriter(streamFactory.getNextColumnId(),
+            schema, streamFactory, nullable);
+      case BYTE:
+        return new ByteTreeWriter(streamFactory.getNextColumnId(),
+            schema, streamFactory, nullable);
+      case SHORT:
+      case INT:
+      case LONG:
+        return new IntegerTreeWriter(streamFactory.getNextColumnId(),
+            schema, streamFactory, nullable);
+      case FLOAT:
+        return new FloatTreeWriter(streamFactory.getNextColumnId(),
+            schema, streamFactory, nullable);
+      case DOUBLE:
+        return new DoubleTreeWriter(streamFactory.getNextColumnId(),
+            schema, streamFactory, nullable);
+      case STRING:
+        return new StringTreeWriter(streamFactory.getNextColumnId(),
+            schema, streamFactory, nullable);
+      case CHAR:
+        return new CharTreeWriter(streamFactory.getNextColumnId(),
+            schema, streamFactory, nullable);
+      case BINARY:
+        return new BinaryTreeWriter(streamFactory.getNextColumnId(),
+            schema, streamFactory, nullable);
+      case TIMESTAMP:
+        return new TimestampTreeWriter(streamFactory.getNextColumnId(),
+            schema, streamFactory, nullable);
+      case DATE:
+        return new DateTreeWriter(streamFactory.getNextColumnId(),
+            schema, streamFactory, nullable);
       case STRUCT:
-        return new StructTreeWriter(streamFactory.getNextColumnId(), inspector,
-            streamFactory, nullable);
+        return new StructTreeWriter(streamFactory.getNextColumnId(),
+            schema, streamFactory, nullable);
       default:
         throw new IllegalArgumentException("Bad category: " +
-          inspector.getCategory());
+            schema.getCategory());
     }
   }
 
   private static void writeTypes(OrcProto.Footer.Builder builder,
-                                 TreeWriter treeWriter) {
+                                 TypeDescription schema) {
     OrcProto.Type.Builder type = OrcProto.Type.newBuilder();
-    switch (treeWriter.inspector.getCategory()) {
-      case PRIMITIVE:
-        switch (((PrimitiveObjectInspector) treeWriter.inspector).
-                 getPrimitiveCategory()) {
-          case VOID:
-          case BOOLEAN:
-            type.setKind(OrcProto.Type.Kind.BOOLEAN);
-            break;
-          case BYTE:
-            type.setKind(OrcProto.Type.Kind.BYTE);
-            break;
-          case SHORT:
-            type.setKind(OrcProto.Type.Kind.SHORT);
-            break;
-          case INT:
-            type.setKind(OrcProto.Type.Kind.INT);
-            break;
-          case LONG:
-            type.setKind(OrcProto.Type.Kind.LONG);
-            break;
-          case FLOAT:
-            type.setKind(OrcProto.Type.Kind.FLOAT);
-            break;
-          case DOUBLE:
-            type.setKind(OrcProto.Type.Kind.DOUBLE);
-            break;
-          case STRING:
-            type.setKind(OrcProto.Type.Kind.STRING);
-            break;
-          case CHAR:
-            // The char length needs to be written to file and should be available
-            // from the object inspector
-            CharTypeInfo charTypeInfo = (CharTypeInfo) ((PrimitiveObjectInspector) treeWriter.inspector).getTypeInfo();
-            type.setKind(Type.Kind.CHAR);
-            type.setMaximumLength(charTypeInfo.getLength());
-            break;
-          case VARCHAR:
-            // The varchar length needs to be written to file and should be available
-            // from the object inspector
-            VarcharTypeInfo typeInfo = (VarcharTypeInfo) ((PrimitiveObjectInspector) treeWriter.inspector).getTypeInfo();
-            type.setKind(Type.Kind.VARCHAR);
-            type.setMaximumLength(typeInfo.getLength());
-            break;
-          case BINARY:
-            type.setKind(OrcProto.Type.Kind.BINARY);
-            break;
-          case TIMESTAMP:
-            type.setKind(OrcProto.Type.Kind.TIMESTAMP);
-            break;
-          case DATE:
-            type.setKind(OrcProto.Type.Kind.DATE);
-            break;
-          case DECIMAL:
-            DecimalTypeInfo decTypeInfo = (DecimalTypeInfo)((PrimitiveObjectInspector)treeWriter.inspector).getTypeInfo();
-            type.setKind(OrcProto.Type.Kind.DECIMAL);
-            type.setPrecision(decTypeInfo.precision());
-            type.setScale(decTypeInfo.scale());
-            break;
-          default:
-            throw new IllegalArgumentException("Unknown primitive category: " +
-              ((PrimitiveObjectInspector) treeWriter.inspector).
-                getPrimitiveCategory());
-        }
+    List<TypeDescription> children = schema.getChildren();
+    switch (schema.getCategory()) {
+      case BOOLEAN:
+        type.setKind(OrcProto.Type.Kind.BOOLEAN);
+        break;
+      case BYTE:
+        type.setKind(OrcProto.Type.Kind.BYTE);
+        break;
+      case SHORT:
+        type.setKind(OrcProto.Type.Kind.SHORT);
+        break;
+      case INT:
+        type.setKind(OrcProto.Type.Kind.INT);
+        break;
+      case LONG:
+        type.setKind(OrcProto.Type.Kind.LONG);
+        break;
+      case FLOAT:
+        type.setKind(OrcProto.Type.Kind.FLOAT);
+        break;
+      case DOUBLE:
+        type.setKind(OrcProto.Type.Kind.DOUBLE);
+        break;
+      case STRING:
+        type.setKind(OrcProto.Type.Kind.STRING);
+        break;
+      case CHAR:
+        type.setKind(OrcProto.Type.Kind.CHAR);
+        type.setMaximumLength(schema.getMaxLength());
+        break;
+      case VARCHAR:
+        type.setKind(OrcProto.Type.Kind.VARCHAR);
+        type.setMaximumLength(schema.getMaxLength());
+        break;
+      case BINARY:
+        type.setKind(OrcProto.Type.Kind.BINARY);
+        break;
+      case TIMESTAMP:
+        type.setKind(OrcProto.Type.Kind.TIMESTAMP);
+        break;
+      case DATE:
+        type.setKind(OrcProto.Type.Kind.DATE);
+        break;
+      case DECIMAL:
+        type.setKind(OrcProto.Type.Kind.DECIMAL);
+        type.setPrecision(schema.getPrecision());
+        type.setScale(schema.getScale());
         break;
       case LIST:
         type.setKind(OrcProto.Type.Kind.LIST);
-        type.addSubtypes(treeWriter.childrenWriters[0].id);
+        type.addSubtypes(children.get(0).getId());
         break;
       case MAP:
         type.setKind(OrcProto.Type.Kind.MAP);
-        type.addSubtypes(treeWriter.childrenWriters[0].id);
-        type.addSubtypes(treeWriter.childrenWriters[1].id);
+        for(TypeDescription t: children) {
+          type.addSubtypes(t.getId());
+        }
         break;
       case STRUCT:
         type.setKind(OrcProto.Type.Kind.STRUCT);
-        for(TreeWriter child: treeWriter.childrenWriters) {
-          type.addSubtypes(child.id);
+        for(TypeDescription t: children) {
+          type.addSubtypes(t.getId());
         }
-        for(StructField field: ((StructTreeWriter) treeWriter).fields) {
-          type.addFieldNames(field.getFieldName());
+        for(String field: schema.getFieldNames()) {
+          type.addFieldNames(field);
         }
         break;
       case UNION:
         type.setKind(OrcProto.Type.Kind.UNION);
-        for(TreeWriter child: treeWriter.childrenWriters) {
-          type.addSubtypes(child.id);
+        for(TypeDescription t: children) {
+          type.addSubtypes(t.getId());
         }
         break;
       default:
         throw new IllegalArgumentException("Unknown category: " +
-          treeWriter.inspector.getCategory());
+            schema.getCategory());
     }
     builder.addTypes(type);
-    for(TreeWriter child: treeWriter.childrenWriters) {
-      writeTypes(builder, child);
+    if (children != null) {
+      for(TypeDescription child: children) {
+        writeTypes(builder, child);
+      }
     }
   }
 
@@ -1853,9 +1798,9 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
           StreamName name = pair.getKey();
           long streamSize = pair.getValue().getOutputSize();
           builder.addStreams(OrcProto.Stream.newBuilder()
-                             .setColumn(name.getColumn())
-                             .setKind(name.getKind())
-                             .setLength(streamSize));
+              .setColumn(name.getColumn())
+              .setKind(name.getKind())
+              .setLength(streamSize));
           if (StreamName.Area.INDEX == name.getArea()) {
             indexSize += streamSize;
           } else {
@@ -1880,8 +1825,8 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
         // and user specified padding tolerance. Since stripe size can overflow
         // the default stripe size we should apply this correction to avoid
         // writing portion of last stripe to next hdfs block.
-        float correction = overflow > 0 ? (float) overflow
-            / (float) adjustedStripeSize : 0.0f;
+        double correction = overflow > 0 ? (double) overflow
+            / (double) adjustedStripeSize : 0.0;
 
         // correction should not be greater than user specified padding
         // tolerance
@@ -1939,75 +1884,60 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
   }
 
   private long computeRawDataSize() {
-    long result = 0;
-    for (TreeWriter child : treeWriter.getChildrenWriters()) {
-      result += getRawDataSizeFromInspectors(child, child.inspector);
-    }
-    return result;
+    return getRawDataSize(treeWriter, schema);
   }
 
-  private long getRawDataSizeFromInspectors(TreeWriter child, ObjectInspector oi) {
+  private long getRawDataSize(TreeWriter child,
+                              TypeDescription schema) {
     long total = 0;
-    switch (oi.getCategory()) {
-    case PRIMITIVE:
-      total += getRawDataSizeFromPrimitives(child, oi);
-      break;
-    case LIST:
-    case MAP:
-    case UNION:
-    case STRUCT:
-      for (TreeWriter tw : child.childrenWriters) {
-        total += getRawDataSizeFromInspectors(tw, tw.inspector);
+    long numVals = child.fileStatistics.getNumberOfValues();
+    switch (schema.getCategory()) {
+      case BOOLEAN:
+      case BYTE:
+      case SHORT:
+      case INT:
+      case FLOAT:
+        return numVals * JavaDataModel.get().primitive1();
+      case LONG:
+      case DOUBLE:
+        return numVals * JavaDataModel.get().primitive2();
+      case STRING:
+      case VARCHAR:
+      case CHAR:
+        // ORC strings are converted to java Strings. so use JavaDataModel to
+        // compute the overall size of strings
+        StringColumnStatistics scs = (StringColumnStatistics) child.fileStatistics;
+        numVals = numVals == 0 ? 1 : numVals;
+        int avgStringLen = (int) (scs.getSum() / numVals);
+        return numVals * JavaDataModel.get().lengthForStringOfLength(avgStringLen);
+      case DECIMAL:
+        return numVals * JavaDataModel.get().lengthOfDecimal();
+      case DATE:
+        return numVals * JavaDataModel.get().lengthOfDate();
+      case BINARY:
+        // get total length of binary blob
+        BinaryColumnStatistics bcs = (BinaryColumnStatistics) child.fileStatistics;
+        return bcs.getSum();
+      case TIMESTAMP:
+        return numVals * JavaDataModel.get().lengthOfTimestamp();
+      case LIST:
+      case MAP:
+      case UNION:
+      case STRUCT: {
+        TreeWriter[] childWriters = child.getChildrenWriters();
+        List<TypeDescription> childTypes = schema.getChildren();
+        for (int i=0; i < childWriters.length; ++i) {
+          total += getRawDataSize(childWriters[i], childTypes.get(i));
+        }
+        break;
       }
-      break;
-    default:
-      LOG.debug("Unknown object inspector category.");
-      break;
+      default:
+        LOG.debug("Unknown object inspector category.");
+        break;
     }
     return total;
   }
 
-  private long getRawDataSizeFromPrimitives(TreeWriter child, ObjectInspector oi) {
-    long result = 0;
-    long numVals = child.fileStatistics.getNumberOfValues();
-    switch (((PrimitiveObjectInspector) oi).getPrimitiveCategory()) {
-    case BOOLEAN:
-    case BYTE:
-    case SHORT:
-    case INT:
-    case FLOAT:
-      return numVals * JavaDataModel.get().primitive1();
-    case LONG:
-    case DOUBLE:
-      return numVals * JavaDataModel.get().primitive2();
-    case STRING:
-    case VARCHAR:
-    case CHAR:
-      // ORC strings are converted to java Strings. so use JavaDataModel to
-      // compute the overall size of strings
-      child = (StringTreeWriter) child;
-      StringColumnStatistics scs = (StringColumnStatistics) child.fileStatistics;
-      numVals = numVals == 0 ? 1 : numVals;
-      int avgStringLen = (int) (scs.getSum() / numVals);
-      return numVals * JavaDataModel.get().lengthForStringOfLength(avgStringLen);
-    case DECIMAL:
-      return numVals * JavaDataModel.get().lengthOfDecimal();
-    case DATE:
-      return numVals * JavaDataModel.get().lengthOfDate();
-    case BINARY:
-      // get total length of binary blob
-      BinaryColumnStatistics bcs = (BinaryColumnStatistics) child.fileStatistics;
-      return bcs.getSum();
-    case TIMESTAMP:
-      return numVals * JavaDataModel.get().lengthOfTimestamp();
-    default:
-      LOG.debug("Unknown primitive category.");
-      break;
-    }
-
-    return result;
-  }
-
   private OrcProto.CompressionKind writeCompressionKind(CompressionKind kind) {
     switch (kind) {
       case NONE: return OrcProto.CompressionKind.NONE;
@@ -2027,7 +1957,7 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
     }
   }
 
-  private int writeMetadata(long bodyLength) throws IOException {
+  private int writeMetadata() throws IOException {
     getStream();
     OrcProto.Metadata.Builder builder = OrcProto.Metadata.newBuilder();
     for(OrcProto.StripeStatistics.Builder ssb : treeWriter.stripeStatsBuilders) {
@@ -2052,7 +1982,7 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
     // populate raw data size
     rawDataSize = computeRawDataSize();
     // serialize the types
-    writeTypes(builder, treeWriter);
+    writeTypes(builder, schema);
     // add the stripe information
     for(OrcProto.StripeInformation stripe: stripes) {
       builder.addStripes(stripe);
@@ -2062,7 +1992,7 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
     // add all of the user metadata
     for(Map.Entry<String, ByteString> entry: userMetadata.entrySet()) {
       builder.addMetadata(OrcProto.UserMetadataItem.newBuilder()
-        .setName(entry.getKey()).setValue(entry.getValue()));
+          .setName(entry.getKey()).setValue(entry.getValue()));
     }
     long startPosn = rawWriter.getPos();
     OrcProto.Footer footer = builder.build();
@@ -2074,14 +2004,14 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
 
   private int writePostScript(int footerLength, int metadataLength) throws IOException {
     OrcProto.PostScript.Builder builder =
-      OrcProto.PostScript.newBuilder()
-        .setCompression(writeCompressionKind(compress))
-        .setFooterLength(footerLength)
-        .setMetadataLength(metadataLength)
-        .setMagic(OrcFile.MAGIC)
-        .addVersion(version.getMajor())
-        .addVersion(version.getMinor())
-        .setWriterVersion(OrcFile.WriterVersion.HIVE_8732.getId());
+        OrcProto.PostScript.newBuilder()
+            .setCompression(writeCompressionKind(compress))
+            .setFooterLength(footerLength)
+            .setMetadataLength(metadataLength)
+            .setMagic(OrcFile.MAGIC)
+            .addVersion(version.getMajor())
+            .addVersion(version.getMinor())
+            .setWriterVersion(OrcFile.CURRENT_WRITER.getId());
     if (compress != CompressionKind.NONE) {
       builder.setCompressionBlockSize(bufferSize);
     }
@@ -2120,7 +2050,7 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
         createRowIndexEntry();
       }
     }
-    memoryManager.addedRow();
+    memoryManager.addedRow(1);
   }
 
   @Override
@@ -2132,7 +2062,7 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
     memoryManager.removeWriter(path);
     // actually close the file
     flushStripe();
-    int metadataLength = writeMetadata(rawWriter.getPos());
+    int metadataLength = writeMetadata();
     int footerLength = writeFooter(rawWriter.getPos() - metadataLength);
     rawWriter.writeByte(writePostScript(footerLength, metadataLength));
     rawWriter.close();
@@ -2165,19 +2095,19 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
       if (callback != null) {
         callback.preFooterWrite(callbackContext);
       }
-      int metaLength = writeMetadata(rawWriter.getPos());
+      int metaLength = writeMetadata();
       int footLength = writeFooter(rawWriter.getPos() - metaLength);
       rawWriter.writeByte(writePostScript(footLength, metaLength));
       stripesAtLastFlush = stripes.size();
-      ShimLoader.getHadoopShims().hflush(rawWriter);
+      rawWriter.hflush();
     }
     return rawWriter.getPos();
   }
 
   @Override
   public void appendStripe(byte[] stripe, int offset, int length,
-      StripeInformation stripeInfo,
-      OrcProto.StripeStatistics stripeStatistics) throws IOException {
+                           StripeInformation stripeInfo,
+                           OrcProto.StripeStatistics stripeStatistics) throws IOException {
     checkArgument(stripe != null, "Stripe must not be null");
     checkArgument(length <= stripe.length,
         "Specified length must not be greater specified array length");
@@ -2187,12 +2117,11 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
 
     getStream();
     long start = rawWriter.getPos();
-    long stripeLen = length;
     long availBlockSpace = blockSize - (start % blockSize);
 
     // see if stripe can fit in the current hdfs block, else pad the remaining
     // space in the block
-    if (stripeLen < blockSize && stripeLen > availBlockSpace &&
+    if (length < blockSize && length > availBlockSpace &&
         addBlockPadding) {
       byte[] pad = new byte[(int) Math.min(HDFS_BUFFER_SIZE, availBlockSpace)];
       LOG.info(String.format("Padding ORC by %d bytes while merging..",
@@ -2245,7 +2174,7 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
   }
 
   private void getAllColumnTreeWritersImpl(TreeWriter tw,
-      List<TreeWriter> result) {
+                                           List<TreeWriter> result) {
     result.add(tw);
     for (TreeWriter child : tw.childrenWriters) {
       getAllColumnTreeWritersImpl(child, result);
@@ -2253,9 +2182,9 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
   }
 
   @Override
-  public void appendUserMetadata(List<UserMetadataItem> userMetadata) {
+  public void appendUserMetadata(List<OrcProto.UserMetadataItem> userMetadata) {
     if (userMetadata != null) {
-      for (UserMetadataItem item : userMetadata) {
+      for (OrcProto.UserMetadataItem item : userMetadata) {
         this.userMetadata.put(item.getName(), item.getValue());
       }
     }

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/ZeroCopyAdapter.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/ZeroCopyAdapter.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/ZeroCopyAdapter.java
new file mode 100644
index 0000000..2886fe7
--- /dev/null
+++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/ZeroCopyAdapter.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tajo.storage.thirdparty.orc;
+
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.ReadOption;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.EnumSet;
+
+public class ZeroCopyAdapter {
+  private final FSDataInputStream in;
+  private final ByteBufferPoolAdapter pool;
+  private final static EnumSet<ReadOption> CHECK_SUM = EnumSet
+      .noneOf(ReadOption.class);
+  private final static EnumSet<ReadOption> NO_CHECK_SUM = EnumSet
+      .of(ReadOption.SKIP_CHECKSUMS);
+
+  public ZeroCopyAdapter(FSDataInputStream in, ByteBufferAllocatorPool poolshim) {
+    this.in = in;
+    if (poolshim != null) {
+      pool = new ByteBufferPoolAdapter(poolshim);
+    } else {
+      pool = null;
+    }
+  }
+
+  public final ByteBuffer readBuffer(int maxLength, boolean verifyChecksums)
+      throws IOException {
+    EnumSet<ReadOption> options = NO_CHECK_SUM;
+    if (verifyChecksums) {
+      options = CHECK_SUM;
+    }
+    return this.in.read(this.pool, maxLength, options);
+  }
+
+  public final void releaseBuffer(ByteBuffer buffer) {
+    this.in.releaseBuffer(buffer);
+  }
+}

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/ZlibCodec.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/ZlibCodec.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/ZlibCodec.java
deleted file mode 100644
index d0a8fa7..0000000
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/ZlibCodec.java
+++ /dev/null
@@ -1,169 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tajo.storage.thirdparty.orc;
-
-import org.apache.hadoop.hive.shims.HadoopShims.DirectCompressionType;
-import org.apache.hadoop.hive.shims.HadoopShims.DirectDecompressorShim;
-import org.apache.hadoop.hive.shims.ShimLoader;
-
-import javax.annotation.Nullable;
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.EnumSet;
-import java.util.zip.DataFormatException;
-import java.util.zip.Deflater;
-import java.util.zip.Inflater;
-
-class ZlibCodec implements CompressionCodec, DirectDecompressionCodec {
-
-  private Boolean direct = null;
-
-  private final int level;
-  private final int strategy;
-
-  public ZlibCodec() {
-    level = Deflater.DEFAULT_COMPRESSION;
-    strategy = Deflater.DEFAULT_STRATEGY;
-  }
-
-  private ZlibCodec(int level, int strategy) {
-    this.level = level;
-    this.strategy = strategy;
-  }
-
-  @Override
-  public boolean compress(ByteBuffer in, ByteBuffer out,
-                          ByteBuffer overflow) throws IOException {
-    Deflater deflater = new Deflater(level, true);
-    deflater.setStrategy(strategy);
-    int length = in.remaining();
-    deflater.setInput(in.array(), in.arrayOffset() + in.position(), length);
-    deflater.finish();
-    int outSize = 0;
-    int offset = out.arrayOffset() + out.position();
-    while (!deflater.finished() && (length > outSize)) {
-      int size = deflater.deflate(out.array(), offset, out.remaining());
-      out.position(size + out.position());
-      outSize += size;
-      offset += size;
-      // if we run out of space in the out buffer, use the overflow
-      if (out.remaining() == 0) {
-        if (overflow == null) {
-          deflater.end();
-          return false;
-        }
-        out = overflow;
-        offset = out.arrayOffset() + out.position();
-      }
-    }
-    deflater.end();
-    return length > outSize;
-  }
-
-  @Override
-  public void decompress(ByteBuffer in, ByteBuffer out) throws IOException {
-
-    if(in.isDirect() && out.isDirect()) {
-      directDecompress(in, out);
-      return;
-    }
-
-    Inflater inflater = new Inflater(true);
-    inflater.setInput(in.array(), in.arrayOffset() + in.position(),
-                      in.remaining());
-    while (!(inflater.finished() || inflater.needsDictionary() ||
-             inflater.needsInput())) {
-      try {
-        int count = inflater.inflate(out.array(),
-                                     out.arrayOffset() + out.position(),
-                                     out.remaining());
-        out.position(count + out.position());
-      } catch (DataFormatException dfe) {
-        throw new IOException("Bad compression data", dfe);
-      }
-    }
-    out.flip();
-    inflater.end();
-    in.position(in.limit());
-  }
-
-  @Override
-  public boolean isAvailable() {
-    if (direct == null) {
-      // see nowrap option in new Inflater(boolean) which disables zlib headers
-      try {
-        if (ShimLoader.getHadoopShims().getDirectDecompressor(
-            DirectCompressionType.ZLIB_NOHEADER) != null) {
-          direct = Boolean.valueOf(true);
-        } else {
-          direct = Boolean.valueOf(false);
-        }
-      } catch (UnsatisfiedLinkError ule) {
-        direct = Boolean.valueOf(false);
-      }
-    }
-    return direct.booleanValue();
-  }
-
-  @Override
-  public void directDecompress(ByteBuffer in, ByteBuffer out)
-      throws IOException {
-    DirectDecompressorShim decompressShim = ShimLoader.getHadoopShims()
-        .getDirectDecompressor(DirectCompressionType.ZLIB_NOHEADER);
-    decompressShim.decompress(in, out);
-    out.flip(); // flip for read
-  }
-
-  @Override
-  public CompressionCodec modify(@Nullable EnumSet<Modifier> modifiers) {
-
-    if (modifiers == null) {
-      return this;
-    }
-
-    int l = this.level;
-    int s = this.strategy;
-
-    for (Modifier m : modifiers) {
-      switch (m) {
-      case BINARY:
-        /* filtered == less LZ77, more huffman */
-        s = Deflater.FILTERED;
-        break;
-      case TEXT:
-        s = Deflater.DEFAULT_STRATEGY;
-        break;
-      case FASTEST:
-        // deflate_fast looking for 8 byte patterns
-        l = Deflater.BEST_SPEED;
-        break;
-      case FAST:
-        // deflate_fast looking for 16 byte patterns
-        l = Deflater.BEST_SPEED + 1;
-        break;
-      case DEFAULT:
-        // deflate_slow looking for 128 byte patterns
-        l = Deflater.DEFAULT_COMPRESSION;
-        break;
-      default:
-        break;
-      }
-    }
-    return new ZlibCodec(l, s);
-  }
-}

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/proto/orc_proto.proto
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/proto/orc_proto.proto b/tajo-storage/tajo-storage-hdfs/src/main/proto/orc_proto.proto
deleted file mode 100644
index c80cf6c..0000000
--- a/tajo-storage/tajo-storage-hdfs/src/main/proto/orc_proto.proto
+++ /dev/null
@@ -1,217 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.tajo.storage.thirdparty.orc;
-
-message IntegerStatistics  {
-  optional sint64 minimum = 1;
-  optional sint64 maximum = 2;
-  optional sint64 sum = 3;
-}
-
-message DoubleStatistics {
-  optional double minimum = 1;
-  optional double maximum = 2;
-  optional double sum = 3;
-}
-
-message StringStatistics {
-  optional string minimum = 1;
-  optional string maximum = 2;
-  // sum will store the total length of all strings in a stripe
-  optional sint64 sum = 3;
-}
-
-message BucketStatistics {
-  repeated uint64 count = 1 [packed=true];
-}
-
-message DecimalStatistics {
-  optional string minimum = 1;
-  optional string maximum = 2;
-  optional string sum = 3;
-}
-
-message DateStatistics {
-  // min,max values saved as days since epoch
-  optional sint32 minimum = 1;
-  optional sint32 maximum = 2;
-}
-
-message TimestampStatistics {
-  // min,max values saved as milliseconds since epoch
-  optional sint64 minimum = 1;
-  optional sint64 maximum = 2;
-}
-
-message BinaryStatistics {
-  // sum will store the total binary blob length in a stripe
-  optional sint64 sum = 1;
-}
-
-message ColumnStatistics {
-  optional uint64 numberOfValues = 1;
-  optional IntegerStatistics intStatistics = 2;
-  optional DoubleStatistics doubleStatistics = 3;
-  optional StringStatistics stringStatistics = 4;
-  optional BucketStatistics bucketStatistics = 5;
-  optional DecimalStatistics decimalStatistics = 6;
-  optional DateStatistics dateStatistics = 7;
-  optional BinaryStatistics binaryStatistics = 8;
-  optional TimestampStatistics timestampStatistics = 9;
-  optional bool hasNull = 10;
-}
-
-message RowIndexEntry {
-  repeated uint64 positions = 1 [packed=true];
-  optional ColumnStatistics statistics = 2;
-}
-
-message RowIndex {
-  repeated RowIndexEntry entry = 1;
-}
-
-message BloomFilter {
-  optional uint32 numHashFunctions = 1;
-  repeated fixed64 bitset = 2;
-}
-
-message BloomFilterIndex {
-  repeated BloomFilter bloomFilter = 1;
-}
-
-message Stream {
-  // if you add new index stream kinds, you need to make sure to update
-  // StreamName to ensure it is added to the stripe in the right area
-  enum Kind {
-    PRESENT = 0;
-    DATA = 1;
-    LENGTH = 2;
-    DICTIONARY_DATA = 3;
-    DICTIONARY_COUNT = 4;
-    SECONDARY = 5;
-    ROW_INDEX = 6;
-    BLOOM_FILTER = 7;
-  }
-  optional Kind kind = 1;
-  optional uint32 column = 2;
-  optional uint64 length = 3;
-}
-
-message ColumnEncoding {
-  enum Kind {
-    DIRECT = 0;
-    DICTIONARY = 1;
-    DIRECT_V2 = 2;
-    DICTIONARY_V2 = 3;
-  }
-  optional Kind kind = 1;
-  optional uint32 dictionarySize = 2;
-}
-
-message StripeFooter {
-  repeated Stream streams = 1;
-  repeated ColumnEncoding columns = 2;
-  optional string writerTimezone = 3;
-}
-
-message Type {
-  enum Kind {
-    BOOLEAN = 0;
-    BYTE = 1;
-    SHORT = 2;
-    INT = 3;
-    LONG = 4;
-    FLOAT = 5;
-    DOUBLE = 6;
-    STRING = 7;
-    BINARY = 8;
-    TIMESTAMP = 9;
-    LIST = 10;
-    MAP = 11;
-    STRUCT = 12;
-    UNION = 13;
-    DECIMAL = 14;
-    DATE = 15;
-    VARCHAR = 16;
-    CHAR = 17;
-  }
-  optional Kind kind = 1;
-  repeated uint32 subtypes = 2 [packed=true];
-  repeated string fieldNames = 3;
-  optional uint32 maximumLength = 4;
-  optional uint32 precision = 5;
-  optional uint32 scale = 6;
-}
-
-message StripeInformation {
-  optional uint64 offset = 1;
-  optional uint64 indexLength = 2;
-  optional uint64 dataLength = 3;
-  optional uint64 footerLength = 4;
-  optional uint64 numberOfRows = 5;
-}
-
-message UserMetadataItem {
-  optional string name = 1;
-  optional bytes value = 2;
-}
-
-message StripeStatistics {
-  repeated ColumnStatistics colStats = 1;
-}
-
-message Metadata {
-  repeated StripeStatistics stripeStats = 1;
-}
-
-message Footer {
-  optional uint64 headerLength = 1;
-  optional uint64 contentLength = 2;
-  repeated StripeInformation stripes = 3;
-  repeated Type types = 4;
-  repeated UserMetadataItem metadata = 5;
-  optional uint64 numberOfRows = 6;
-  repeated ColumnStatistics statistics = 7;
-  optional uint32 rowIndexStride = 8;
-}
-
-enum CompressionKind {
-  NONE = 0;
-  ZLIB = 1;
-  SNAPPY = 2;
-  LZO = 3;
-}
-
-// Serialized length must be less that 255 bytes
-message PostScript {
-  optional uint64 footerLength = 1;
-  optional CompressionKind compression = 2;
-  optional uint64 compressionBlockSize = 3;
-  // the version of the file format
-  //   [0, 11] = Hive 0.11
-  //   [0, 12] = Hive 0.12
-  repeated uint32 version = 4 [packed = true];
-  optional uint64 metadataLength = 5;
-  // Version of the writer:
-  //   0 (or missing) = original
-  //   1 = HIVE-8732 fixed
-  optional uint32 writerVersion = 6;
-  // Leave this last in the record
-  optional string magic = 8000;
-}

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestCompressionStorages.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestCompressionStorages.java b/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestCompressionStorages.java
index b63b497..608d066 100644
--- a/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestCompressionStorages.java
+++ b/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestCompressionStorages.java
@@ -27,6 +27,7 @@ import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.compress.*;
 import org.apache.hadoop.io.compress.zlib.ZlibFactory;
 import org.apache.hadoop.util.NativeCodeLoader;
+import org.apache.orc.OrcConf;
 import org.apache.tajo.BuiltinStorages;
 import org.apache.tajo.catalog.CatalogUtil;
 import org.apache.tajo.catalog.Schema;
@@ -61,6 +62,7 @@ public class TestCompressionStorages {
   public TestCompressionStorages(String type) throws IOException {
     this.dataFormat = type;
     conf = new TajoConf();
+    conf.setBoolean("hive.exec.orc.zerocopy", true);
 
     testDir = CommonTestingUtil.getTestDir(TEST_PATH);
     fs = testDir.getFileSystem(conf);
@@ -71,7 +73,8 @@ public class TestCompressionStorages {
     return Arrays.asList(new Object[][]{
         {BuiltinStorages.TEXT},
         {BuiltinStorages.RCFILE},
-        {BuiltinStorages.SEQUENCE_FILE}
+        {BuiltinStorages.SEQUENCE_FILE},
+        {BuiltinStorages.ORC}
     });
   }
 
@@ -120,6 +123,14 @@ public class TestCompressionStorages {
     meta.putProperty("rcfile.serde", TextSerializerDeserializer.class.getName());
     meta.putProperty("sequencefile.serde", TextSerializerDeserializer.class.getName());
 
+    if (codec.equals(SnappyCodec.class)) {
+      meta.putProperty(OrcConf.COMPRESS.getAttribute(), "SNAPPY");
+    } else if (codec.equals(Lz4Codec.class)) {
+      meta.putProperty(OrcConf.COMPRESS.getAttribute(), "ZLIB");
+    } else {
+      meta.putProperty(OrcConf.COMPRESS.getAttribute(), "NONE");
+    }
+
     String fileName = "Compression_" + codec.getSimpleName();
     Path tablePath = new Path(testDir, fileName);
     Appender appender = ((FileTablespace) TablespaceManager.getLocalFs()).getAppender(meta, schema, tablePath);

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestStorages.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestStorages.java b/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestStorages.java
index 552dc2e..a9d61d5 100644
--- a/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestStorages.java
+++ b/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestStorages.java
@@ -167,6 +167,21 @@ public class TestStorages {
    fs.delete(testDir, true);
   }
 
+  private boolean protoTypeSupport() {
+    return internalType;
+  }
+
+  private boolean timeTypeSupport() {
+    return internalType
+        || dataFormat.equalsIgnoreCase(BuiltinStorages.TEXT);
+  }
+
+  private boolean dateTypeSupport() {
+    return internalType
+        || dataFormat.equalsIgnoreCase(BuiltinStorages.TEXT)
+        || dataFormat.equalsIgnoreCase(BuiltinStorages.ORC);
+  }
+
   @Test
   public void testSplitable() throws IOException {
     if (splitable) {
@@ -385,8 +400,6 @@ public class TestStorages {
 
   @Test
   public void testVariousTypes() throws IOException {
-    boolean handleProtobuf = !dataFormat.equalsIgnoreCase(BuiltinStorages.JSON);
-
     Schema schema = new Schema();
     schema.addColumn("col1", Type.BOOLEAN);
     schema.addColumn("col2", Type.CHAR, 7);
@@ -398,7 +411,7 @@ public class TestStorages {
     schema.addColumn("col8", Type.TEXT);
     schema.addColumn("col9", Type.BLOB);
     schema.addColumn("col10", Type.INET4);
-    if (handleProtobuf) {
+    if (protoTypeSupport()) {
       schema.addColumn("col11", CatalogUtil.newDataType(Type.PROTOBUF, TajoIdProtos.QueryIdProto.class.getName()));
     }
 
@@ -418,7 +431,7 @@ public class TestStorages {
     QueryId queryid = new QueryId("12345", 5);
     ProtobufDatumFactory factory = ProtobufDatumFactory.get(TajoIdProtos.QueryIdProto.class.getName());
 
-    VTuple tuple = new VTuple(10 + (handleProtobuf ? 1 : 0));
+    VTuple tuple = new VTuple(10 + (protoTypeSupport() ? 1 : 0));
     tuple.put(new Datum[] {
         DatumFactory.createBool(true),
         DatumFactory.createChar("hyunsik"),
@@ -432,7 +445,7 @@ public class TestStorages {
         DatumFactory.createInet4("192.168.0.1"),
     });
 
-    if (handleProtobuf) {
+    if (protoTypeSupport()) {
       tuple.put(10, factory.createDatum(queryid.getProto()));
     }
 
@@ -456,8 +469,6 @@ public class TestStorages {
 
   @Test
   public void testNullHandlingTypes() throws IOException {
-    boolean handleProtobuf = !dataFormat.equalsIgnoreCase(BuiltinStorages.JSON);
-
     Schema schema = new Schema();
     schema.addColumn("col1", Type.BOOLEAN);
     schema.addColumn("col2", Type.CHAR, 7);
@@ -470,7 +481,7 @@ public class TestStorages {
     schema.addColumn("col9", Type.BLOB);
     schema.addColumn("col10", Type.INET4);
 
-    if (handleProtobuf) {
+    if (protoTypeSupport()) {
       schema.addColumn("col11", CatalogUtil.newDataType(Type.PROTOBUF, TajoIdProtos.QueryIdProto.class.getName()));
     }
 
@@ -492,7 +503,7 @@ public class TestStorages {
 
     QueryId queryid = new QueryId("12345", 5);
     ProtobufDatumFactory factory = ProtobufDatumFactory.get(TajoIdProtos.QueryIdProto.class.getName());
-    int columnNum = 10 + (handleProtobuf ? 1 : 0);
+    int columnNum = 10 + (protoTypeSupport() ? 1 : 0);
     VTuple seedTuple = new VTuple(columnNum);
     seedTuple.put(new Datum[]{
         DatumFactory.createBool(true),                // 0
@@ -507,7 +518,7 @@ public class TestStorages {
         DatumFactory.createInet4("192.168.0.1")       // 10
     });
 
-    if (handleProtobuf) {
+    if (protoTypeSupport()) {
       seedTuple.put(10, factory.createDatum(queryid.getProto()));       // 11
     }
 
@@ -553,8 +564,6 @@ public class TestStorages {
   public void testNullHandlingTypesWithProjection() throws IOException {
     if (internalType) return;
 
-    boolean handleProtobuf = !dataFormat.equalsIgnoreCase(BuiltinStorages.JSON);
-
     Schema schema = new Schema();
     schema.addColumn("col1", Type.BOOLEAN);
     schema.addColumn("col2", Type.CHAR, 7);
@@ -567,7 +576,7 @@ public class TestStorages {
     schema.addColumn("col9", Type.BLOB);
     schema.addColumn("col10", Type.INET4);
 
-    if (handleProtobuf) {
+    if (protoTypeSupport()) {
       schema.addColumn("col11", CatalogUtil.newDataType(Type.PROTOBUF, TajoIdProtos.QueryIdProto.class.getName()));
     }
 
@@ -589,7 +598,7 @@ public class TestStorages {
 
     QueryId queryid = new QueryId("12345", 5);
     ProtobufDatumFactory factory = ProtobufDatumFactory.get(TajoIdProtos.QueryIdProto.class.getName());
-    int columnNum = 10 + (handleProtobuf ? 1 : 0);
+    int columnNum = 10 + (protoTypeSupport() ? 1 : 0);
     VTuple seedTuple = new VTuple(columnNum);
     seedTuple.put(new Datum[]{
         DatumFactory.createBool(true),                // 0
@@ -604,7 +613,7 @@ public class TestStorages {
         DatumFactory.createInet4("192.168.0.1")       // 10
     });
 
-    if (handleProtobuf) {
+    if (protoTypeSupport()) {
       seedTuple.put(10, factory.createDatum(queryid.getProto()));       // 11
     }
 
@@ -933,11 +942,17 @@ public class TestStorages {
 
   @Test
   public void testTime() throws IOException {
-    if (dataFormat.equalsIgnoreCase(BuiltinStorages.TEXT) || internalType) {
+    if (dateTypeSupport() || timeTypeSupport()) {
+
+      int index = 2;
       Schema schema = new Schema();
-      schema.addColumn("col1", Type.DATE);
-      schema.addColumn("col2", Type.TIME);
-      schema.addColumn("col3", Type.TIMESTAMP);
+      schema.addColumn("col1", Type.TIMESTAMP);
+      if (dateTypeSupport()) {
+        schema.addColumn("col" + index++, Type.DATE);
+      }
+      if (timeTypeSupport()) {
+        schema.addColumn("col" + index++, Type.TIME);
+      }
 
       KeyValueSet options = new KeyValueSet();
       TableMeta meta = CatalogUtil.newTableMeta(dataFormat, options);
@@ -947,11 +962,15 @@ public class TestStorages {
       Appender appender = sm.getAppender(meta, schema, tablePath);
       appender.init();
 
-      VTuple tuple = new VTuple(new Datum[]{
-          DatumFactory.createDate("1980-04-01"),
-          DatumFactory.createTime("12:34:56"),
-          DatumFactory.createTimestmpDatumWithUnixTime((int)(System.currentTimeMillis() / 1000))
-      });
+      VTuple tuple = new VTuple(index - 1);
+      index = 0;
+      tuple.put(index++, DatumFactory.createTimestmpDatumWithUnixTime((int)(System.currentTimeMillis() / 1000)));
+      if (dateTypeSupport()) {
+        tuple.put(index++, DatumFactory.createDate("1980-04-01"));
+      }
+      if (timeTypeSupport()) {
+        tuple.put(index, DatumFactory.createTime("12:34:56"));
+      }
       appender.addTuple(tuple);
       appender.flush();
       appender.close();
@@ -964,7 +983,7 @@ public class TestStorages {
       Tuple retrieved;
       while ((retrieved = scanner.next()) != null) {
         for (int i = 0; i < tuple.size(); i++) {
-          assertEquals(tuple.get(i), retrieved.asDatum(i));
+          assertEquals("failed at " + i + " th column", tuple.get(i), retrieved.asDatum(i));
         }
       }
       scanner.close();

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/test/resources/dataset/testVariousTypes.avsc
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/test/resources/dataset/testVariousTypes.avsc b/tajo-storage/tajo-storage-hdfs/src/test/resources/dataset/testVariousTypes.avsc
index f71f052..f1d1368 100644
--- a/tajo-storage/tajo-storage-hdfs/src/test/resources/dataset/testVariousTypes.avsc
+++ b/tajo-storage/tajo-storage-hdfs/src/test/resources/dataset/testVariousTypes.avsc
@@ -12,8 +12,7 @@
     { "name": "col7", "type": "double" },
     { "name": "col8", "type": "string" },
     { "name": "col9", "type": "bytes" },
-    { "name": "col10", "type": "bytes" },
-    { "name": "col11", "type": "bytes" }
+    { "name": "col10", "type": "bytes" }
   ]
 }
 

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/test/resources/storage-default.xml
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/test/resources/storage-default.xml b/tajo-storage/tajo-storage-hdfs/src/test/resources/storage-default.xml
index 6f7e53b..3283f9f 100644
--- a/tajo-storage/tajo-storage-hdfs/src/test/resources/storage-default.xml
+++ b/tajo-storage/tajo-storage-hdfs/src/test/resources/storage-default.xml
@@ -117,7 +117,7 @@
 
   <property>
     <name>tajo.storage.scanner-handler.orc.class</name>
-    <value>org.apache.tajo.storage.orc.ORCScanner</value>
+    <value>org.apache.tajo.storage.orc.OrcScanner</value>
   </property>
 
   <property>

[2/7] tajo git commit: TAJO-2102: Migrate to Apache Orc from Presto's one.

Posted by ji...@apache.org.

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/TreeReaderFactory.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/TreeReaderFactory.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/TreeReaderFactory.java
new file mode 100644
index 0000000..6ab630a
--- /dev/null
+++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/TreeReaderFactory.java
@@ -0,0 +1,1557 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tajo.storage.thirdparty.orc;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.io.Text;
+import org.apache.orc.OrcProto;
+import org.apache.orc.impl.*;
+import org.apache.tajo.catalog.Column;
+import org.apache.tajo.catalog.TypeDesc;
+import org.apache.tajo.datum.Datum;
+import org.apache.tajo.datum.DatumFactory;
+import org.apache.tajo.datum.NullDatum;
+import org.apache.tajo.exception.TajoRuntimeException;
+import org.apache.tajo.exception.UnsupportedException;
+import org.apache.tajo.util.datetime.DateTimeConstants;
+import org.apache.tajo.util.datetime.DateTimeUtil;
+
+import java.io.EOFException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.sql.Timestamp;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.TimeZone;
+
+import static org.apache.tajo.storage.thirdparty.orc.WriterImpl.BASE_TIMESTAMP_STRING;
+
+public class TreeReaderFactory {
+
+  private final static Log LOG = LogFactory.getLog(TreeReaderFactory.class);
+
+  public static class TreeReaderSchema {
+
+    /**
+     * The types in the ORC file.
+     */
+    List<OrcProto.Type> fileTypes;
+
+    /**
+     * The treeReaderSchema that the reader should read as.
+     */
+    List<OrcProto.Type> schemaTypes;
+
+    /**
+     * The subtype of the row STRUCT.  Different than 0 for ACID.
+     */
+    int innerStructSubtype;
+
+    public TreeReaderSchema() {
+      fileTypes = null;
+      schemaTypes = null;
+      innerStructSubtype = -1;
+    }
+
+    public TreeReaderSchema fileTypes(List<OrcProto.Type> fileTypes) {
+      this.fileTypes = fileTypes;
+      return this;
+    }
+
+    public TreeReaderSchema schemaTypes(List<OrcProto.Type> schemaTypes) {
+      this.schemaTypes = schemaTypes;
+      return this;
+    }
+
+    public TreeReaderSchema innerStructSubtype(int innerStructSubtype) {
+      this.innerStructSubtype = innerStructSubtype;
+      return this;
+    }
+
+    public List<OrcProto.Type> getFileTypes() {
+      return fileTypes;
+    }
+
+    public List<OrcProto.Type> getSchemaTypes() {
+      return schemaTypes;
+    }
+
+    public int getInnerStructSubtype() {
+      return innerStructSubtype;
+    }
+  }
+
+  public abstract static class TreeReader {
+    protected final int columnId;
+    protected BitFieldReader present = null;
+    protected boolean valuePresent = false;
+
+    TreeReader(int columnId) throws IOException {
+      this(columnId, null);
+    }
+
+    protected TreeReader(int columnId, InStream in) throws IOException {
+      this.columnId = columnId;
+      if (in == null) {
+        present = null;
+        valuePresent = true;
+      } else {
+        present = new BitFieldReader(in, 1);
+      }
+    }
+
+    void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
+      if (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) {
+        throw new IOException("Unknown encoding " + encoding + " in column " +
+            columnId);
+      }
+    }
+
+    static IntegerReader createIntegerReader(OrcProto.ColumnEncoding.Kind kind,
+                                             InStream in,
+                                             boolean signed, boolean skipCorrupt) throws IOException {
+      switch (kind) {
+        case DIRECT_V2:
+        case DICTIONARY_V2:
+          return new RunLengthIntegerReaderV2(in, signed, skipCorrupt);
+        case DIRECT:
+        case DICTIONARY:
+          return new RunLengthIntegerReader(in, signed);
+        default:
+          throw new IllegalArgumentException("Unknown encoding " + kind);
+      }
+    }
+
+    void startStripe(Map<org.apache.orc.impl.StreamName, InStream> streams,
+                     OrcProto.StripeFooter stripeFooter
+    ) throws IOException {
+      checkEncoding(stripeFooter.getColumnsList().get(columnId));
+      InStream in = streams.get(new org.apache.orc.impl.StreamName(columnId,
+          OrcProto.Stream.Kind.PRESENT));
+      if (in == null) {
+        present = null;
+        valuePresent = true;
+      } else {
+        present = new BitFieldReader(in, 1);
+      }
+    }
+
+    /**
+     * Seek to the given position.
+     *
+     * @param index the indexes loaded from the file
+     * @throws IOException
+     */
+    void seek(PositionProvider[] index) throws IOException {
+      seek(index[columnId]);
+    }
+
+    public void seek(PositionProvider index) throws IOException {
+      if (present != null) {
+        present.seek(index);
+      }
+    }
+
+    protected long countNonNulls(long rows) throws IOException {
+      if (present != null) {
+        long result = 0;
+        for (long c = 0; c < rows; ++c) {
+          if (present.next() == 1) {
+            result += 1;
+          }
+        }
+        return result;
+      } else {
+        return rows;
+      }
+    }
+
+    abstract void skipRows(long rows) throws IOException;
+
+    public BitFieldReader getPresent() {
+      return present;
+    }
+  }
+
+  public abstract static class DatumTreeReader extends TreeReader {
+
+    DatumTreeReader(int columnId) throws IOException {
+      super(columnId);
+    }
+
+    protected DatumTreeReader(int columnId, InStream in) throws IOException {
+      super(columnId, in);
+    }
+
+    Datum next() throws IOException {
+      if (present != null) {
+        valuePresent = present.next() == 1;
+      }
+      return NullDatum.get();
+    }
+  }
+
+  public abstract static class RawStringTreeReader extends TreeReader {
+    RawStringTreeReader(int columnId) throws IOException {
+      super(columnId);
+    }
+
+    protected RawStringTreeReader(int columnId, InStream in) throws IOException {
+      super(columnId, in);
+    }
+
+    byte[] next() throws IOException {
+      if (present != null) {
+        valuePresent = present.next() == 1;
+      }
+      return null;
+    }
+  }
+
+  public static class BooleanTreeReader extends DatumTreeReader {
+    protected BitFieldReader reader = null;
+
+    BooleanTreeReader(int columnId) throws IOException {
+      this(columnId, null, null);
+    }
+
+    protected BooleanTreeReader(int columnId, InStream present, InStream data) throws IOException {
+      super(columnId, present);
+      if (data != null) {
+        reader = new BitFieldReader(data, 1);
+      }
+    }
+
+    @Override
+    void startStripe(Map<org.apache.orc.impl.StreamName, InStream> streams,
+                     OrcProto.StripeFooter stripeFooter
+    ) throws IOException {
+      super.startStripe(streams, stripeFooter);
+      reader = new BitFieldReader(streams.get(new org.apache.orc.impl.StreamName(columnId,
+          OrcProto.Stream.Kind.DATA)), 1);
+    }
+
+    @Override
+    void seek(PositionProvider[] index) throws IOException {
+      seek(index[columnId]);
+    }
+
+    @Override
+    public void seek(PositionProvider index) throws IOException {
+      super.seek(index);
+      reader.seek(index);
+    }
+
+    @Override
+    void skipRows(long items) throws IOException {
+      reader.skip(countNonNulls(items));
+    }
+
+    @Override
+    Datum next() throws IOException {
+      super.next();
+      return valuePresent ? DatumFactory.createBool(reader.next() == 1) : NullDatum.get();
+    }
+  }
+
+  public static class ByteTreeReader extends DatumTreeReader {
+    protected RunLengthByteReader reader = null;
+
+    ByteTreeReader(int columnId) throws IOException {
+      this(columnId, null, null);
+    }
+
+    protected ByteTreeReader(int columnId, InStream present, InStream data) throws IOException {
+      super(columnId, present);
+      this.reader = new RunLengthByteReader(data);
+    }
+
+    @Override
+    void startStripe(Map<org.apache.orc.impl.StreamName, InStream> streams,
+                     OrcProto.StripeFooter stripeFooter
+    ) throws IOException {
+      super.startStripe(streams, stripeFooter);
+      reader = new RunLengthByteReader(streams.get(new org.apache.orc.impl.StreamName(columnId,
+          OrcProto.Stream.Kind.DATA)));
+    }
+
+    @Override
+    void seek(PositionProvider[] index) throws IOException {
+      seek(index[columnId]);
+    }
+
+    @Override
+    public void seek(PositionProvider index) throws IOException {
+      super.seek(index);
+      reader.seek(index);
+    }
+
+    @Override
+    Datum next() throws IOException {
+      super.next();
+      return valuePresent ? DatumFactory.createBit(reader.next()) : NullDatum.get();
+    }
+
+    @Override
+    void skipRows(long items) throws IOException {
+      reader.skip(countNonNulls(items));
+    }
+  }
+
+  public static class ShortTreeReader extends DatumTreeReader {
+    protected IntegerReader reader = null;
+
+    ShortTreeReader(int columnId) throws IOException {
+      this(columnId, null, null, null);
+    }
+
+    protected ShortTreeReader(int columnId, InStream present, InStream data,
+                              OrcProto.ColumnEncoding encoding)
+        throws IOException {
+      super(columnId, present);
+      if (data != null && encoding != null) {
+        checkEncoding(encoding);
+        this.reader = createIntegerReader(encoding.getKind(), data, true, false);
+      }
+    }
+
+    @Override
+    void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
+      if ((encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) &&
+          (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2)) {
+        throw new IOException("Unknown encoding " + encoding + " in column " +
+            columnId);
+      }
+    }
+
+    @Override
+    void startStripe(Map<org.apache.orc.impl.StreamName, InStream> streams,
+                     OrcProto.StripeFooter stripeFooter
+    ) throws IOException {
+      super.startStripe(streams, stripeFooter);
+      org.apache.orc.impl.StreamName name = new org.apache.orc.impl.StreamName(columnId,
+          OrcProto.Stream.Kind.DATA);
+      reader = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(),
+          streams.get(name), true, false);
+    }
+
+    @Override
+    void seek(PositionProvider[] index) throws IOException {
+      seek(index[columnId]);
+    }
+
+    @Override
+    public void seek(PositionProvider index) throws IOException {
+      super.seek(index);
+      reader.seek(index);
+    }
+
+    @Override
+    Datum next() throws IOException {
+      super.next();
+      return valuePresent ? DatumFactory.createInt2((short) reader.next()) : NullDatum.get();
+    }
+
+    @Override
+    void skipRows(long items) throws IOException {
+      reader.skip(countNonNulls(items));
+    }
+  }
+
+  public static class InetTreeReader extends DatumTreeReader {
+    protected IntegerReader reader = null;
+
+    InetTreeReader(int columnId) throws IOException {
+      this(columnId, null, null, null);
+    }
+
+    protected InetTreeReader(int columnId, InStream present, InStream data,
+                             OrcProto.ColumnEncoding encoding)
+        throws IOException {
+      super(columnId, present);
+      if (data != null && encoding != null) {
+        checkEncoding(encoding);
+        this.reader = createIntegerReader(encoding.getKind(), data, true, false);
+      }
+    }
+
+    @Override
+    void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
+      if ((encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) &&
+          (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2)) {
+        throw new IOException("Unknown encoding " + encoding + " in column " +
+            columnId);
+      }
+    }
+
+    @Override
+    void startStripe(Map<org.apache.orc.impl.StreamName, InStream> streams,
+                     OrcProto.StripeFooter stripeFooter
+    ) throws IOException {
+      super.startStripe(streams, stripeFooter);
+      org.apache.orc.impl.StreamName name = new org.apache.orc.impl.StreamName(columnId,
+          OrcProto.Stream.Kind.DATA);
+      reader = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(),
+          streams.get(name), true, false);
+    }
+
+    @Override
+    void seek(PositionProvider[] index) throws IOException {
+      seek(index[columnId]);
+    }
+
+    @Override
+    Datum next() throws IOException {
+      super.next();
+      return valuePresent ? DatumFactory.createInet4((int) reader.next()) : NullDatum.get();
+    }
+
+    @Override
+    void skipRows(long items) throws IOException {
+      reader.skip(countNonNulls(items));
+    }
+  }
+
+  /**
+   * Datum tree reader that produces INT4 datums from an integer-encoded column.
+   */
+  public static class IntTreeReader extends DatumTreeReader {
+    protected IntegerReader reader = null;
+
+    /**
+     * Creates a reader without any streams; {@code startStripe} attaches them.
+     */
+    IntTreeReader(int columnId) throws IOException {
+      this(columnId, null, null, null);
+    }
+
+    /**
+     * Creates a reader and, when both a data stream and an encoding are given,
+     * validates the encoding and builds the integer reader immediately.
+     */
+    protected IntTreeReader(int columnId, InStream present, InStream data,
+                            OrcProto.ColumnEncoding encoding)
+        throws IOException {
+      super(columnId, present);
+      if (data == null || encoding == null) {
+        return;
+      }
+      checkEncoding(encoding);
+      this.reader = createIntegerReader(encoding.getKind(), data, true, false);
+    }
+
+    /**
+     * Accepts only DIRECT and DIRECT_V2 encodings.
+     *
+     * @throws IOException for any other encoding kind
+     */
+    @Override
+    void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
+      final OrcProto.ColumnEncoding.Kind kind = encoding.getKind();
+      final boolean direct = kind == OrcProto.ColumnEncoding.Kind.DIRECT
+          || kind == OrcProto.ColumnEncoding.Kind.DIRECT_V2;
+      if (!direct) {
+        throw new IOException("Unknown encoding " + encoding + " in column " + columnId);
+      }
+    }
+
+    /**
+     * Rebuilds the integer reader over this column's DATA stream of the stripe.
+     */
+    @Override
+    void startStripe(Map<org.apache.orc.impl.StreamName, InStream> streams,
+                     OrcProto.StripeFooter stripeFooter
+    ) throws IOException {
+      super.startStripe(streams, stripeFooter);
+      final OrcProto.ColumnEncoding.Kind kind =
+          stripeFooter.getColumnsList().get(columnId).getKind();
+      final InStream dataStream = streams.get(
+          new org.apache.orc.impl.StreamName(columnId, OrcProto.Stream.Kind.DATA));
+      reader = createIntegerReader(kind, dataStream, true, false);
+    }
+
+    /**
+     * Seeks with the position provider of this reader's column.
+     */
+    @Override
+    void seek(PositionProvider[] index) throws IOException {
+      final PositionProvider columnPosition = index[columnId];
+      seek(columnPosition);
+    }
+
+    /**
+     * Seeks the parent state first and then the integer reader.
+     */
+    @Override
+    public void seek(PositionProvider index) throws IOException {
+      super.seek(index);
+      reader.seek(index);
+    }
+
+    /**
+     * @return an INT4 datum for the next row, or the null datum when absent
+     */
+    @Override
+    Datum next() throws IOException {
+      super.next();
+      if (!valuePresent) {
+        return NullDatum.get();
+      }
+      final int value = (int) reader.next();
+      return DatumFactory.createInt4(value);
+    }
+
+    /**
+     * Skips the non-null values contained in the next {@code items} rows.
+     */
+    @Override
+    void skipRows(long items) throws IOException {
+      final long nonNullCount = countNonNulls(items);
+      reader.skip(nonNullCount);
+    }
+  }
+
+  /**
+   * Datum tree reader that produces INT8 datums from an integer-encoded column.
+   */
+  public static class LongTreeReader extends DatumTreeReader {
+    protected IntegerReader reader = null;
+    // Remembered so that the per-stripe reader honors the caller's setting too.
+    private final boolean skipCorrupt;
+
+    /**
+     * Creates a reader without any streams; {@code startStripe} attaches them.
+     *
+     * @param columnId the column this reader serves
+     * @param skipCorrupt flag forwarded to every integer reader this instance creates
+     */
+    LongTreeReader(int columnId, boolean skipCorrupt) throws IOException {
+      this(columnId, null, null, null, skipCorrupt);
+    }
+
+    /**
+     * Creates a reader and, when both a data stream and an encoding are given,
+     * validates the encoding and builds the integer reader immediately.
+     *
+     * @param columnId the column this reader serves
+     * @param present the present stream handed to the parent reader, may be null
+     * @param data the data stream, may be null
+     * @param encoding the column encoding, may be null
+     * @param skipCorrupt flag forwarded to every integer reader this instance creates
+     * @throws IOException if the encoding is rejected or the reader cannot be built
+     */
+    protected LongTreeReader(int columnId, InStream present, InStream data,
+                             OrcProto.ColumnEncoding encoding,
+                             boolean skipCorrupt)
+        throws IOException {
+      super(columnId, present);
+      this.skipCorrupt = skipCorrupt;
+      if (data != null && encoding != null) {
+        checkEncoding(encoding);
+        this.reader = createIntegerReader(encoding.getKind(), data, true, skipCorrupt);
+      }
+    }
+
+    /**
+     * Accepts only DIRECT and DIRECT_V2 encodings.
+     *
+     * @throws IOException for any other encoding kind
+     */
+    @Override
+    void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
+      if ((encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) &&
+          (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2)) {
+        throw new IOException("Unknown encoding " + encoding + " in column " +
+            columnId);
+      }
+    }
+
+    /**
+     * Rebuilds the integer reader over this column's DATA stream of the stripe.
+     * The reader is created with the {@code skipCorrupt} value given at
+     * construction time (it used to be hard-coded to {@code false} here, which
+     * silently ignored the caller's choice).
+     */
+    @Override
+    void startStripe(Map<org.apache.orc.impl.StreamName, InStream> streams,
+                     OrcProto.StripeFooter stripeFooter
+    ) throws IOException {
+      super.startStripe(streams, stripeFooter);
+      org.apache.orc.impl.StreamName name = new org.apache.orc.impl.StreamName(columnId,
+          OrcProto.Stream.Kind.DATA);
+      reader = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(),
+          streams.get(name), true, skipCorrupt);
+    }
+
+    /**
+     * Seeks with the position provider of this reader's column.
+     */
+    @Override
+    void seek(PositionProvider[] index) throws IOException {
+      seek(index[columnId]);
+    }
+
+    /**
+     * Seeks the parent state first and then the integer reader.
+     */
+    @Override
+    public void seek(PositionProvider index) throws IOException {
+      super.seek(index);
+      reader.seek(index);
+    }
+
+    /**
+     * @return an INT8 datum for the next row, or the null datum when absent
+     */
+    @Override
+    Datum next() throws IOException {
+      super.next();
+      return valuePresent ? DatumFactory.createInt8(reader.next()) : NullDatum.get();
+    }
+
+    /**
+     * Skips the non-null values contained in the next {@code items} rows.
+     */
+    @Override
+    void skipRows(long items) throws IOException {
+      reader.skip(countNonNulls(items));
+    }
+  }
+
+  /**
+   * Datum tree reader that produces FLOAT4 datums by decoding floats straight
+   * from the column's DATA stream.
+   */
+  public static class FloatTreeReader extends DatumTreeReader {
+    protected InStream stream;
+    private final org.apache.orc.impl.SerializationUtils utils;
+
+    /**
+     * Creates a reader without any streams; {@code startStripe} attaches them.
+     */
+    FloatTreeReader(int columnId) throws IOException {
+      this(columnId, null, null);
+    }
+
+    /**
+     * Creates a reader over the given present and data streams (either may be null).
+     */
+    protected FloatTreeReader(int columnId, InStream present, InStream data) throws IOException {
+      super(columnId, present);
+      this.utils = new org.apache.orc.impl.SerializationUtils();
+      this.stream = data;
+    }
+
+    /**
+     * Switches to this column's DATA stream of the new stripe.
+     */
+    @Override
+    void startStripe(Map<org.apache.orc.impl.StreamName, InStream> streams,
+                     OrcProto.StripeFooter stripeFooter
+    ) throws IOException {
+      super.startStripe(streams, stripeFooter);
+      org.apache.orc.impl.StreamName name = new org.apache.orc.impl.StreamName(columnId,
+          OrcProto.Stream.Kind.DATA);
+      stream = streams.get(name);
+    }
+
+    /**
+     * Seeks with the position provider of this reader's column.
+     */
+    @Override
+    void seek(PositionProvider[] index) throws IOException {
+      seek(index[columnId]);
+    }
+
+    /**
+     * Seeks the parent state first and then the data stream.
+     */
+    @Override
+    public void seek(PositionProvider index) throws IOException {
+      super.seek(index);
+      stream.seek(index);
+    }
+
+    /**
+     * @return a FLOAT4 datum for the next row, or the null datum when absent
+     */
+    @Override
+    Datum next() throws IOException {
+      super.next();
+      return valuePresent ? DatumFactory.createFloat4(utils.readFloat(stream)) : NullDatum.get();
+    }
+
+    /**
+     * Skips the non-null values contained in the next {@code items} rows by
+     * reading and discarding one float per value.
+     */
+    @Override
+    protected void skipRows(long items) throws IOException {
+      final long nonNullCount = countNonNulls(items);
+      // A long counter is required: an int counter would wrap around and never
+      // reach a bound larger than Integer.MAX_VALUE.
+      for (long i = 0; i < nonNullCount; ++i) {
+        utils.readFloat(stream);
+      }
+    }
+  }
+
+  /**
+   * Datum tree reader that produces FLOAT8 datums by decoding doubles straight
+   * from the column's DATA stream.
+   */
+  public static class DoubleTreeReader extends DatumTreeReader {
+    protected InStream stream;
+    private final org.apache.orc.impl.SerializationUtils utils;
+
+    /**
+     * Creates a reader without any streams; {@code startStripe} attaches them.
+     */
+    DoubleTreeReader(int columnId) throws IOException {
+      this(columnId, null, null);
+    }
+
+    /**
+     * Creates a reader over the given present and data streams (either may be null).
+     */
+    protected DoubleTreeReader(int columnId, InStream present, InStream data) throws IOException {
+      super(columnId, present);
+      this.utils = new SerializationUtils();
+      this.stream = data;
+    }
+
+    /**
+     * Switches to this column's DATA stream of the new stripe.
+     */
+    @Override
+    void startStripe(Map<org.apache.orc.impl.StreamName, InStream> streams,
+                     OrcProto.StripeFooter stripeFooter
+    ) throws IOException {
+      super.startStripe(streams, stripeFooter);
+      stream = streams.get(
+          new org.apache.orc.impl.StreamName(columnId, OrcProto.Stream.Kind.DATA));
+    }
+
+    /**
+     * Seeks with the position provider of this reader's column.
+     */
+    @Override
+    void seek(PositionProvider[] index) throws IOException {
+      final PositionProvider columnPosition = index[columnId];
+      seek(columnPosition);
+    }
+
+    /**
+     * Seeks the parent state first and then the data stream.
+     */
+    @Override
+    public void seek(PositionProvider index) throws IOException {
+      super.seek(index);
+      stream.seek(index);
+    }
+
+    /**
+     * @return a FLOAT8 datum for the next row, or the null datum when absent
+     */
+    @Override
+    Datum next() throws IOException {
+      super.next();
+      if (!valuePresent) {
+        return NullDatum.get();
+      }
+      return DatumFactory.createFloat8(utils.readDouble(stream));
+    }
+
+    /**
+     * Skips the non-null values contained in the next {@code items} rows by
+     * skipping eight bytes per value in the data stream.
+     */
+    @Override
+    void skipRows(long items) throws IOException {
+      items = countNonNulls(items);
+      long remaining = items * 8;
+      while (remaining > 0) {
+        remaining -= stream.skip(remaining);
+      }
+    }
+  }
+
+  /**
+   * Datum tree reader that produces BLOB datums. Each non-null value takes its
+   * byte count from the LENGTH stream and its bytes from the DATA stream.
+   */
+  public static class BinaryTreeReader extends DatumTreeReader {
+    protected InStream stream;
+    protected IntegerReader lengths = null;
+    protected final LongColumnVector scratchlcv;
+
+    /**
+     * Creates a reader without any streams; {@code startStripe} attaches them.
+     */
+    BinaryTreeReader(int columnId) throws IOException {
+      this(columnId, null, null, null, null);
+    }
+
+    /**
+     * Creates a reader over the given streams. The length reader is built only
+     * when both a length stream and an encoding are supplied.
+     *
+     * @throws IOException if the encoding is rejected or the reader cannot be built
+     */
+    protected BinaryTreeReader(int columnId, InStream present, InStream data, InStream length,
+                               OrcProto.ColumnEncoding encoding) throws IOException {
+      super(columnId, present);
+      scratchlcv = new LongColumnVector();
+      this.stream = data;
+      if (length != null && encoding != null) {
+        checkEncoding(encoding);
+        this.lengths = createIntegerReader(encoding.getKind(), length, false, false);
+      }
+    }
+
+    /**
+     * Accepts only DIRECT and DIRECT_V2 encodings.
+     *
+     * @throws IOException for any other encoding kind
+     */
+    @Override
+    void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
+      if ((encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) &&
+          (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2)) {
+        throw new IOException("Unknown encoding " + encoding + " in column " +
+            columnId);
+      }
+    }
+
+    /**
+     * Switches to this column's DATA stream and rebuilds the length reader
+     * over its LENGTH stream for the new stripe.
+     */
+    @Override
+    void startStripe(Map<org.apache.orc.impl.StreamName, InStream> streams,
+                     OrcProto.StripeFooter stripeFooter
+    ) throws IOException {
+      super.startStripe(streams, stripeFooter);
+      org.apache.orc.impl.StreamName name = new org.apache.orc.impl.StreamName(columnId,
+          OrcProto.Stream.Kind.DATA);
+      stream = streams.get(name);
+      lengths = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(),
+          streams.get(new org.apache.orc.impl.StreamName(columnId, OrcProto.Stream.Kind.LENGTH)), false, false);
+    }
+
+    /**
+     * Seeks with the position provider of this reader's column.
+     */
+    @Override
+    void seek(PositionProvider[] index) throws IOException {
+      seek(index[columnId]);
+    }
+
+    /**
+     * Seeks the parent state, then the data stream, then the length reader.
+     */
+    @Override
+    public void seek(PositionProvider index) throws IOException {
+      super.seek(index);
+      stream.seek(index);
+      lengths.seek(index);
+    }
+
+    /**
+     * Reads the next row.
+     *
+     * @return a BLOB datum holding the value's bytes, or the null datum when absent
+     * @throws EOFException if the data stream ends before the value is complete
+     */
+    @Override
+    Datum next() throws IOException {
+      super.next();
+
+      if (valuePresent) {
+        int len = (int) lengths.next();
+        byte[] buf = new byte[len];
+        int offset = 0;
+        // A single read may deliver fewer bytes than requested, so loop until full.
+        while (len > 0) {
+          int written = stream.read(buf, offset, len);
+          if (written < 0) {
+            throw new EOFException("Can't finish byte read from " + stream);
+          }
+          len -= written;
+          offset += written;
+        }
+        return DatumFactory.createBlob(buf);
+      } else {
+        return NullDatum.get();
+      }
+    }
+
+    /**
+     * Skips the non-null values contained in the next {@code items} rows: sums
+     * their lengths and then skips that many bytes in the data stream.
+     */
+    @Override
+    void skipRows(long items) throws IOException {
+      items = countNonNulls(items);
+      long lengthToSkip = 0;
+      // A long counter is required: an int counter would wrap around and never
+      // reach a bound larger than Integer.MAX_VALUE.
+      for (long i = 0; i < items; ++i) {
+        lengthToSkip += lengths.next();
+      }
+      while (lengthToSkip > 0) {
+        lengthToSkip -= stream.skip(lengthToSkip);
+      }
+    }
+  }
+
+  /**
+   * Datum tree reader that produces timestamp datums from two integer streams:
+   * the DATA stream (seconds relative to a base timestamp) and the SECONDARY
+   * stream (encoded nanoseconds, see {@code parseNanos}).
+   */
+  public static class TimestampTreeReader extends DatumTreeReader {
+    protected IntegerReader data = null;
+    protected IntegerReader nanos = null;
+    private final boolean skipCorrupt;
+    // Cache of base timestamps (in seconds) keyed by time zone id.
+    private Map<String, Long> baseTimestampMap;
+    // Base timestamp in seconds; assigned only in the constructor, from the reader's zone.
+    private long base_timestamp;
+    private final TimeZone readerTimeZone;
+    // Updated by getBaseTimestamp whenever it meets a zone id that is not cached yet.
+    private TimeZone writerTimeZone;
+    // NOTE(review): written here and in getBaseTimestamp but never read in this class.
+    private boolean hasSameTZRules;
+    // NOTE(review): stored by the constructor but never read in this class.
+    private final TimeZone timeZone;
+
+    /**
+     * Creates a reader without any streams; {@code startStripe} attaches them.
+     */
+    TimestampTreeReader(TimeZone timeZone, int columnId, boolean skipCorrupt) throws IOException {
+      this(timeZone, columnId, null, null, null, null, skipCorrupt);
+    }
+
+    /**
+     * Creates a reader. The reader time zone is the JVM default; the writer
+     * time zone starts out equal to it. When an encoding is supplied it is
+     * validated and an integer reader is built for each non-null stream.
+     *
+     * @param timeZone stored in the {@code timeZone} field
+     * @param columnId the column this reader serves
+     * @param presentStream the present stream handed to the parent reader, may be null
+     * @param dataStream the DATA stream, may be null
+     * @param nanosStream the nanosecond stream, may be null
+     * @param encoding the column encoding, may be null
+     * @param skipCorrupt flag forwarded to every integer reader this instance creates
+     * @throws IOException if the encoding is rejected or the base timestamp cannot be computed
+     */
+    protected TimestampTreeReader(TimeZone timeZone, int columnId, InStream presentStream, InStream dataStream,
+                                  InStream nanosStream, OrcProto.ColumnEncoding encoding, boolean skipCorrupt)
+        throws IOException {
+      super(columnId, presentStream);
+      this.skipCorrupt = skipCorrupt;
+      this.baseTimestampMap = new HashMap<>();
+      this.readerTimeZone = TimeZone.getDefault();
+      this.writerTimeZone = readerTimeZone;
+      this.hasSameTZRules = writerTimeZone.hasSameRules(readerTimeZone);
+      this.base_timestamp = getBaseTimestamp(readerTimeZone.getID());
+      if (encoding != null) {
+        checkEncoding(encoding);
+
+        if (dataStream != null) {
+          this.data = createIntegerReader(encoding.getKind(), dataStream, true, skipCorrupt);
+        }
+
+        if (nanosStream != null) {
+          this.nanos = createIntegerReader(encoding.getKind(), nanosStream, false, skipCorrupt);
+        }
+      }
+      this.timeZone = timeZone;
+    }
+
+    /**
+     * Accepts only DIRECT and DIRECT_V2 encodings.
+     *
+     * @throws IOException for any other encoding kind
+     */
+    @Override
+    void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
+      if ((encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) &&
+          (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2)) {
+        throw new IOException("Unknown encoding " + encoding + " in column " +
+            columnId);
+      }
+    }
+
+    /**
+     * Rebuilds the integer readers over this column's DATA and SECONDARY
+     * streams and registers the stripe's writer time zone.
+     */
+    @Override
+    void startStripe(Map<org.apache.orc.impl.StreamName, InStream> streams,
+                     OrcProto.StripeFooter stripeFooter
+    ) throws IOException {
+      super.startStripe(streams, stripeFooter);
+      data = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(),
+          streams.get(new org.apache.orc.impl.StreamName(columnId,
+              OrcProto.Stream.Kind.DATA)), true, skipCorrupt);
+      nanos = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(),
+          streams.get(new org.apache.orc.impl.StreamName(columnId,
+              OrcProto.Stream.Kind.SECONDARY)), false, skipCorrupt);
+      // Called for its side effect on writerTimeZone; the returned base timestamp
+      // is discarded, so base_timestamp keeps the value computed in the constructor.
+      // NOTE(review): confirm that ignoring the writer-zone base is intentional.
+      getBaseTimestamp(stripeFooter.getWriterTimezone());
+    }
+
+    /**
+     * Returns the epoch seconds of {@code BASE_TIMESTAMP_STRING} interpreted in
+     * the given time zone, caching the result per zone id. On a cache miss it
+     * also updates {@code writerTimeZone} and {@code hasSameTZRules}; on a cache
+     * hit those fields are left untouched.
+     *
+     * @param timeZoneId zone id; null or empty falls back to the reader's zone id
+     * @return the base timestamp in seconds
+     * @throws IOException if the base timestamp string cannot be parsed
+     */
+    private long getBaseTimestamp(String timeZoneId) throws IOException {
+      // to make sure new readers read old files in the same way
+      if (timeZoneId == null || timeZoneId.isEmpty()) {
+        timeZoneId = readerTimeZone.getID();
+      }
+
+      if (!baseTimestampMap.containsKey(timeZoneId)) {
+        writerTimeZone = TimeZone.getTimeZone(timeZoneId);
+        hasSameTZRules = writerTimeZone.hasSameRules(readerTimeZone);
+        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+        sdf.setTimeZone(writerTimeZone);
+        try {
+          long epoch = sdf.parse(BASE_TIMESTAMP_STRING).getTime() / DateTimeConstants.MSECS_PER_SEC;
+          baseTimestampMap.put(timeZoneId, epoch);
+          return epoch;
+        } catch (ParseException e) {
+          throw new IOException("Unable to create base timestamp", e);
+        } finally {
+          // sdf is local to this call, so this reset is not observable elsewhere.
+          sdf.setTimeZone(readerTimeZone);
+        }
+      }
+
+      return baseTimestampMap.get(timeZoneId);
+    }
+
+    /**
+     * Seeks with the position provider of this reader's column.
+     */
+    @Override
+    void seek(PositionProvider[] index) throws IOException {
+      seek(index[columnId]);
+    }
+
+    /**
+     * Seeks the parent state, then the seconds reader, then the nanos reader.
+     */
+    @Override
+    public void seek(PositionProvider index) throws IOException {
+      super.seek(index);
+      data.seek(index);
+      nanos.seek(index);
+    }
+
+    /**
+     * Reads the next row.
+     *
+     * @return a timestamp datum, or the null datum when the row has no value
+     */
+    @Override
+    Datum next() throws IOException {
+      super.next();
+
+      if (valuePresent) {
+        long millis = decodeTimestamp(data.next(), nanos.next(), base_timestamp);
+        // Shift by the writer zone's raw (non-DST) offset before converting.
+        long adjustedMillis = millis - writerTimeZone.getRawOffset();
+        return DatumFactory.createTimestamp(DateTimeUtil.javaTimeToJulianTime(adjustedMillis));
+      } else {
+        return NullDatum.get();
+      }
+    }
+
+    /**
+     * Decodes a serialized nanosecond value: the low three bits are a scale
+     * marker and the remaining bits are the digits. A non-zero marker
+     * {@code z} multiplies the digits by ten {@code z + 1} times.
+     */
+    private static int parseNanos(long serialized) {
+      int zeros = 7 & (int) serialized;
+      int result = (int) (serialized >>> 3);
+      if (zeros != 0) {
+        for (int i = 0; i <= zeros; ++i) {
+          result *= 10;
+        }
+      }
+      return result;
+    }
+
+    /**
+     * Combines stored seconds, serialized nanos and the base timestamp into
+     * milliseconds (sub-millisecond precision is dropped).
+     */
+    // borrowed from Facebook's TimestampStreamReader
+    private static long decodeTimestamp(long seconds, long serializedNanos, long baseTimestampInSeconds) {
+      long millis = (seconds + baseTimestampInSeconds) * DateTimeConstants.MSECS_PER_SEC;
+      long nanos = parseNanos(serializedNanos);
+
+      // Java integer division truncates toward zero, so negative values need a
+      // correction: -42001/1000 = -42; and -42001 % 1000 = -1 (+ 1000)
+      // to get the correct value we need
+      // (-42 - 1)*1000 + 999 = -42001
+      // (42)*1000 + 1 = 42001
+      if (millis < 0 && nanos != 0) {
+        millis -= 1000;
+      }
+      // Truncate nanos to millis and add to mills
+      return millis + (nanos / 1_000_000);
+    }
+
+    /**
+     * Skips the non-null values contained in the next {@code items} rows in
+     * both integer readers.
+     */
+    @Override
+    void skipRows(long items) throws IOException {
+      items = countNonNulls(items);
+      data.skip(items);
+      nanos.skip(items);
+    }
+  }
+
+  /**
+   * Datum tree reader that produces DATE datums from an integer-encoded column.
+   */
+  public static class DateTreeReader extends DatumTreeReader {
+    protected IntegerReader reader = null;
+
+    /**
+     * Creates a reader without any streams; {@code startStripe} attaches them.
+     */
+    DateTreeReader(int columnId) throws IOException {
+      this(columnId, null, null, null);
+    }
+
+    /**
+     * Creates a reader and, when both a data stream and an encoding are given,
+     * validates the encoding and builds the integer reader immediately.
+     */
+    protected DateTreeReader(int columnId, InStream present, InStream data,
+                             OrcProto.ColumnEncoding encoding) throws IOException {
+      super(columnId, present);
+      if (data == null || encoding == null) {
+        return;
+      }
+      checkEncoding(encoding);
+      reader = createIntegerReader(encoding.getKind(), data, true, false);
+    }
+
+    /**
+     * Accepts only DIRECT and DIRECT_V2 encodings.
+     *
+     * @throws IOException for any other encoding kind
+     */
+    @Override
+    void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
+      final OrcProto.ColumnEncoding.Kind kind = encoding.getKind();
+      final boolean direct = kind == OrcProto.ColumnEncoding.Kind.DIRECT
+          || kind == OrcProto.ColumnEncoding.Kind.DIRECT_V2;
+      if (!direct) {
+        throw new IOException("Unknown encoding " + encoding + " in column " + columnId);
+      }
+    }
+
+    /**
+     * Rebuilds the integer reader over this column's DATA stream of the stripe.
+     */
+    @Override
+    void startStripe(Map<org.apache.orc.impl.StreamName, InStream> streams,
+                     OrcProto.StripeFooter stripeFooter
+    ) throws IOException {
+      super.startStripe(streams, stripeFooter);
+      final OrcProto.ColumnEncoding.Kind kind =
+          stripeFooter.getColumnsList().get(columnId).getKind();
+      final InStream dataStream = streams.get(
+          new org.apache.orc.impl.StreamName(columnId, OrcProto.Stream.Kind.DATA));
+      reader = createIntegerReader(kind, dataStream, true, false);
+    }
+
+    /**
+     * Seeks with the position provider of this reader's column.
+     */
+    @Override
+    void seek(PositionProvider[] index) throws IOException {
+      final PositionProvider columnPosition = index[columnId];
+      seek(columnPosition);
+    }
+
+    /**
+     * Seeks the parent state first and then the integer reader.
+     */
+    @Override
+    public void seek(PositionProvider index) throws IOException {
+      super.seek(index);
+      reader.seek(index);
+    }
+
+    /**
+     * @return a DATE datum built from the stored value plus
+     *         {@code DateTimeUtil.DAYS_FROM_JULIAN_TO_EPOCH}, or the null datum when absent
+     */
+    @Override
+    Datum next() throws IOException {
+      super.next();
+      if (!valuePresent) {
+        return NullDatum.get();
+      }
+      final int stored = (int) reader.next();
+      return DatumFactory.createDate(stored + DateTimeUtil.DAYS_FROM_JULIAN_TO_EPOCH);
+    }
+
+    /**
+     * Skips the non-null values contained in the next {@code items} rows.
+     */
+    @Override
+    void skipRows(long items) throws IOException {
+      final long nonNullCount = countNonNulls(items);
+      reader.skip(nonNullCount);
+    }
+  }
+
+  /**
+   * A tree reader that will read string columns. At the start of the
+   * stripe, it creates an internal reader based on whether a direct or
+   * dictionary encoding was used.
+   */
+  public static class StringTreeReader extends DatumTreeReader {
+    protected RawStringTreeReader reader;
+
+    StringTreeReader(int columnId) throws IOException {
+      super(columnId);
+    }
+
+    protected StringTreeReader(int columnId, InStream present, InStream data, InStream length,
+                               InStream dictionary, OrcProto.ColumnEncoding encoding) throws IOException {
+      super(columnId, present);
+      if (encoding != null) {
+        switch (encoding.getKind()) {
+          case DIRECT:
+          case DIRECT_V2:
+            reader = new StringDirectTreeReader(columnId, present, data, length,
+                encoding.getKind());
+            break;
+          case DICTIONARY:
+          case DICTIONARY_V2:
+            reader = new StringDictionaryTreeReader(columnId, present, data, length, dictionary,
+                encoding);
+            break;
+          default:
+            throw new IllegalArgumentException("Unsupported encoding " +
+                encoding.getKind());
+        }
+      }
+    }
+
+    @Override
+    void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
+      reader.checkEncoding(encoding);
+    }
+
+    @Override
+    void startStripe(Map<org.apache.orc.impl.StreamName, InStream> streams,
+                     OrcProto.StripeFooter stripeFooter
+    ) throws IOException {
+      // For each stripe, checks the encoding and initializes the appropriate
+      // reader
+      switch (stripeFooter.getColumnsList().get(columnId).getKind()) {
+        case DIRECT:
+        case DIRECT_V2:
+          reader = new StringDirectTreeReader(columnId);
+          break;
+        case DICTIONARY:
+        case DICTIONARY_V2:
+          reader = new StringDictionaryTreeReader(columnId);
+          break;
+        default:
+          throw new IllegalArgumentException("Unsupported encoding " +
+              stripeFooter.getColumnsList().get(columnId).getKind());
+      }
+      reader.startStripe(streams, stripeFooter);
+    }
+
+    @Override
+    void seek(PositionProvider[] index) throws IOException {
+      reader.seek(index);
+    }
+
+    @Override
+    public void seek(PositionProvider index) throws IOException {
+      reader.seek(index);
+    }
+
+    @Override
+    Datum next() throws IOException {
+      byte[] bytes = reader.next();
+      return bytes == null ? NullDatum.get() : DatumFactory.createText(bytes);
+    }
+
+    @Override
+    void skipRows(long items) throws IOException {
+      reader.skipRows(items);
+    }
+  }
+
+  /**
+   * Small helper that reads an exact number of bytes from an input stream.
+   */
+  private final static class BasicTextReaderShim {
+    private final InputStream in;
+
+    public BasicTextReaderShim(InputStream in) {
+      this.in = in;
+    }
+
+    /**
+     * Reads exactly {@code len} bytes, looping over short reads.
+     *
+     * @param len number of bytes to read
+     * @return a new array of length {@code len} filled from the stream
+     * @throws EOFException if the stream ends before {@code len} bytes were read
+     * @throws IOException if the underlying read fails
+     */
+    public byte[] read(int len) throws IOException {
+      final byte[] bytes = new byte[len];
+      int filled = 0;
+      while (filled < bytes.length) {
+        final int count = in.read(bytes, filled, bytes.length - filled);
+        if (count < 0) {
+          throw new EOFException("Can't finish read from " + in + " read "
+              + (filled) + " bytes out of " + bytes.length);
+        }
+        filled += count;
+      }
+      return bytes;
+    }
+  }
+
+  /**
+   * A reader for string columns that are direct encoded in the current
+   * stripe.
+   */
+  public static class StringDirectTreeReader extends RawStringTreeReader {
+    protected InStream stream;
+    protected BasicTextReaderShim data;
+    protected IntegerReader lengths;
+    private final LongColumnVector scratchlcv;
+
+    StringDirectTreeReader(int columnId) throws IOException {
+      this(columnId, null, null, null, null);
+    }
+
+    protected StringDirectTreeReader(int columnId, InStream present, InStream data,
+                                     InStream length, OrcProto.ColumnEncoding.Kind encoding) throws IOException {
+      super(columnId, present);
+      this.scratchlcv = new LongColumnVector();
+      this.stream = data;
+      if (length != null && encoding != null) {
+        this.lengths = createIntegerReader(encoding, length, false, false);
+      }
+    }
+
+    @Override
+    void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
+      if (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT &&
+          encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2) {
+        throw new IOException("Unknown encoding " + encoding + " in column " +
+            columnId);
+      }
+    }
+
+    @Override
+    void startStripe(Map<org.apache.orc.impl.StreamName, InStream> streams,
+                     OrcProto.StripeFooter stripeFooter
+    ) throws IOException {
+      super.startStripe(streams, stripeFooter);
+      org.apache.orc.impl.StreamName name = new org.apache.orc.impl.StreamName(columnId,
+          OrcProto.Stream.Kind.DATA);
+      stream = streams.get(name);
+      data = new BasicTextReaderShim(stream);
+
+      lengths = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(),
+          streams.get(new org.apache.orc.impl.StreamName(columnId, OrcProto.Stream.Kind.LENGTH)),
+          false, false);
+    }
+
+    @Override
+    void seek(PositionProvider[] index) throws IOException {
+      seek(index[columnId]);
+    }
+
+    @Override
+    public void seek(PositionProvider index) throws IOException {
+      super.seek(index);
+      stream.seek(index);
+      // don't seek data stream
+      lengths.seek(index);
+    }
+
+    @Override
+    byte[] next() throws IOException {
+      super.next();
+      int len = (int) lengths.next();
+      return valuePresent ? data.read(len) : null;
+    }
+
+    @Override
+    void skipRows(long items) throws IOException {
+      items = countNonNulls(items);
+      long lengthToSkip = 0;
+      for (int i = 0; i < items; ++i) {
+        lengthToSkip += lengths.next();
+      }
+
+      while (lengthToSkip > 0) {
+        lengthToSkip -= stream.skip(lengthToSkip);
+      }
+    }
+
+    public IntegerReader getLengths() {
+      return lengths;
+    }
+
+    public InStream getStream() {
+      return stream;
+    }
+  }
+
+  /**
+   * A reader for string columns that are dictionary encoded in the current
+   * stripe.
+   */
+  public static class StringDictionaryTreeReader extends RawStringTreeReader {
+    private org.apache.orc.impl.DynamicByteArray dictionaryBuffer;
+    private int[] dictionaryOffsets;
+    protected IntegerReader reader;
+
+    private byte[] dictionaryBufferInBytesCache = null;
+    private final LongColumnVector scratchlcv;
+    private final Text result = new Text();
+
+    StringDictionaryTreeReader(int columnId) throws IOException {
+      this(columnId, null, null, null, null, null);
+    }
+
+    protected StringDictionaryTreeReader(int columnId, InStream present, InStream data,
+                                         InStream length, InStream dictionary, OrcProto.ColumnEncoding encoding)
+        throws IOException {
+      super(columnId, present);
+      scratchlcv = new LongColumnVector();
+      if (data != null && encoding != null) {
+        this.reader = createIntegerReader(encoding.getKind(), data, false, false);
+      }
+
+      if (dictionary != null && encoding != null) {
+        readDictionaryStream(dictionary);
+      }
+
+      if (length != null && encoding != null) {
+        readDictionaryLengthStream(length, encoding);
+      }
+    }
+
+    @Override
+    void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
+      if (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DICTIONARY &&
+          encoding.getKind() != OrcProto.ColumnEncoding.Kind.DICTIONARY_V2) {
+        throw new IOException("Unknown encoding " + encoding + " in column " +
+            columnId);
+      }
+    }
+
+    @Override
+    void startStripe(Map<org.apache.orc.impl.StreamName, InStream> streams,
+                     OrcProto.StripeFooter stripeFooter
+    ) throws IOException {
+      super.startStripe(streams, stripeFooter);
+
+      // read the dictionary blob
+      org.apache.orc.impl.StreamName name = new org.apache.orc.impl.StreamName(columnId,
+          OrcProto.Stream.Kind.DICTIONARY_DATA);
+      InStream in = streams.get(name);
+      readDictionaryStream(in);
+
+      // read the lengths
+      name = new org.apache.orc.impl.StreamName(columnId, OrcProto.Stream.Kind.LENGTH);
+      in = streams.get(name);
+      readDictionaryLengthStream(in, stripeFooter.getColumnsList().get(columnId));
+
+      // set up the row reader
+      name = new org.apache.orc.impl.StreamName(columnId, OrcProto.Stream.Kind.DATA);
+      reader = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(),
+          streams.get(name), false, false);
+    }
+
+    private void readDictionaryLengthStream(InStream in, OrcProto.ColumnEncoding encoding)
+        throws IOException {
+      int dictionarySize = encoding.getDictionarySize();
+      if (in != null) { // Guard against empty LENGTH stream.
+        IntegerReader lenReader = createIntegerReader(encoding.getKind(), in, false, false);
+        int offset = 0;
+        if (dictionaryOffsets == null ||
+            dictionaryOffsets.length < dictionarySize + 1) {
+          dictionaryOffsets = new int[dictionarySize + 1];
+        }
+        for (int i = 0; i < dictionarySize; ++i) {
+          dictionaryOffsets[i] = offset;
+          offset += (int) lenReader.next();
+        }
+        dictionaryOffsets[dictionarySize] = offset;
+        in.close();
+      }
+
+    }
+
+    private void readDictionaryStream(InStream in) throws IOException {
+      if (in != null) { // Guard against empty dictionary stream.
+        if (in.available() > 0) {
+          dictionaryBuffer = new DynamicByteArray(64, in.available());
+          dictionaryBuffer.readAll(in);
+          // Since its start of strip invalidate the cache.
+          dictionaryBufferInBytesCache = null;
+        }
+        in.close();
+      } else {
+        dictionaryBuffer = null;
+      }
+    }
+
+    @Override
+    void seek(PositionProvider[] index) throws IOException {
+      seek(index[columnId]);
+    }
+
+    @Override
+    public void seek(PositionProvider index) throws IOException {
+      super.seek(index);
+      reader.seek(index);
+    }
+
+    @Override
+    byte[] next() throws IOException {
+      super.next();
+      if (valuePresent) {
+        int entry = (int) reader.next();
+        int offset = dictionaryOffsets[entry];
+        int length = getDictionaryEntryLength(entry, offset);
+        // If the column is just empty strings, the size will be zero,
+        // so the buffer will be null, in that case just return result
+        // as it will default to empty
+        if (dictionaryBuffer != null) {
+          dictionaryBuffer.setText(result, offset, length);
+        } else {
+          result.clear();
+        }
+        return result.getBytes();
+      } else {
+        return null;
+      }
+    }
+
+    int getDictionaryEntryLength(int entry, int offset) {
+      final int length;
+      // if it isn't the last entry, subtract the offsets otherwise use
+      // the buffer length.
+      if (entry < dictionaryOffsets.length - 1) {
+        length = dictionaryOffsets[entry + 1] - offset;
+      } else {
+        length = dictionaryBuffer.size() - offset;
+      }
+      return length;
+    }
+
+    @Override
+    void skipRows(long items) throws IOException {
+      reader.skip(countNonNulls(items));
+    }
+
+    public IntegerReader getReader() {
+      return reader;
+    }
+  }
+
+  /**
+   * Tree reader for char columns that produces CHAR datums. It delegates to an
+   * inner raw string reader, chosen per stripe according to whether the column
+   * is direct or dictionary encoded. The maximum length is stored but not
+   * enforced yet.
+   */
+  public static class CharTreeReader extends DatumTreeReader {
+    protected RawStringTreeReader reader;
+    private final int maxLength;
+
+    /**
+     * Creates a reader without an inner reader; {@code startStripe} picks one.
+     */
+    CharTreeReader(int columnId, int maxLength) throws IOException {
+      this(columnId, null, null, null, null, null, maxLength);
+    }
+
+    /**
+     * Creates a reader and, when an encoding is supplied, the matching inner
+     * reader over the given streams.
+     *
+     * @throws IllegalArgumentException if the encoding kind is neither direct nor dictionary
+     */
+    protected CharTreeReader(int columnId, InStream present, InStream data, InStream length,
+                             InStream dictionary, OrcProto.ColumnEncoding encoding, int maxLength) throws IOException {
+      super(columnId, present);
+      this.maxLength = maxLength;
+      if (encoding == null) {
+        return;
+      }
+      final OrcProto.ColumnEncoding.Kind kind = encoding.getKind();
+      switch (kind) {
+        case DIRECT:
+        case DIRECT_V2:
+          reader = new StringDirectTreeReader(columnId, present, data, length, kind);
+          break;
+        case DICTIONARY:
+        case DICTIONARY_V2:
+          reader = new StringDictionaryTreeReader(columnId, present, data, length, dictionary,
+              encoding);
+          break;
+        default:
+          throw new IllegalArgumentException("Unsupported encoding " + kind);
+      }
+    }
+
+    /**
+     * Delegates the encoding check to the current inner reader.
+     */
+    @Override
+    void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
+      reader.checkEncoding(encoding);
+    }
+
+    /**
+     * Replaces the inner reader with one matching the column's encoding in
+     * this stripe and lets it start the stripe.
+     *
+     * @throws IllegalArgumentException if the encoding kind is neither direct nor dictionary
+     */
+    @Override
+    void startStripe(Map<StreamName, InStream> streams,
+                     OrcProto.StripeFooter stripeFooter
+    ) throws IOException {
+      final OrcProto.ColumnEncoding.Kind kind =
+          stripeFooter.getColumnsList().get(columnId).getKind();
+      switch (kind) {
+        case DIRECT:
+        case DIRECT_V2:
+          reader = new StringDirectTreeReader(columnId);
+          break;
+        case DICTIONARY:
+        case DICTIONARY_V2:
+          reader = new StringDictionaryTreeReader(columnId);
+          break;
+        default:
+          throw new IllegalArgumentException("Unsupported encoding " + kind);
+      }
+      reader.startStripe(streams, stripeFooter);
+    }
+
+    /**
+     * Delegates the seek to the inner reader.
+     */
+    @Override
+    void seek(PositionProvider[] index) throws IOException {
+      reader.seek(index);
+    }
+
+    /**
+     * Delegates the seek to the inner reader.
+     */
+    @Override
+    public void seek(PositionProvider index) throws IOException {
+      reader.seek(index);
+    }
+
+    /**
+     * @return a CHAR datum for the next row, or the null datum when the inner
+     *         reader yields {@code null}
+     */
+    @Override
+    Datum next() throws IOException {
+      final byte[] bytes = reader.next();
+      // TODO: enforce char length
+      return bytes == null ? NullDatum.get() : DatumFactory.createChar(bytes);
+    }
+
+    /**
+     * Delegates row skipping to the inner reader.
+     */
+    @Override
+    void skipRows(long items) throws IOException {
+      reader.skipRows(items);
+    }
+  }
+
+  // TODO: enable this to support record type
+//  protected static class StructTreeReader extends TreeReader {
+//    private final int fileColumnCount;
+//    private final int resultColumnCount;
+//    protected final TreeReader[] fields;
+//    private final String[] fieldNames;
+//
+//    protected StructTreeReader(
+//        int columnId,
+//        TreeReaderSchema treeReaderSchema,
+//        boolean[] included,
+//        boolean skipCorrupt) throws IOException {
+//      super(columnId);
+//
+//      OrcProto.Type fileStructType = treeReaderSchema.getFileTypes().get(columnId);
+//      fileColumnCount = fileStructType.getFieldNamesCount();
+//
+//      OrcProto.Type schemaStructType = treeReaderSchema.getSchemaTypes().get(columnId);
+//
+//      if (columnId == treeReaderSchema.getInnerStructSubtype()) {
+//        // If there are more result columns than reader columns, we will default those additional
+//        // columns to NULL.
+//        resultColumnCount = schemaStructType.getFieldNamesCount();
+//      } else {
+//        resultColumnCount = fileColumnCount;
+//      }
+//
+//      this.fields = new TreeReader[fileColumnCount];
+//      this.fieldNames = new String[fileColumnCount];
+//
+//      if (included == null) {
+//        for (int i = 0; i < fileColumnCount; ++i) {
+//          int subtype = schemaStructType.getSubtypes(i);
+//          this.fields[i] = createTreeReader(subtype, treeReaderSchema, included, skipCorrupt);
+//          // Use the treeReaderSchema evolution name since file/reader types may not have the real column name.
+//          this.fieldNames[i] = schemaStructType.getFieldNames(i);
+//        }
+//      } else {
+//        for (int i = 0; i < fileColumnCount; ++i) {
+//          int subtype = schemaStructType.getSubtypes(i);
+//          if (subtype >= included.length) {
+//            throw new IOException("subtype " + subtype + " exceeds the included array size " +
+//                included.length + " fileTypes " + treeReaderSchema.getFileTypes().toString() +
+//                " schemaTypes " + treeReaderSchema.getSchemaTypes().toString() +
+//                " innerStructSubtype " + treeReaderSchema.getInnerStructSubtype());
+//          }
+//          if (included[subtype]) {
+//            this.fields[i] = createTreeReader(subtype, treeReaderSchema, included, skipCorrupt);
+//          }
+//          // Use the treeReaderSchema evolution name since file/reader types may not have the real column name.
+//          this.fieldNames[i] = schemaStructType.getFieldNames(i);
+//        }
+//      }
+//    }
+//
+//    @Override
+//    void seek(PositionProvider[] index) throws IOException {
+//      super.seek(index);
+//      for (TreeReader kid : fields) {
+//        if (kid != null) {
+//          kid.seek(index);
+//        }
+//      }
+//    }
+//
+//    @Override
+//    Object next(Object previous) throws IOException {
+//      super.next(previous);
+//      OrcStruct result = null;
+//      if (valuePresent) {
+//        if (previous == null) {
+//          result = new OrcStruct(resultColumnCount);
+//        } else {
+//          result = (OrcStruct) previous;
+//
+//          // If the input format was initialized with a file with a
+//          // different number of fields, the number of fields needs to
+//          // be updated to the correct number
+//          if (result.getNumFields() != resultColumnCount) {
+//            result.setNumFields(resultColumnCount);
+//          }
+//        }
+//        for (int i = 0; i < fileColumnCount; ++i) {
+//          if (fields[i] != null) {
+//            result.setFieldValue(i, fields[i].next(result.getFieldValue(i)));
+//          }
+//        }
+//        if (resultColumnCount > fileColumnCount) {
+//          for (int i = fileColumnCount; i < resultColumnCount; ++i) {
+//            // Default new treeReaderSchema evolution fields to NULL.
+//            result.setFieldValue(i, null);
+//          }
+//        }
+//      }
+//      return result;
+//    }
+//
+//    @Override
+//    void startStripe(Map<StreamName, InStream> streams,
+//                     OrcProto.StripeFooter stripeFooter
+//    ) throws IOException {
+//      super.startStripe(streams, stripeFooter);
+//      for (TreeReader field : fields) {
+//        if (field != null) {
+//          field.startStripe(streams, stripeFooter);
+//        }
+//      }
+//    }
+//
+//    @Override
+//    void skipRows(long items) throws IOException {
+//      items = countNonNulls(items);
+//      for (TreeReader field : fields) {
+//        if (field != null) {
+//          field.skipRows(items);
+//        }
+//      }
+//    }
+//  }
+
+  /**
+   * Builds the tree reader that produces datums for the given column.
+   *
+   * @param timeZone    time zone passed to the timestamp reader; unused for
+   *                    every other type
+   * @param columnId    column index; one is added to it to obtain the ORC
+   *                    column id because the root record column is counted
+   * @param column      column whose type selects the reader
+   * @param skipCorrupt flag passed to the INT8 and TIMESTAMP readers
+   * @return the reader matching the column's type
+   * @throws IOException declared for the reader constructors
+   * @throws TajoRuntimeException wrapping an UnsupportedException when no
+   *         reader exists for the column's type
+   */
+  public static DatumTreeReader createTreeReader(TimeZone timeZone,
+                                                 int columnId,
+                                                 Column column,
+                                                 boolean skipCorrupt
+  ) throws IOException {
+    TypeDesc typeDesc = column.getTypeDesc();
+    int orcId = columnId + 1; // root record column is considered
+    switch (typeDesc.getDataType().getType()) {
+      // boolean and single-byte values
+      case BOOLEAN:
+        return new BooleanTreeReader(orcId);
+      case BIT:
+        return new ByteTreeReader(orcId);
+
+      // integers
+      case INT2:
+        return new ShortTreeReader(orcId);
+      case INT4:
+        return new IntTreeReader(orcId);
+      case INT8:
+        return new LongTreeReader(orcId, skipCorrupt);
+
+      // floating point
+      case FLOAT4:
+        return new FloatTreeReader(orcId);
+      case FLOAT8:
+        return new DoubleTreeReader(orcId);
+
+      // character and binary data
+      case CHAR:
+        return new CharTreeReader(orcId, typeDesc.getDataType().getLength());
+      case TEXT:
+        return new StringTreeReader(orcId);
+      case BLOB:
+        return new BinaryTreeReader(orcId);
+
+      // date and time
+      case DATE:
+        return new DateTreeReader(orcId);
+      case TIMESTAMP:
+        return new TimestampTreeReader(timeZone, orcId, skipCorrupt);
+
+      // network addresses
+      case INET4:
+        return new InetTreeReader(orcId);
+
+//      case STRUCT:
+//        return new StructTreeReader(columnId, treeReaderSchema, included, skipCorrupt);
+      default:
+        throw new TajoRuntimeException(new UnsupportedException("Unsupported type " +
+            typeDesc.getDataType().getType().name()));
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/Writer.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/Writer.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/Writer.java
index 669b44f..2c85aa6 100644
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/Writer.java
+++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/Writer.java
@@ -18,6 +18,8 @@
 
 package org.apache.tajo.storage.thirdparty.orc;
 
+import org.apache.orc.OrcProto;
+import org.apache.orc.StripeInformation;
 import org.apache.tajo.storage.Tuple;
 
 import java.io.IOException;

[6/7] tajo git commit: TAJO-2102: Migrate to Apache Orc from Presto's one.

Posted by ji...@apache.org.

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoBlobObjectInspector.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoBlobObjectInspector.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoBlobObjectInspector.java
deleted file mode 100644
index d241f84..0000000
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoBlobObjectInspector.java
+++ /dev/null
@@ -1,82 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.tajo.storage.orc.objectinspector;
-
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-import org.apache.hadoop.io.BytesWritable;
-import org.apache.tajo.datum.Datum;
-
-public class TajoBlobObjectInspector extends TajoPrimitiveObjectInspector implements BinaryObjectInspector {
-  @Override
-  public PrimitiveTypeInfo getTypeInfo() {
-    return TypeInfoFactory.binaryTypeInfo;
-  }
-
-  @Override
-  public PrimitiveCategory getPrimitiveCategory() {
-    return PrimitiveCategory.BINARY;
-  }
-
-  @Override
-  public Class<?> getPrimitiveWritableClass() {
-    return null;
-  }
-
-  @Override
-  public BytesWritable getPrimitiveWritableObject(Object o) {
-    return null;
-  }
-
-  @Override
-  public Class<?> getJavaPrimitiveClass() {
-    return byte [].class;
-  }
-
-  @Override
-  public byte[] getPrimitiveJavaObject(Object o) {
-    return ((Datum)o).asByteArray();
-  }
-
-  @Override
-  public Object copyObject(Object o) {
-    return null;
-  }
-
-  @Override
-  public boolean preferWritable() {
-    return false;
-  }
-
-  @Override
-  public int precision() {
-    return 0;
-  }
-
-  @Override
-  public int scale() {
-    return 0;
-  }
-
-  @Override
-  public String getTypeName() {
-    return "BINARY";
-  }
-}

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoBooleanObjectInspector.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoBooleanObjectInspector.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoBooleanObjectInspector.java
deleted file mode 100644
index 273505f..0000000
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoBooleanObjectInspector.java
+++ /dev/null
@@ -1,76 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.tajo.storage.orc.objectinspector;
-
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-import org.apache.tajo.datum.Datum;
-
-public class TajoBooleanObjectInspector extends TajoPrimitiveObjectInspector implements BooleanObjectInspector {
-  @Override
-  public boolean get(Object o) {
-    return ((Datum)o).asBool();
-  }
-
-  @Override
-  public PrimitiveTypeInfo getTypeInfo() {
-    return TypeInfoFactory.booleanTypeInfo;
-  }
-
-  @Override
-  public PrimitiveCategory getPrimitiveCategory() {
-    return PrimitiveCategory.BOOLEAN;
-  }
-
-  @Override
-  public Class<?> getPrimitiveWritableClass() {
-    return null;
-  }
-
-  @Override
-  public Object getPrimitiveWritableObject(Object o) {
-    return null;
-  }
-
-  @Override
-  public Class<?> getJavaPrimitiveClass() {
-    return Boolean.class;
-  }
-
-  @Override
-  public Object getPrimitiveJavaObject(Object o) {
-    return null;
-  }
-
-  @Override
-  public Object copyObject(Object o) {
-    return null;
-  }
-
-  @Override
-  public boolean preferWritable() {
-    return false;
-  }
-
-  @Override
-  public String getTypeName() {
-    return "BOOLEAN";
-  }
-}

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoDateObjectInspector.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoDateObjectInspector.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoDateObjectInspector.java
deleted file mode 100644
index f12706b..0000000
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoDateObjectInspector.java
+++ /dev/null
@@ -1,73 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.tajo.storage.orc.objectinspector;
-
-import org.apache.hadoop.hive.serde2.io.DateWritable;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-
-import java.sql.Date;
-
-public class TajoDateObjectInspector extends TajoPrimitiveObjectInspector implements DateObjectInspector {
-  @Override
-  public PrimitiveTypeInfo getTypeInfo() {
-    return TypeInfoFactory.dateTypeInfo;
-  }
-
-  @Override
-  public PrimitiveCategory getPrimitiveCategory() {
-    return PrimitiveCategory.DATE;
-  }
-
-  @Override
-  public Class<?> getPrimitiveWritableClass() {
-    return null;
-  }
-
-  @Override
-  public DateWritable getPrimitiveWritableObject(Object o) {
-    return null;
-  }
-
-  @Override
-  public Class<?> getJavaPrimitiveClass() {
-    return null;
-  }
-
-  @Override
-  public Date getPrimitiveJavaObject(Object o) {
-    return null;
-  }
-
-  @Override
-  public Object copyObject(Object o) {
-    return null;
-  }
-
-  @Override
-  public boolean preferWritable() {
-    return false;
-  }
-
-  @Override
-  public String getTypeName() {
-    return "DATE";
-  }
-}

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoDoubleObjectInspector.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoDoubleObjectInspector.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoDoubleObjectInspector.java
deleted file mode 100644
index 6dc1f8c..0000000
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoDoubleObjectInspector.java
+++ /dev/null
@@ -1,76 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.tajo.storage.orc.objectinspector;
-
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-import org.apache.tajo.datum.Float8Datum;
-
-public class TajoDoubleObjectInspector extends TajoPrimitiveObjectInspector implements DoubleObjectInspector {
-  @Override
-  public double get(Object o) {
-    return ((Float8Datum)o).asFloat8();
-  }
-
-  @Override
-  public PrimitiveTypeInfo getTypeInfo() {
-    return TypeInfoFactory.doubleTypeInfo;
-  }
-
-  @Override
-  public PrimitiveCategory getPrimitiveCategory() {
-    return PrimitiveCategory.DOUBLE;
-  }
-
-  @Override
-  public Class<?> getPrimitiveWritableClass() {
-    return null;
-  }
-
-  @Override
-  public Object getPrimitiveWritableObject(Object o) {
-    return null;
-  }
-
-  @Override
-  public Class<?> getJavaPrimitiveClass() {
-    return Double.class;
-  }
-
-  @Override
-  public Object getPrimitiveJavaObject(Object o) {
-    return null;
-  }
-
-  @Override
-  public Object copyObject(Object o) {
-    return null;
-  }
-
-  @Override
-  public boolean preferWritable() {
-    return false;
-  }
-
-  @Override
-  public String getTypeName() {
-    return "DOUBLE";
-  }
-}

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoFloatObjectInspector.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoFloatObjectInspector.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoFloatObjectInspector.java
deleted file mode 100644
index bed8784..0000000
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoFloatObjectInspector.java
+++ /dev/null
@@ -1,76 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.tajo.storage.orc.objectinspector;
-
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-import org.apache.tajo.datum.Float4Datum;
-
-public class TajoFloatObjectInspector extends TajoPrimitiveObjectInspector implements DoubleObjectInspector {
-  @Override
-  public double get(Object o) {
-    return ((Float4Datum)o).asFloat4();
-  }
-
-  @Override
-  public PrimitiveTypeInfo getTypeInfo() {
-    return TypeInfoFactory.floatTypeInfo;
-  }
-
-  @Override
-  public PrimitiveCategory getPrimitiveCategory() {
-    return PrimitiveCategory.FLOAT;
-  }
-
-  @Override
-  public Class<?> getPrimitiveWritableClass() {
-    return null;
-  }
-
-  @Override
-  public Object getPrimitiveWritableObject(Object o) {
-    return null;
-  }
-
-  @Override
-  public Class<?> getJavaPrimitiveClass() {
-    return Float.class;
-  }
-
-  @Override
-  public Object getPrimitiveJavaObject(Object o) {
-    return null;
-  }
-
-  @Override
-  public Object copyObject(Object o) {
-    return null;
-  }
-
-  @Override
-  public boolean preferWritable() {
-    return false;
-  }
-
-  @Override
-  public String getTypeName() {
-    return "FLOAT";
-  }
-}

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoIntObjectInspector.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoIntObjectInspector.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoIntObjectInspector.java
deleted file mode 100644
index a0c2209..0000000
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoIntObjectInspector.java
+++ /dev/null
@@ -1,76 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.tajo.storage.orc.objectinspector;
-
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-import org.apache.tajo.datum.Int4Datum;
-
-public class TajoIntObjectInspector extends TajoPrimitiveObjectInspector implements IntObjectInspector {
-  @Override
-  public int get(Object o) {
-    return ((Int4Datum)o).asInt4();
-  }
-
-  @Override
-  public PrimitiveTypeInfo getTypeInfo() {
-    return TypeInfoFactory.intTypeInfo;
-  }
-
-  @Override
-  public PrimitiveCategory getPrimitiveCategory() {
-    return PrimitiveCategory.INT;
-  }
-
-  @Override
-  public Class<?> getPrimitiveWritableClass() {
-    return null;
-  }
-
-  @Override
-  public Object getPrimitiveWritableObject(Object o) {
-    return null;
-  }
-
-  @Override
-  public Class<?> getJavaPrimitiveClass() {
-    return Integer.class;
-  }
-
-  @Override
-  public Object getPrimitiveJavaObject(Object o) {
-    return null;
-  }
-
-  @Override
-  public Object copyObject(Object o) {
-    return null;
-  }
-
-  @Override
-  public boolean preferWritable() {
-    return false;
-  }
-
-  @Override
-  public String getTypeName() {
-    return "INT";
-  }
-}

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoLongObjectInspector.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoLongObjectInspector.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoLongObjectInspector.java
deleted file mode 100644
index b30b333..0000000
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoLongObjectInspector.java
+++ /dev/null
@@ -1,76 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.tajo.storage.orc.objectinspector;
-
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-import org.apache.tajo.datum.Int8Datum;
-
-public class TajoLongObjectInspector extends TajoPrimitiveObjectInspector implements LongObjectInspector {
-  @Override
-  public long get(Object o) {
-    return ((Int8Datum)o).asInt8();
-  }
-
-  @Override
-  public PrimitiveTypeInfo getTypeInfo() {
-    return TypeInfoFactory.shortTypeInfo;
-  }
-
-  @Override
-  public PrimitiveCategory getPrimitiveCategory() {
-    return PrimitiveCategory.LONG;
-  }
-
-  @Override
-  public Class<?> getPrimitiveWritableClass() {
-    return null;
-  }
-
-  @Override
-  public Object getPrimitiveWritableObject(Object o) {
-    return null;
-  }
-
-  @Override
-  public Class<?> getJavaPrimitiveClass() {
-    return Long.class;
-  }
-
-  @Override
-  public Object getPrimitiveJavaObject(Object o) {
-    return null;
-  }
-
-  @Override
-  public Object copyObject(Object o) {
-    return null;
-  }
-
-  @Override
-  public boolean preferWritable() {
-    return false;
-  }
-
-  @Override
-  public String getTypeName() {
-    return "LONG";
-  }
-}

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoNullObjectInspector.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoNullObjectInspector.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoNullObjectInspector.java
deleted file mode 100644
index 49998ce..0000000
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoNullObjectInspector.java
+++ /dev/null
@@ -1,69 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.tajo.storage.orc.objectinspector;
-
-import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-
-public class TajoNullObjectInspector extends TajoPrimitiveObjectInspector {
-  @Override
-  public PrimitiveTypeInfo getTypeInfo() {
-    return TypeInfoFactory.voidTypeInfo;
-  }
-
-  @Override
-  public PrimitiveCategory getPrimitiveCategory() {
-    return PrimitiveCategory.VOID;
-  }
-
-  @Override
-  public Class<?> getPrimitiveWritableClass() {
-    return null;
-  }
-
-  @Override
-  public Object getPrimitiveWritableObject(Object o) {
-    return null;
-  }
-
-  @Override
-  public Class<?> getJavaPrimitiveClass() {
-    return Void.class;
-  }
-
-  @Override
-  public Object getPrimitiveJavaObject(Object o) {
-    return null;
-  }
-
-  @Override
-  public Object copyObject(Object o) {
-    return null;
-  }
-
-  @Override
-  public boolean preferWritable() {
-    return false;
-  }
-
-  @Override
-  public String getTypeName() {
-    return "NULL";
-  }
-}

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoPrimitiveObjectInspector.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoPrimitiveObjectInspector.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoPrimitiveObjectInspector.java
deleted file mode 100644
index 90ac178..0000000
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoPrimitiveObjectInspector.java
+++ /dev/null
@@ -1,38 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.tajo.storage.orc.objectinspector;
-
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
-
-public abstract class TajoPrimitiveObjectInspector implements PrimitiveObjectInspector {
-  @Override
-  public Category getCategory() {
-    return Category.PRIMITIVE;
-  }
-
-  @Override
-  public int precision() {
-    return 0;
-  }
-
-  @Override
-  public int scale() {
-    return 0;
-  }
-}

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoShortObjectInspector.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoShortObjectInspector.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoShortObjectInspector.java
deleted file mode 100644
index d32bee1..0000000
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoShortObjectInspector.java
+++ /dev/null
@@ -1,76 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.tajo.storage.orc.objectinspector;
-
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-import org.apache.tajo.datum.Int2Datum;
-
-public class TajoShortObjectInspector extends TajoPrimitiveObjectInspector implements ShortObjectInspector {
-  @Override
-  public short get(Object o) {
-    return ((Int2Datum)o).asInt2();
-  }
-
-  @Override
-  public PrimitiveTypeInfo getTypeInfo() {
-    return TypeInfoFactory.shortTypeInfo;
-  }
-
-  @Override
-  public PrimitiveCategory getPrimitiveCategory() {
-    return PrimitiveCategory.SHORT;
-  }
-
-  @Override
-  public Class<?> getPrimitiveWritableClass() {
-    return null;
-  }
-
-  @Override
-  public Object getPrimitiveWritableObject(Object o) {
-    return null;
-  }
-
-  @Override
-  public Class<?> getJavaPrimitiveClass() {
-    return Short.class;
-  }
-
-  @Override
-  public Object getPrimitiveJavaObject(Object o) {
-    return null;
-  }
-
-  @Override
-  public Object copyObject(Object o) {
-    return null;
-  }
-
-  @Override
-  public boolean preferWritable() {
-    return false;
-  }
-
-  @Override
-  public String getTypeName() {
-    return "SHORT";
-  }
-}

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoStringObjectInspector.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoStringObjectInspector.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoStringObjectInspector.java
deleted file mode 100644
index b9331da..0000000
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoStringObjectInspector.java
+++ /dev/null
@@ -1,71 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.tajo.storage.orc.objectinspector;
-
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-import org.apache.hadoop.io.Text;
-
-public class TajoStringObjectInspector extends TajoPrimitiveObjectInspector implements StringObjectInspector {
-  @Override
-  public PrimitiveTypeInfo getTypeInfo() {
-    return TypeInfoFactory.stringTypeInfo;
-  }
-
-  @Override
-  public PrimitiveCategory getPrimitiveCategory() {
-    return PrimitiveCategory.STRING;
-  }
-
-  @Override
-  public Class<?> getPrimitiveWritableClass() {
-    return null;
-  }
-
-  @Override
-  public Text getPrimitiveWritableObject(Object o) {
-    return null;
-  }
-
-  @Override
-  public Class<?> getJavaPrimitiveClass() {
-    return null;
-  }
-
-  @Override
-  public String getPrimitiveJavaObject(Object o) {
-    return null;
-  }
-
-  @Override
-  public Object copyObject(Object o) {
-    return null;
-  }
-
-  @Override
-  public boolean preferWritable() {
-    return false;
-  }
-
-  @Override
-  public String getTypeName() {
-    return "STRING";
-  }
-}

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoStructObjectInspector.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoStructObjectInspector.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoStructObjectInspector.java
deleted file mode 100644
index 7521fa3..0000000
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoStructObjectInspector.java
+++ /dev/null
@@ -1,122 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.tajo.storage.orc.objectinspector;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StructField;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.tajo.catalog.Column;
-import org.apache.tajo.catalog.Schema;
-import org.apache.tajo.exception.UnsupportedException;
-
-import java.util.ArrayList;
-import java.util.List;
-
-public class TajoStructObjectInspector extends StructObjectInspector {
-  private final static Log LOG = LogFactory.getLog(TajoStructObjectInspector.class);
-  private List<TajoStructField> structFields;
-
-  static class TajoStructField implements StructField {
-    private String name;
-    private ObjectInspector oi;
-    private String comment;
-
-    TajoStructField(String name, ObjectInspector oi) {
-      this(name, oi, null);
-    }
-
-    TajoStructField(String name, ObjectInspector oi, String comment) {
-      this.name = name;
-      this.oi = oi;
-      this.comment = comment;
-    }
-
-    @Override
-    public String getFieldName() {
-      return name;
-    }
-
-    @Override
-    public ObjectInspector getFieldObjectInspector() {
-      return oi;
-    }
-
-    @Override
-    public int getFieldID() {
-      return 0;
-    }
-
-    @Override
-    public String getFieldComment() {
-      return comment;
-    }
-  }
-
-  TajoStructObjectInspector(Schema schema) {
-    structFields = new ArrayList<>(schema.size());
-
-    for (Column c: schema.getRootColumns()) {
-      try {
-        TajoStructField field = new TajoStructField(c.getSimpleName(),
-          ObjectInspectorFactory.buildObjectInspectorByType(c.getDataType().getType()));
-        structFields.add(field);
-      } catch (UnsupportedException e) {
-        LOG.error(e.getMessage());
-      }
-    }
-  }
-
-  @Override
-  public List<? extends StructField> getAllStructFieldRefs() {
-    return structFields;
-  }
-
-  @Override
-  public StructField getStructFieldRef(String s) {
-    for (TajoStructField field:structFields) {
-      if (field.getFieldName().equals(s)) {
-        return field;
-      }
-    }
-
-    return null;
-  }
-
-  @Override
-  public Object getStructFieldData(Object o, StructField structField) {
-    return null;
-  }
-
-  @Override
-  public List<Object> getStructFieldsDataAsList(Object o) {
-    return null;
-  }
-
-  @Override
-  public String getTypeName() {
-    return "STRUCT";
-  }
-
-  @Override
-  public Category getCategory() {
-    return Category.STRUCT;
-  }
-}

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoTimestampObjectInspector.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoTimestampObjectInspector.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoTimestampObjectInspector.java
deleted file mode 100644
index bb887e7..0000000
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoTimestampObjectInspector.java
+++ /dev/null
@@ -1,73 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.tajo.storage.orc.objectinspector;
-
-import org.apache.hadoop.hive.serde2.io.TimestampWritable;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-
-import java.sql.Timestamp;
-
-public class TajoTimestampObjectInspector extends TajoPrimitiveObjectInspector implements TimestampObjectInspector {
-  @Override
-  public PrimitiveTypeInfo getTypeInfo() {
-    return TypeInfoFactory.timestampTypeInfo;
-  }
-
-  @Override
-  public PrimitiveCategory getPrimitiveCategory() {
-    return PrimitiveCategory.TIMESTAMP;
-  }
-
-  @Override
-  public Class<?> getPrimitiveWritableClass() {
-    return null;
-  }
-
-  @Override
-  public TimestampWritable getPrimitiveWritableObject(Object o) {
-    return null;
-  }
-
-  @Override
-  public Class<?> getJavaPrimitiveClass() {
-    return null;
-  }
-
-  @Override
-  public Timestamp getPrimitiveJavaObject(Object o) {
-    return null;
-  }
-
-  @Override
-  public Object copyObject(Object o) {
-    return null;
-  }
-
-  @Override
-  public boolean preferWritable() {
-    return false;
-  }
-
-  @Override
-  public String getTypeName() {
-    return "TIMESTAMP";
-  }
-}

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/BinaryColumnStatistics.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/BinaryColumnStatistics.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/BinaryColumnStatistics.java
deleted file mode 100644
index bee29fb..0000000
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/BinaryColumnStatistics.java
+++ /dev/null
@@ -1,25 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tajo.storage.thirdparty.orc;
-
-/**
- * Statistics for binary columns.
- */
-public interface BinaryColumnStatistics extends ColumnStatistics {
-  long getSum();
-}

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/BitFieldWriter.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/BitFieldWriter.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/BitFieldWriter.java
deleted file mode 100644
index 23719bd..0000000
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/BitFieldWriter.java
+++ /dev/null
@@ -1,69 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tajo.storage.thirdparty.orc;
-
-import java.io.IOException;
-
-class BitFieldWriter {
-  private RunLengthByteWriter output;
-  private final int bitSize;
-  private byte current = 0;
-  private int bitsLeft = 8;
-
-  BitFieldWriter(PositionedOutputStream output,
-                 int bitSize) throws IOException {
-    this.output = new RunLengthByteWriter(output);
-    this.bitSize = bitSize;
-  }
-
-  private void writeByte() throws IOException {
-    output.write(current);
-    current = 0;
-    bitsLeft = 8;
-  }
-
-  void flush() throws IOException {
-    if (bitsLeft != 8) {
-      writeByte();
-    }
-    output.flush();
-  }
-
-  void write(int value) throws IOException {
-    int bitsToWrite = bitSize;
-    while (bitsToWrite > bitsLeft) {
-      // add the bits to the bottom of the current word
-      current |= value >>> (bitsToWrite - bitsLeft);
-      // subtract out the bits we just added
-      bitsToWrite -= bitsLeft;
-      // zero out the bits above bitsToWrite
-      value &= (1 << bitsToWrite) - 1;
-      writeByte();
-    }
-    bitsLeft -= bitsToWrite;
-    current |= value << bitsLeft;
-    if (bitsLeft == 0) {
-      writeByte();
-    }
-  }
-
-  void getPosition(PositionRecorder recorder) throws IOException {
-    output.getPosition(recorder);
-    recorder.addPosition(8 - bitsLeft);
-  }
-}

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/BloomFilterIO.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/BloomFilterIO.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/BloomFilterIO.java
deleted file mode 100644
index 9d7c09c..0000000
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/BloomFilterIO.java
+++ /dev/null
@@ -1,42 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.tajo.storage.thirdparty.orc;
-
-import com.google.common.primitives.Longs;
-import org.apache.tajo.storage.thirdparty.orc.util.BloomFilter;
-
-public class BloomFilterIO extends BloomFilter {
-
-  public BloomFilterIO(long expectedEntries) {
-    super(expectedEntries, DEFAULT_FPP);
-  }
-
-  public BloomFilterIO(long expectedEntries, double fpp) {
-    super(expectedEntries, fpp);
-  }
-
-/**
- * Initializes the BloomFilter from the given Orc BloomFilter
- */
-  public BloomFilterIO(OrcProto.BloomFilter bloomFilter) {
-    this.bitSet = new BitSet(Longs.toArray(bloomFilter.getBitsetList()));
-    this.numHashFunctions = bloomFilter.getNumHashFunctions();
-    this.numBits = (int) this.bitSet.bitSize();
-  }
-}

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/BooleanColumnStatistics.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/BooleanColumnStatistics.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/BooleanColumnStatistics.java
deleted file mode 100644
index 0f55697..0000000
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/BooleanColumnStatistics.java
+++ /dev/null
@@ -1,27 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tajo.storage.thirdparty.orc;
-
-/**
- * Statistics for boolean columns.
- */
-public interface BooleanColumnStatistics extends ColumnStatistics {
-  long getFalseCount();
-
-  long getTrueCount();
-}

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/ByteBufferAllocatorPool.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/ByteBufferAllocatorPool.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/ByteBufferAllocatorPool.java
new file mode 100644
index 0000000..de60bb2
--- /dev/null
+++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/ByteBufferAllocatorPool.java
@@ -0,0 +1,102 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tajo.storage.thirdparty.orc;
+
+import com.google.common.collect.ComparisonChain;
+import org.apache.commons.lang.builder.HashCodeBuilder;
+
+import java.nio.ByteBuffer;
+import java.util.Map;
+import java.util.TreeMap;
+
+/**
+ * Caches heap and direct {@link ByteBuffer}s by capacity for reuse; not synchronized.
+ */
+public class ByteBufferAllocatorPool {
+  /** Orders cached buffers by capacity first, then by insertion generation. */
+  private static final class Key implements Comparable<Key> {
+    private final int capacity;
+    private final long insertionGeneration;
+
+    Key(int capacity, long insertionGeneration) {
+      this.capacity = capacity;
+      this.insertionGeneration = insertionGeneration;
+    }
+
+    @Override
+    public int compareTo(Key other) {
+      return ComparisonChain.start().compare(capacity, other.capacity)
+          .compare(insertionGeneration, other.insertionGeneration).result();
+    }
+
+    @Override
+    public boolean equals(Object rhs) {
+      // instanceof rejects null and foreign types without relying on an exception.
+      return rhs instanceof Key && compareTo((Key) rhs) == 0;
+    }
+
+    @Override
+    public int hashCode() {
+      return new HashCodeBuilder().append(capacity).append(insertionGeneration)
+          .toHashCode();
+    }
+  }
+
+  private final TreeMap<Key, ByteBuffer> buffers = new TreeMap<Key, ByteBuffer>();
+  private final TreeMap<Key, ByteBuffer> directBuffers = new TreeMap<Key, ByteBuffer>();
+  private long currentGeneration = 0;
+
+  private TreeMap<Key, ByteBuffer> getBufferTree(boolean direct) {
+    return direct ? directBuffers : buffers;
+  }
+
+  public void clear() {
+    buffers.clear();
+    directBuffers.clear();
+  }
+
+  /**
+   * Returns the smallest cached buffer of at least {@code length} bytes (it may be
+   * larger), or allocates one of exactly {@code length} bytes when none is cached.
+   */
+  public ByteBuffer getBuffer(boolean direct, int length) {
+    TreeMap<Key, ByteBuffer> tree = getBufferTree(direct);
+    Map.Entry<Key, ByteBuffer> entry = tree.ceilingEntry(new Key(length, 0));
+    if (entry == null) {
+      return direct ? ByteBuffer.allocateDirect(length) : ByteBuffer.allocate(length);
+    }
+    tree.remove(entry.getKey());
+    ByteBuffer pooled = entry.getValue();
+    pooled.clear(); // reset the position/limit left behind by the previous user
+    return pooled;
+  }
+
+  /** Returns {@code buffer} to the heap or direct cache, depending on its kind. */
+  public void putBuffer(ByteBuffer buffer) {
+    TreeMap<Key, ByteBuffer> tree = getBufferTree(buffer.isDirect());
+    while (true) {
+      // Buffers are indexed by (capacity, generation); retry until the key is unique.
+      Key key = new Key(buffer.capacity(), currentGeneration++);
+      if (!tree.containsKey(key)) {
+        tree.put(key, buffer);
+        return;
+      }
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/ByteBufferPoolAdapter.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/ByteBufferPoolAdapter.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/ByteBufferPoolAdapter.java
new file mode 100644
index 0000000..2e9aec1
--- /dev/null
+++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/ByteBufferPoolAdapter.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tajo.storage.thirdparty.orc;
+
+import org.apache.hadoop.io.ByteBufferPool;
+
+import java.nio.ByteBuffer;
+
+/** Adapts a {@link ByteBufferAllocatorPool} to Hadoop's {@link ByteBufferPool} interface. */
+public class ByteBufferPoolAdapter implements ByteBufferPool {
+  private final ByteBufferAllocatorPool pool; // delegate; assigned once in the constructor
+  public ByteBufferPoolAdapter(ByteBufferAllocatorPool pool) {
+    this.pool = pool;
+  }
+
+  @Override
+  public final ByteBuffer getBuffer(boolean direct, int length) {
+    return pool.getBuffer(direct, length);
+  }
+
+  @Override
+  public final void putBuffer(ByteBuffer buffer) {
+    pool.putBuffer(buffer);
+  }
+}

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/ColumnStatistics.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/ColumnStatistics.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/ColumnStatistics.java
deleted file mode 100644
index b317e41..0000000
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/ColumnStatistics.java
+++ /dev/null
@@ -1,36 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tajo.storage.thirdparty.orc;
-
-/**
- * Statistics that are available for all types of columns.
- */
-public interface ColumnStatistics {
-  /**
-   * Get the number of values in this column. It will differ from the number
-   * of rows because of NULL values and repeated values.
-   * @return the number of values
-   */
-  long getNumberOfValues();
-
-  /**
-   * Returns true if there are nulls in the scope of column statistics.
-   * @return true if null present else false
-   */
-  boolean hasNull();
-}

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/ColumnStatisticsImpl.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/ColumnStatisticsImpl.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/ColumnStatisticsImpl.java
deleted file mode 100644
index d74f989..0000000
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/ColumnStatisticsImpl.java
+++ /dev/null
@@ -1,1017 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tajo.storage.thirdparty.orc;
-
-import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.hadoop.hive.serde2.io.DateWritable;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
-import org.apache.tajo.datum.Datum;
-
-import java.sql.Date;
-import java.sql.Timestamp;
-
-class ColumnStatisticsImpl implements ColumnStatistics {
-
-  private static final class BooleanStatisticsImpl extends ColumnStatisticsImpl
-      implements BooleanColumnStatistics {
-    private long trueCount = 0;
-
-    BooleanStatisticsImpl(OrcProto.ColumnStatistics stats) {
-      super(stats);
-      OrcProto.BucketStatistics bkt = stats.getBucketStatistics();
-      trueCount = bkt.getCount(0);
-    }
-
-    BooleanStatisticsImpl() {
-    }
-
-    @Override
-    void reset() {
-      super.reset();
-      trueCount = 0;
-    }
-
-    @Override
-    void updateBoolean(boolean value) {
-      if (value) {
-        trueCount += 1;
-      }
-    }
-
-    @Override
-    void merge(ColumnStatisticsImpl other) {
-      if (other instanceof BooleanStatisticsImpl) {
-        BooleanStatisticsImpl bkt = (BooleanStatisticsImpl) other;
-        trueCount += bkt.trueCount;
-      } else {
-        if (isStatsExists() && trueCount != 0) {
-          throw new IllegalArgumentException("Incompatible merging of boolean column statistics");
-        }
-      }
-      super.merge(other);
-    }
-
-    @Override
-    OrcProto.ColumnStatistics.Builder serialize() {
-      OrcProto.ColumnStatistics.Builder builder = super.serialize();
-      OrcProto.BucketStatistics.Builder bucket =
-        OrcProto.BucketStatistics.newBuilder();
-      bucket.addCount(trueCount);
-      builder.setBucketStatistics(bucket);
-      return builder;
-    }
-
-    @Override
-    public long getFalseCount() {
-      return getNumberOfValues() - trueCount;
-    }
-
-    @Override
-    public long getTrueCount() {
-      return trueCount;
-    }
-
-    @Override
-    public String toString() {
-      return super.toString() + " true: " + trueCount;
-    }
-  }
-
-  private static final class IntegerStatisticsImpl extends ColumnStatisticsImpl
-      implements IntegerColumnStatistics {
-
-    private long minimum = Long.MAX_VALUE;
-    private long maximum = Long.MIN_VALUE;
-    private long sum = 0;
-    private boolean hasMinimum = false;
-    private boolean overflow = false;
-
-    IntegerStatisticsImpl() {
-    }
-
-    IntegerStatisticsImpl(OrcProto.ColumnStatistics stats) {
-      super(stats);
-      OrcProto.IntegerStatistics intStat = stats.getIntStatistics();
-      if (intStat.hasMinimum()) {
-        hasMinimum = true;
-        minimum = intStat.getMinimum();
-      }
-      if (intStat.hasMaximum()) {
-        maximum = intStat.getMaximum();
-      }
-      if (intStat.hasSum()) {
-        sum = intStat.getSum();
-      } else {
-        overflow = true;
-      }
-    }
-
-    @Override
-    void reset() {
-      super.reset();
-      hasMinimum = false;
-      minimum = Long.MAX_VALUE;
-      maximum = Long.MIN_VALUE;
-      sum = 0;
-      overflow = false;
-    }
-
-    @Override
-    void updateInteger(long value) {
-      if (!hasMinimum) {
-        hasMinimum = true;
-        minimum = value;
-        maximum = value;
-      } else if (value < minimum) {
-        minimum = value;
-      } else if (value > maximum) {
-        maximum = value;
-      }
-      if (!overflow) {
-        boolean wasPositive = sum >= 0;
-        sum += value;
-        if ((value >= 0) == wasPositive) {
-          overflow = (sum >= 0) != wasPositive;
-        }
-      }
-    }
-
-    @Override
-    void merge(ColumnStatisticsImpl other) {
-      if (other instanceof IntegerStatisticsImpl) {
-        IntegerStatisticsImpl otherInt = (IntegerStatisticsImpl) other;
-        if (!hasMinimum) {
-          hasMinimum = otherInt.hasMinimum;
-          minimum = otherInt.minimum;
-          maximum = otherInt.maximum;
-        } else if (otherInt.hasMinimum) {
-          if (otherInt.minimum < minimum) {
-            minimum = otherInt.minimum;
-          }
-          if (otherInt.maximum > maximum) {
-            maximum = otherInt.maximum;
-          }
-        }
-
-        overflow |= otherInt.overflow;
-        if (!overflow) {
-          boolean wasPositive = sum >= 0;
-          sum += otherInt.sum;
-          if ((otherInt.sum >= 0) == wasPositive) {
-            overflow = (sum >= 0) != wasPositive;
-          }
-        }
-      } else {
-        if (isStatsExists() && hasMinimum) {
-          throw new IllegalArgumentException("Incompatible merging of integer column statistics");
-        }
-      }
-      super.merge(other);
-    }
-
-    @Override
-    OrcProto.ColumnStatistics.Builder serialize() {
-      OrcProto.ColumnStatistics.Builder builder = super.serialize();
-      OrcProto.IntegerStatistics.Builder intb =
-        OrcProto.IntegerStatistics.newBuilder();
-      if (hasMinimum) {
-        intb.setMinimum(minimum);
-        intb.setMaximum(maximum);
-      }
-      if (!overflow) {
-        intb.setSum(sum);
-      }
-      builder.setIntStatistics(intb);
-      return builder;
-    }
-
-    @Override
-    public long getMinimum() {
-      return minimum;
-    }
-
-    @Override
-    public long getMaximum() {
-      return maximum;
-    }
-
-    @Override
-    public boolean isSumDefined() {
-      return !overflow;
-    }
-
-    @Override
-    public long getSum() {
-      return sum;
-    }
-
-    @Override
-    public String toString() {
-      StringBuilder buf = new StringBuilder(super.toString());
-      if (hasMinimum) {
-        buf.append(" min: ");
-        buf.append(minimum);
-        buf.append(" max: ");
-        buf.append(maximum);
-      }
-      if (!overflow) {
-        buf.append(" sum: ");
-        buf.append(sum);
-      }
-      return buf.toString();
-    }
-  }
-
-  private static final class DoubleStatisticsImpl extends ColumnStatisticsImpl
-       implements DoubleColumnStatistics {
-    private boolean hasMinimum = false;
-    private double minimum = Double.MAX_VALUE;
-    private double maximum = Double.MIN_VALUE;
-    private double sum = 0;
-
-    DoubleStatisticsImpl() {
-    }
-
-    DoubleStatisticsImpl(OrcProto.ColumnStatistics stats) {
-      super(stats);
-      OrcProto.DoubleStatistics dbl = stats.getDoubleStatistics();
-      if (dbl.hasMinimum()) {
-        hasMinimum = true;
-        minimum = dbl.getMinimum();
-      }
-      if (dbl.hasMaximum()) {
-        maximum = dbl.getMaximum();
-      }
-      if (dbl.hasSum()) {
-        sum = dbl.getSum();
-      }
-    }
-
-    @Override
-    void reset() {
-      super.reset();
-      hasMinimum = false;
-      minimum = Double.MAX_VALUE;
-      maximum = Double.MIN_VALUE;
-      sum = 0;
-    }
-
-    @Override
-    void updateDouble(double value) {
-      if (!hasMinimum) {
-        hasMinimum = true;
-        minimum = value;
-        maximum = value;
-      } else if (value < minimum) {
-        minimum = value;
-      } else if (value > maximum) {
-        maximum = value;
-      }
-      sum += value;
-    }
-
-    @Override
-    void merge(ColumnStatisticsImpl other) {
-      if (other instanceof DoubleStatisticsImpl) {
-        DoubleStatisticsImpl dbl = (DoubleStatisticsImpl) other;
-        if (!hasMinimum) {
-          hasMinimum = dbl.hasMinimum;
-          minimum = dbl.minimum;
-          maximum = dbl.maximum;
-        } else if (dbl.hasMinimum) {
-          if (dbl.minimum < minimum) {
-            minimum = dbl.minimum;
-          }
-          if (dbl.maximum > maximum) {
-            maximum = dbl.maximum;
-          }
-        }
-        sum += dbl.sum;
-      } else {
-        if (isStatsExists() && hasMinimum) {
-          throw new IllegalArgumentException("Incompatible merging of double column statistics");
-        }
-      }
-      super.merge(other);
-    }
-
-    @Override
-    OrcProto.ColumnStatistics.Builder serialize() {
-      OrcProto.ColumnStatistics.Builder builder = super.serialize();
-      OrcProto.DoubleStatistics.Builder dbl =
-        OrcProto.DoubleStatistics.newBuilder();
-      if (hasMinimum) {
-        dbl.setMinimum(minimum);
-        dbl.setMaximum(maximum);
-      }
-      dbl.setSum(sum);
-      builder.setDoubleStatistics(dbl);
-      return builder;
-    }
-
-    @Override
-    public double getMinimum() {
-      return minimum;
-    }
-
-    @Override
-    public double getMaximum() {
-      return maximum;
-    }
-
-    @Override
-    public double getSum() {
-      return sum;
-    }
-
-    @Override
-    public String toString() {
-      StringBuilder buf = new StringBuilder(super.toString());
-      if (hasMinimum) {
-        buf.append(" min: ");
-        buf.append(minimum);
-        buf.append(" max: ");
-        buf.append(maximum);
-      }
-      buf.append(" sum: ");
-      buf.append(sum);
-      return buf.toString();
-    }
-  }
-
-  protected static final class StringStatisticsImpl extends ColumnStatisticsImpl
-      implements StringColumnStatistics {
-    private String minimum = null;
-    private String maximum = null;
-    private long sum = 0;
-
-    StringStatisticsImpl() {
-    }
-
-    StringStatisticsImpl(OrcProto.ColumnStatistics stats) {
-      super(stats);
-      OrcProto.StringStatistics str = stats.getStringStatistics();
-      if (str.hasMaximum()) {
-        maximum = str.getMaximum();
-      }
-      if (str.hasMinimum()) {
-        minimum = str.getMinimum();
-      }
-      if(str.hasSum()) {
-        sum = str.getSum();
-      }
-    }
-
-    @Override
-    void reset() {
-      super.reset();
-      minimum = null;
-      maximum = null;
-      sum = 0;
-    }
-
-    @Override
-    void updateString(String value) {
-      if (minimum == null) {
-        maximum = minimum = value;
-      } else if (minimum.compareTo(value) > 0) {
-        minimum = value;
-      } else if (maximum.compareTo(value) < 0) {
-        maximum = value;
-      }
-      sum += value.length();
-    }
-
-    @Override
-    void merge(ColumnStatisticsImpl other) {
-      if (other instanceof StringStatisticsImpl) {
-        StringStatisticsImpl str = (StringStatisticsImpl) other;
-        if (minimum == null) {
-          if (str.minimum != null) {
-            maximum = str.getMaximum();
-            minimum = str.getMinimum();
-          } else {
-          /* both are empty */
-            maximum = minimum = null;
-          }
-        } else if (str.minimum != null) {
-          if (minimum.compareTo(str.minimum) > 0) {
-            minimum = str.getMinimum();
-          }
-          if (maximum.compareTo(str.maximum) < 0) {
-            maximum = str.getMaximum();
-          }
-        }
-        sum += str.sum;
-      } else {
-        if (isStatsExists() && minimum != null) {
-          throw new IllegalArgumentException("Incompatible merging of string column statistics");
-        }
-      }
-      super.merge(other);
-    }
-
-    @Override
-    OrcProto.ColumnStatistics.Builder serialize() {
-      OrcProto.ColumnStatistics.Builder result = super.serialize();
-      OrcProto.StringStatistics.Builder str =
-        OrcProto.StringStatistics.newBuilder();
-      if (getNumberOfValues() != 0) {
-        str.setMinimum(getMinimum());
-        str.setMaximum(getMaximum());
-        str.setSum(sum);
-      }
-      result.setStringStatistics(str);
-      return result;
-    }
-
-    @Override
-    public String getMinimum() {
-      return minimum;
-    }
-
-    @Override
-    public String getMaximum() {
-      return maximum;
-    }
-
-    @Override
-    public long getSum() {
-      return sum;
-    }
-
-    @Override
-    public String toString() {
-      StringBuilder buf = new StringBuilder(super.toString());
-      if (getNumberOfValues() != 0) {
-        buf.append(" min: ");
-        buf.append(getMinimum());
-        buf.append(" max: ");
-        buf.append(getMaximum());
-        buf.append(" sum: ");
-        buf.append(sum);
-      }
-      return buf.toString();
-    }
-  }
-
-  protected static final class BinaryStatisticsImpl extends ColumnStatisticsImpl implements
-    BinaryColumnStatistics {
-
-    private long sum = 0;
-
-    BinaryStatisticsImpl() {
-    }
-
-    BinaryStatisticsImpl(OrcProto.ColumnStatistics stats) {
-      super(stats);
-      OrcProto.BinaryStatistics binStats = stats.getBinaryStatistics();
-      if (binStats.hasSum()) {
-        sum = binStats.getSum();
-      }
-    }
-
-    @Override
-    void reset() {
-      super.reset();
-      sum = 0;
-    }
-
-    @Override
-    void updateBinary(Datum value) {
-      sum += value.size();
-    }
-
-    @Override
-    void merge(ColumnStatisticsImpl other) {
-      if (other instanceof BinaryColumnStatistics) {
-        BinaryStatisticsImpl bin = (BinaryStatisticsImpl) other;
-        sum += bin.sum;
-      } else {
-        if (isStatsExists() && sum != 0) {
-          throw new IllegalArgumentException("Incompatible merging of binary column statistics");
-        }
-      }
-      super.merge(other);
-    }
-
-    @Override
-    public long getSum() {
-      return sum;
-    }
-
-    @Override
-    OrcProto.ColumnStatistics.Builder serialize() {
-      OrcProto.ColumnStatistics.Builder result = super.serialize();
-      OrcProto.BinaryStatistics.Builder bin = OrcProto.BinaryStatistics.newBuilder();
-      bin.setSum(sum);
-      result.setBinaryStatistics(bin);
-      return result;
-    }
-
-    @Override
-    public String toString() {
-      StringBuilder buf = new StringBuilder(super.toString());
-      if (getNumberOfValues() != 0) {
-        buf.append(" sum: ");
-        buf.append(sum);
-      }
-      return buf.toString();
-    }
-  }
-
-  private static final class DecimalStatisticsImpl extends ColumnStatisticsImpl
-      implements DecimalColumnStatistics {
-    private HiveDecimal minimum = null;
-    private HiveDecimal maximum = null;
-    private HiveDecimal sum = HiveDecimal.ZERO;
-
-    DecimalStatisticsImpl() {
-    }
-
-    DecimalStatisticsImpl(OrcProto.ColumnStatistics stats) {
-      super(stats);
-      OrcProto.DecimalStatistics dec = stats.getDecimalStatistics();
-      if (dec.hasMaximum()) {
-        maximum = HiveDecimal.create(dec.getMaximum());
-      }
-      if (dec.hasMinimum()) {
-        minimum = HiveDecimal.create(dec.getMinimum());
-      }
-      if (dec.hasSum()) {
-        sum = HiveDecimal.create(dec.getSum());
-      } else {
-        sum = null;
-      }
-    }
-
-    @Override
-    void reset() {
-      super.reset();
-      minimum = null;
-      maximum = null;
-      sum = HiveDecimal.ZERO;
-    }
-
-    @Override
-    void updateDecimal(HiveDecimal value) {
-      if (minimum == null) {
-        minimum = value;
-        maximum = value;
-      } else if (minimum.compareTo(value) > 0) {
-        minimum = value;
-      } else if (maximum.compareTo(value) < 0) {
-        maximum = value;
-      }
-      if (sum != null) {
-        sum = sum.add(value);
-      }
-    }
-
-    @Override
-    void merge(ColumnStatisticsImpl other) {
-      if (other instanceof DecimalStatisticsImpl) {
-        DecimalStatisticsImpl dec = (DecimalStatisticsImpl) other;
-        if (minimum == null) {
-          minimum = dec.minimum;
-          maximum = dec.maximum;
-          sum = dec.sum;
-        } else if (dec.minimum != null) {
-          if (minimum.compareTo(dec.minimum) > 0) {
-            minimum = dec.minimum;
-          }
-          if (maximum.compareTo(dec.maximum) < 0) {
-            maximum = dec.maximum;
-          }
-          if (sum == null || dec.sum == null) {
-            sum = null;
-          } else {
-            sum = sum.add(dec.sum);
-          }
-        }
-      } else {
-        if (isStatsExists() && minimum != null) {
-          throw new IllegalArgumentException("Incompatible merging of decimal column statistics");
-        }
-      }
-      super.merge(other);
-    }
-
-    @Override
-    OrcProto.ColumnStatistics.Builder serialize() {
-      OrcProto.ColumnStatistics.Builder result = super.serialize();
-      OrcProto.DecimalStatistics.Builder dec =
-          OrcProto.DecimalStatistics.newBuilder();
-      if (getNumberOfValues() != 0 && minimum != null) {
-        dec.setMinimum(minimum.toString());
-        dec.setMaximum(maximum.toString());
-      }
-      if (sum != null) {
-        dec.setSum(sum.toString());
-      }
-      result.setDecimalStatistics(dec);
-      return result;
-    }
-
-    @Override
-    public HiveDecimal getMinimum() {
-      return minimum;
-    }
-
-    @Override
-    public HiveDecimal getMaximum() {
-      return maximum;
-    }
-
-    @Override
-    public HiveDecimal getSum() {
-      return sum;
-    }
-
-    @Override
-    public String toString() {
-      StringBuilder buf = new StringBuilder(super.toString());
-      if (getNumberOfValues() != 0) {
-        buf.append(" min: ");
-        buf.append(minimum);
-        buf.append(" max: ");
-        buf.append(maximum);
-        if (sum != null) {
-          buf.append(" sum: ");
-          buf.append(sum);
-        }
-      }
-      return buf.toString();
-    }
-  }
-
-  private static final class DateStatisticsImpl extends ColumnStatisticsImpl
-      implements DateColumnStatistics {
-    private Integer minimum = null;
-    private Integer maximum = null;
-
-    DateStatisticsImpl() {
-    }
-
-    DateStatisticsImpl(OrcProto.ColumnStatistics stats) {
-      super(stats);
-      OrcProto.DateStatistics dateStats = stats.getDateStatistics();
-      // min,max values serialized/deserialized as int (days since epoch)
-      if (dateStats.hasMaximum()) {
-        maximum = dateStats.getMaximum();
-      }
-      if (dateStats.hasMinimum()) {
-        minimum = dateStats.getMinimum();
-      }
-    }
-
-    @Override
-    void reset() {
-      super.reset();
-      minimum = null;
-      maximum = null;
-    }
-
-    @Override
-    void updateDate(int daysSinceEpoch) {
-      if (minimum == null) {
-        minimum = daysSinceEpoch;
-        maximum = daysSinceEpoch;
-      } else if (minimum > daysSinceEpoch) {
-        minimum = daysSinceEpoch;
-      } else if (maximum < daysSinceEpoch) {
-        maximum = daysSinceEpoch;
-      }
-    }
-
-    @Override
-    void merge(ColumnStatisticsImpl other) {
-      if (other instanceof DateStatisticsImpl) {
-        DateStatisticsImpl dateStats = (DateStatisticsImpl) other;
-        if (minimum == null) {
-          minimum = dateStats.minimum;
-          maximum = dateStats.maximum;
-        } else if (dateStats.minimum != null) {
-          if (minimum > dateStats.minimum) {
-            minimum = dateStats.minimum;
-          }
-          if (maximum < dateStats.maximum) {
-            maximum = dateStats.maximum;
-          }
-        }
-      } else {
-        if (isStatsExists() && minimum != null) {
-          throw new IllegalArgumentException("Incompatible merging of date column statistics");
-        }
-      }
-      super.merge(other);
-    }
-
-    @Override
-    OrcProto.ColumnStatistics.Builder serialize() {
-      OrcProto.ColumnStatistics.Builder result = super.serialize();
-      OrcProto.DateStatistics.Builder dateStats =
-          OrcProto.DateStatistics.newBuilder();
-      if (getNumberOfValues() != 0 && minimum != null) {
-        dateStats.setMinimum(minimum);
-        dateStats.setMaximum(maximum);
-      }
-      result.setDateStatistics(dateStats);
-      return result;
-    }
-
-    private transient final DateWritable minDate = new DateWritable();
-    private transient final DateWritable maxDate = new DateWritable();
-
-    @Override
-    public Date getMinimum() {
-      if (minimum == null) {
-        return null;
-      }
-      minDate.set(minimum);
-      return minDate.get();
-    }
-
-    @Override
-    public Date getMaximum() {
-      if (maximum == null) {
-        return null;
-      }
-      maxDate.set(maximum);
-      return maxDate.get();
-    }
-
-    @Override
-    public String toString() {
-      StringBuilder buf = new StringBuilder(super.toString());
-      if (getNumberOfValues() != 0) {
-        buf.append(" min: ");
-        buf.append(getMinimum());
-        buf.append(" max: ");
-        buf.append(getMaximum());
-      }
-      return buf.toString();
-    }
-  }
-
-  private static final class TimestampStatisticsImpl extends ColumnStatisticsImpl
-      implements TimestampColumnStatistics {
-    private Long minimum = null;
-    private Long maximum = null;
-
-    TimestampStatisticsImpl() {
-    }
-
-    TimestampStatisticsImpl(OrcProto.ColumnStatistics stats) {
-      super(stats);
-      OrcProto.TimestampStatistics timestampStats = stats.getTimestampStatistics();
-      // min,max values serialized/deserialized as int (milliseconds since epoch)
-      if (timestampStats.hasMaximum()) {
-        maximum = timestampStats.getMaximum();
-      }
-      if (timestampStats.hasMinimum()) {
-        minimum = timestampStats.getMinimum();
-      }
-    }
-
-    @Override
-    void reset() {
-      super.reset();
-      minimum = null;
-      maximum = null;
-    }
-
-    @Override
-    void updateTimestamp(Timestamp value) {
-      if (minimum == null) {
-        minimum = value.getTime();
-        maximum = value.getTime();
-      } else if (minimum > value.getTime()) {
-        minimum = value.getTime();
-      } else if (maximum < value.getTime()) {
-        maximum = value.getTime();
-      }
-    }
-
-    @Override
-    void merge(ColumnStatisticsImpl other) {
-      if (other instanceof TimestampStatisticsImpl) {
-        TimestampStatisticsImpl timestampStats = (TimestampStatisticsImpl) other;
-        if (minimum == null) {
-          minimum = timestampStats.minimum;
-          maximum = timestampStats.maximum;
-        } else if (timestampStats.minimum != null) {
-          if (minimum > timestampStats.minimum) {
-            minimum = timestampStats.minimum;
-          }
-          if (maximum < timestampStats.maximum) {
-            maximum = timestampStats.maximum;
-          }
-        }
-      } else {
-        if (isStatsExists() && minimum != null) {
-          throw new IllegalArgumentException("Incompatible merging of timestamp column statistics");
-        }
-      }
-      super.merge(other);
-    }
-
-    @Override
-    OrcProto.ColumnStatistics.Builder serialize() {
-      OrcProto.ColumnStatistics.Builder result = super.serialize();
-      OrcProto.TimestampStatistics.Builder timestampStats = OrcProto.TimestampStatistics
-          .newBuilder();
-      if (getNumberOfValues() != 0 && minimum != null) {
-        timestampStats.setMinimum(minimum);
-        timestampStats.setMaximum(maximum);
-      }
-      result.setTimestampStatistics(timestampStats);
-      return result;
-    }
-
-    @Override
-    public Timestamp getMinimum() {
-      return minimum == null ? null : new Timestamp(minimum);
-    }
-
-    @Override
-    public Timestamp getMaximum() {
-      return maximum == null ? null : new Timestamp(maximum);
-    }
-
-    @Override
-    public String toString() {
-      StringBuilder buf = new StringBuilder(super.toString());
-      if (getNumberOfValues() != 0) {
-        buf.append(" min: ");
-        buf.append(getMinimum());
-        buf.append(" max: ");
-        buf.append(getMaximum());
-      }
-      return buf.toString();
-    }
-  }
-
-  private long count = 0;
-  private boolean hasNull = false;
-
-  ColumnStatisticsImpl(OrcProto.ColumnStatistics stats) {
-    if (stats.hasNumberOfValues()) {
-      count = stats.getNumberOfValues();
-    }
-
-    hasNull = !stats.hasHasNull() || stats.getHasNull();
-  }
-
-  ColumnStatisticsImpl() {
-  }
-
-  void increment() {
-    count += 1;
-  }
-
-  void setNull() {
-    hasNull = true;
-  }
-
-  void updateBoolean(boolean value) {
-    throw new UnsupportedOperationException("Can't update boolean");
-  }
-
-  void updateInteger(long value) {
-    throw new UnsupportedOperationException("Can't update integer");
-  }
-
-  void updateDouble(double value) {
-    throw new UnsupportedOperationException("Can't update double");
-  }
-
-  void updateString(String value) {
-    throw new UnsupportedOperationException("Can't update string");
-  }
-
-  void updateBinary(Datum value) {
-    throw new UnsupportedOperationException("Can't update binary");
-  }
-
-  void updateDecimal(HiveDecimal value) {
-    throw new UnsupportedOperationException("Can't update decimal");
-  }
-
-  void updateDate(int days) {
-    throw new UnsupportedOperationException("Can't update date");
-  }
-
-  void updateTimestamp(Timestamp value) {
-    throw new UnsupportedOperationException("Can't update timestamp");
-  }
-
-  boolean isStatsExists() {
-    return (count > 0 || hasNull);
-  }
-
-  void merge(ColumnStatisticsImpl stats) {
-    count += stats.count;
-    hasNull |= stats.hasNull;
-  }
-
-  void reset() {
-    count = 0;
-    hasNull = false;
-  }
-
-  @Override
-  public long getNumberOfValues() {
-    return count;
-  }
-
-  @Override
-  public boolean hasNull() {
-    return hasNull;
-  }
-
-  @Override
-  public String toString() {
-    return "count: " + count + " hasNull: " + hasNull;
-  }
-
-  OrcProto.ColumnStatistics.Builder serialize() {
-    OrcProto.ColumnStatistics.Builder builder =
-      OrcProto.ColumnStatistics.newBuilder();
-    builder.setNumberOfValues(count);
-    builder.setHasNull(hasNull);
-    return builder;
-  }
-
-  static ColumnStatisticsImpl create(ObjectInspector inspector) {
-    switch (inspector.getCategory()) {
-      case PRIMITIVE:
-        switch (((PrimitiveObjectInspector) inspector).getPrimitiveCategory()) {
-          case BOOLEAN:
-            return new BooleanStatisticsImpl();
-          case BYTE:
-          case SHORT:
-          case INT:
-          case LONG:
-            return new IntegerStatisticsImpl();
-          case FLOAT:
-          case DOUBLE:
-            return new DoubleStatisticsImpl();
-          case STRING:
-          case CHAR:
-          case VARCHAR:
-            return new StringStatisticsImpl();
-          case DECIMAL:
-            return new DecimalStatisticsImpl();
-          case DATE:
-            return new DateStatisticsImpl();
-          case TIMESTAMP:
-            return new TimestampStatisticsImpl();
-          case BINARY:
-            return new BinaryStatisticsImpl();
-          default:
-            return new ColumnStatisticsImpl();
-        }
-      default:
-        return new ColumnStatisticsImpl();
-    }
-  }
-
-  static ColumnStatisticsImpl deserialize(OrcProto.ColumnStatistics stats) {
-    if (stats.hasBucketStatistics()) {
-      return new BooleanStatisticsImpl(stats);
-    } else if (stats.hasIntStatistics()) {
-      return new IntegerStatisticsImpl(stats);
-    } else if (stats.hasDoubleStatistics()) {
-      return new DoubleStatisticsImpl(stats);
-    } else if (stats.hasStringStatistics()) {
-      return new StringStatisticsImpl(stats);
-    } else if (stats.hasDecimalStatistics()) {
-      return new DecimalStatisticsImpl(stats);
-    } else if (stats.hasDateStatistics()) {
-      return new DateStatisticsImpl(stats);
-    } else if (stats.hasTimestampStatistics()) {
-      return new TimestampStatisticsImpl(stats);
-    } else if(stats.hasBinaryStatistics()) {
-      return new BinaryStatisticsImpl(stats);
-    } else {
-      return new ColumnStatisticsImpl(stats);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/CompressionCodec.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/CompressionCodec.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/CompressionCodec.java
deleted file mode 100644
index 769ca50..0000000
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/CompressionCodec.java
+++ /dev/null
@@ -1,68 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tajo.storage.thirdparty.orc;
-
-import javax.annotation.Nullable;
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.EnumSet;
-
-public interface CompressionCodec {
-
-  public enum Modifier {
-    /* speed/compression tradeoffs */
-    FASTEST,
-    FAST,
-    DEFAULT,
-    /* data sensitivity modifiers */
-    TEXT,
-    BINARY
-  };
-
-  /**
-   * Compress the in buffer to the out buffer.
-   * @param in the bytes to compress
-   * @param out the uncompressed bytes
-   * @param overflow put any additional bytes here
-   * @return true if the output is smaller than input
-   * @throws IOException
-   */
-  boolean compress(ByteBuffer in, ByteBuffer out, ByteBuffer overflow
-  ) throws IOException;
-
-  /**
-   * Decompress the in buffer to the out buffer.
-   * @param in the bytes to decompress
-   * @param out the decompressed bytes
-   * @throws IOException
-   */
-  void decompress(ByteBuffer in, ByteBuffer out) throws IOException;
-
-  /**
-   * Produce a modified compression codec if the underlying algorithm allows
-   * modification.
-   *
-   * This does not modify the current object, but returns a new object if
-   * modifications are possible. Returns the same object if no modifications
-   * are possible.
-   * @param modifiers compression modifiers
-   * @return codec for use after optional modification
-   */
-  CompressionCodec modify(@Nullable EnumSet<Modifier> modifiers);
-
-}

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/CompressionKind.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/CompressionKind.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/CompressionKind.java
deleted file mode 100644
index 8b16c67..0000000
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/CompressionKind.java
+++ /dev/null
@@ -1,27 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.tajo.storage.thirdparty.orc;
-
-/**
- * An enumeration that lists the generic compression algorithms that
- * can be applied to ORC files.
- */
-public enum CompressionKind {
-  NONE, ZLIB, SNAPPY, LZO
-}

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/DateColumnStatistics.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/DateColumnStatistics.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/DateColumnStatistics.java
deleted file mode 100644
index cb3405e..0000000
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/DateColumnStatistics.java
+++ /dev/null
@@ -1,37 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tajo.storage.thirdparty.orc;
-
-import java.util.Date;
-
-/**
- * Statistics for DATE columns.
- */
-public interface DateColumnStatistics extends ColumnStatistics {
-  /**
-   * Get the minimum value for the column.
-   * @return minimum value
-   */
-  Date getMinimum();
-
-  /**
-   * Get the maximum value for the column.
-   * @return maximum value
-   */
-  Date getMaximum();
-}

[4/7] tajo git commit: TAJO-2102: Migrate to Apache Orc from Presto's one.

Posted by ji...@apache.org.

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/OutStream.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/OutStream.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/OutStream.java
deleted file mode 100644
index f6cfd57..0000000
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/OutStream.java
+++ /dev/null
@@ -1,286 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tajo.storage.thirdparty.orc;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-
-class OutStream extends PositionedOutputStream {
-
-  interface OutputReceiver {
-    /**
-     * Output the given buffer to the final destination
-     * @param buffer the buffer to output
-     * @throws IOException
-     */
-    void output(ByteBuffer buffer) throws IOException;
-  }
-
-  static final int HEADER_SIZE = 3;
-  private final String name;
-  private final OutputReceiver receiver;
-  // if enabled the stream will be suppressed when writing stripe
-  private boolean suppress;
-
-  /**
-   * Stores the uncompressed bytes that have been serialized, but not
-   * compressed yet. When this fills, we compress the entire buffer.
-   */
-  private ByteBuffer current = null;
-
-  /**
-   * Stores the compressed bytes until we have a full buffer and then outputs
-   * them to the receiver. If no compression is being done, this (and overflow)
-   * will always be null and the current buffer will be sent directly to the
-   * receiver.
-   */
-  private ByteBuffer compressed = null;
-
-  /**
-   * Since the compressed buffer may start with contents from previous
-   * compression blocks, we allocate an overflow buffer so that the
-   * output of the codec can be split between the two buffers. After the
-   * compressed buffer is sent to the receiver, the overflow buffer becomes
-   * the new compressed buffer.
-   */
-  private ByteBuffer overflow = null;
-  private final int bufferSize;
-  private final CompressionCodec codec;
-  private long compressedBytes = 0;
-  private long uncompressedBytes = 0;
-
-  OutStream(String name,
-            int bufferSize,
-            CompressionCodec codec,
-            OutputReceiver receiver) throws IOException {
-    this.name = name;
-    this.bufferSize = bufferSize;
-    this.codec = codec;
-    this.receiver = receiver;
-    this.suppress = false;
-  }
-
-  public void clear() throws IOException {
-    flush();
-    suppress = false;
-  }
-
-  /**
-   * Write the length of the compressed bytes. Life is much easier if the
-   * header is constant length, so just use 3 bytes. Considering most of the
-   * codecs want between 32k (snappy) and 256k (lzo, zlib), 3 bytes should
-   * be plenty. We also use the low bit for whether it is the original or
-   * compressed bytes.
-   * @param buffer the buffer to write the header to
-   * @param position the position in the buffer to write at
-   * @param val the size in the file
-   * @param original is it uncompressed
-   */
-  private static void writeHeader(ByteBuffer buffer,
-                                  int position,
-                                  int val,
-                                  boolean original) {
-    buffer.put(position, (byte) ((val << 1) + (original ? 1 : 0)));
-    buffer.put(position + 1, (byte) (val >> 7));
-    buffer.put(position + 2, (byte) (val >> 15));
-  }
-
-  private void getNewInputBuffer() throws IOException {
-    if (codec == null) {
-      current = ByteBuffer.allocate(bufferSize);
-    } else {
-      current = ByteBuffer.allocate(bufferSize + HEADER_SIZE);
-      writeHeader(current, 0, bufferSize, true);
-      current.position(HEADER_SIZE);
-    }
-  }
-
-  /**
-   * Allocate a new output buffer if we are compressing.
-   */
-  private ByteBuffer getNewOutputBuffer() throws IOException {
-    return ByteBuffer.allocate(bufferSize + HEADER_SIZE);
-  }
-
-  private void flip() throws IOException {
-    current.limit(current.position());
-    current.position(codec == null ? 0 : HEADER_SIZE);
-  }
-
-  @Override
-  public void write(int i) throws IOException {
-    if (current == null) {
-      getNewInputBuffer();
-    }
-    if (current.remaining() < 1) {
-      spill();
-    }
-    uncompressedBytes += 1;
-    current.put((byte) i);
-  }
-
-  @Override
-  public void write(byte[] bytes, int offset, int length) throws IOException {
-    if (current == null) {
-      getNewInputBuffer();
-    }
-    int remaining = Math.min(current.remaining(), length);
-    current.put(bytes, offset, remaining);
-    uncompressedBytes += remaining;
-    length -= remaining;
-    while (length != 0) {
-      spill();
-      offset += remaining;
-      remaining = Math.min(current.remaining(), length);
-      current.put(bytes, offset, remaining);
-      uncompressedBytes += remaining;
-      length -= remaining;
-    }
-  }
-
-  private void spill() throws IOException {
-    // if there isn't anything in the current buffer, don't spill
-    if (current == null ||
-        current.position() == (codec == null ? 0 : HEADER_SIZE)) {
-      return;
-    }
-    flip();
-    if (codec == null) {
-      receiver.output(current);
-      getNewInputBuffer();
-    } else {
-      if (compressed == null) {
-        compressed = getNewOutputBuffer();
-      } else if (overflow == null) {
-        overflow = getNewOutputBuffer();
-      }
-      int sizePosn = compressed.position();
-      compressed.position(compressed.position() + HEADER_SIZE);
-      if (codec.compress(current, compressed, overflow)) {
-        uncompressedBytes = 0;
-        // move position back to after the header
-        current.position(HEADER_SIZE);
-        current.limit(current.capacity());
-        // find the total bytes in the chunk
-        int totalBytes = compressed.position() - sizePosn - HEADER_SIZE;
-        if (overflow != null) {
-          totalBytes += overflow.position();
-        }
-        compressedBytes += totalBytes + HEADER_SIZE;
-        writeHeader(compressed, sizePosn, totalBytes, false);
-        // if we have less than the next header left, spill it.
-        if (compressed.remaining() < HEADER_SIZE) {
-          compressed.flip();
-          receiver.output(compressed);
-          compressed = overflow;
-          overflow = null;
-        }
-      } else {
-        compressedBytes += uncompressedBytes + HEADER_SIZE;
-        uncompressedBytes = 0;
-        // we are using the original, but need to spill the current
-        // compressed buffer first. So back up to where we started,
-        // flip it and add it to done.
-        if (sizePosn != 0) {
-          compressed.position(sizePosn);
-          compressed.flip();
-          receiver.output(compressed);
-          compressed = null;
-          // if we have an overflow, clear it and make it the new compress
-          // buffer
-          if (overflow != null) {
-            overflow.clear();
-            compressed = overflow;
-            overflow = null;
-          }
-        } else {
-          compressed.clear();
-          if (overflow != null) {
-            overflow.clear();
-          }
-        }
-
-        // now add the current buffer into the done list and get a new one.
-        current.position(0);
-        // update the header with the current length
-        writeHeader(current, 0, current.limit() - HEADER_SIZE, true);
-        receiver.output(current);
-        getNewInputBuffer();
-      }
-    }
-  }
-
-  void getPosition(PositionRecorder recorder) throws IOException {
-    if (codec == null) {
-      recorder.addPosition(uncompressedBytes);
-    } else {
-      recorder.addPosition(compressedBytes);
-      recorder.addPosition(uncompressedBytes);
-    }
-  }
-
-  @Override
-  public void flush() throws IOException {
-    spill();
-    if (compressed != null && compressed.position() != 0) {
-      compressed.flip();
-      receiver.output(compressed);
-      compressed = null;
-    }
-    uncompressedBytes = 0;
-    compressedBytes = 0;
-    overflow = null;
-    current = null;
-  }
-
-  @Override
-  public String toString() {
-    return name;
-  }
-
-  @Override
-  public long getBufferSize() {
-    long result = 0;
-    if (current != null) {
-      result += current.capacity();
-    }
-    if (compressed != null) {
-      result += compressed.capacity();
-    }
-    if (overflow != null) {
-      result += overflow.capacity();
-    }
-    return result;
-  }
-
-  /**
-   * Set suppress flag
-   */
-  public void suppress() {
-    suppress = true;
-  }
-
-  /**
-   * Returns the state of suppress flag
-   * @return value of suppress flag
-   */
-  public boolean isSuppressed() {
-    return suppress;
-  }
-}
-

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/PositionRecorder.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/PositionRecorder.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/PositionRecorder.java
deleted file mode 100644
index a39926e..0000000
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/PositionRecorder.java
+++ /dev/null
@@ -1,25 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tajo.storage.thirdparty.orc;
-
-/**
- * An interface for recording positions in a stream.
- */
-interface PositionRecorder {
-  void addPosition(long offset);
-}

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/PositionedOutputStream.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/PositionedOutputStream.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/PositionedOutputStream.java
deleted file mode 100644
index 748c98c..0000000
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/PositionedOutputStream.java
+++ /dev/null
@@ -1,38 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tajo.storage.thirdparty.orc;
-
-import java.io.IOException;
-import java.io.OutputStream;
-
-abstract class PositionedOutputStream extends OutputStream {
-
-  /**
-   * Record the current position to the recorder.
-   * @param recorder the object that receives the position
-   * @throws IOException
-   */
-  abstract void getPosition(PositionRecorder recorder) throws IOException;
-
-  /**
-   * Get the memory size currently allocated as buffer associated with this
-   * stream.
-   * @return the number of bytes used by buffers.
-   */
-  abstract long getBufferSize();
-}

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/RecordReaderUtils.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/RecordReaderUtils.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/RecordReaderUtils.java
new file mode 100644
index 0000000..bc882e0
--- /dev/null
+++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/RecordReaderUtils.java
@@ -0,0 +1,393 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tajo.storage.thirdparty.orc;
+
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.io.DiskRange;
+import org.apache.hadoop.hive.common.io.DiskRangeList;
+import org.apache.orc.CompressionCodec;
+import org.apache.orc.DataReader;
+import org.apache.orc.OrcProto;
+import org.apache.orc.impl.BufferChunk;
+import org.apache.orc.impl.DirectDecompressionCodec;
+import org.apache.orc.impl.OutStream;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.List;
+
+public class RecordReaderUtils {
+
+  public static class DefaultDataReader implements DataReader {
+    private FSDataInputStream file;
+    private ByteBufferAllocatorPool pool;
+    private ZeroCopyAdapter zcr;
+    private FileSystem fs;
+    private Path path;
+    private boolean useZeroCopy;
+    private CompressionCodec codec;
+    private long readBytes = 0;
+
+    public DefaultDataReader(
+        FileSystem fs, Path path, boolean useZeroCopy, CompressionCodec codec) {
+      this.fs = fs;
+      this.path = path;
+      this.useZeroCopy = useZeroCopy;
+      this.codec = codec;
+    }
+
+    @Override
+    public void open() throws IOException {
+      this.file = fs.open(path);
+      if (useZeroCopy) {
+        pool = new ByteBufferAllocatorPool();
+        zcr = RecordReaderUtils.createZeroCopyShim(file, codec, pool);
+      } else {
+        pool = null;
+        zcr = null;
+      }
+    }
+
+    @Override
+    public DiskRangeList readFileData(
+        DiskRangeList range, long baseOffset, boolean doForceDirect) throws IOException {
+      return readDiskRanges(file, zcr, baseOffset, range, doForceDirect);
+    }
+
+    @Override
+    public void close() throws IOException {
+      if (file != null) {
+        file.close();
+      }
+      if (pool != null) {
+        pool.clear();
+      }
+    }
+
+    @Override
+    public boolean isTrackingDiskRanges() {
+      return zcr != null;
+    }
+
+    @Override
+    public void releaseBuffer(ByteBuffer buffer) {
+      zcr.releaseBuffer(buffer);
+    }
+
+    public long getReadBytes() {
+      return readBytes;
+    }
+
+    /**
+     * Read the list of ranges from the file.
+     * @param file the file to read
+     * @param base the base of the stripe
+     * @param range the disk ranges within the stripe to read
+     * @return the head of the resulting list, in which every range that had no
+     *    data has been replaced by the buffer chunk(s) read for it
+     * @throws IOException
+     */
+    private DiskRangeList readDiskRanges(FSDataInputStream file,
+                                         ZeroCopyAdapter zcr,
+                                        long base,
+                                        DiskRangeList range,
+                                        boolean doForceDirect) throws IOException {
+      if (range == null) return null;
+      DiskRangeList prev = range.prev;
+      if (prev == null) {
+        prev = new DiskRangeList.MutateHelper(range);
+      }
+      while (range != null) {
+        if (range.hasData()) {
+          range = range.next;
+          continue;
+        }
+        int len = (int) (range.getEnd() - range.getOffset());
+        long off = range.getOffset();
+        if (zcr != null) {
+          file.seek(base + off);
+          boolean hasReplaced = false;
+          while (len > 0) {
+            ByteBuffer partial = zcr.readBuffer(len, false);
+            readBytes += partial.remaining();
+            BufferChunk bc = new BufferChunk(partial, off);
+            if (!hasReplaced) {
+              range.replaceSelfWith(bc);
+              hasReplaced = true;
+            } else {
+              range.insertAfter(bc);
+            }
+            range = bc;
+            int read = partial.remaining();
+            len -= read;
+            off += read;
+          }
+        } else {
+          // Don't use HDFS ByteBuffer API because it has no readFully, and is buggy and pointless.
+          byte[] buffer = new byte[len];
+          file.readFully((base + off), buffer, 0, buffer.length);
+          readBytes += buffer.length;
+          ByteBuffer bb = null;
+          if (doForceDirect) {
+            bb = ByteBuffer.allocateDirect(len);
+            bb.put(buffer);
+            bb.position(0);
+            bb.limit(len);
+          } else {
+            bb = ByteBuffer.wrap(buffer);
+          }
+          range = range.replaceSelfWith(new BufferChunk(bb, range.getOffset()));
+        }
+        range = range.next;
+      }
+      return prev.next;
+    }
+  }
+
+  public static DataReader createDefaultDataReader(
+      FileSystem fs, Path path, boolean useZeroCopy, CompressionCodec codec) {
+    return new DefaultDataReader(fs, path, useZeroCopy, codec);
+  }
+
+  public static boolean[] findPresentStreamsByColumn(
+      List<OrcProto.Stream> streamList, List<OrcProto.Type> types) {
+    boolean[] hasNull = new boolean[types.size()];
+    for(OrcProto.Stream stream: streamList) {
+      if (stream.hasKind() && (stream.getKind() == OrcProto.Stream.Kind.PRESENT)) {
+        hasNull[stream.getColumn()] = true;
+      }
+    }
+    return hasNull;
+  }
+
+  /**
+   * Does region A overlap region B? The end points are inclusive on both sides.
+   * @param leftA A's left point
+   * @param rightA A's right point
+   * @param leftB B's left point
+   * @param rightB B's right point
+   * @return Does region A overlap region B?
+   */
+  static boolean overlap(long leftA, long rightA, long leftB, long rightB) {
+    if (leftA <= leftB) {
+      return rightA >= leftB;
+    }
+    return rightB >= leftA;
+  }
+
+  public static void addEntireStreamToRanges(
+      long offset, long length, DiskRangeList.CreateHelper list, boolean doMergeBuffers) {
+    list.addOrMerge(offset, offset + length, doMergeBuffers, false);
+  }
+
+  public static void addRgFilteredStreamToRanges(OrcProto.Stream stream,
+                                                 boolean[] includedRowGroups, boolean isCompressed, OrcProto.RowIndex index,
+                                                 OrcProto.ColumnEncoding encoding, OrcProto.Type type, int compressionSize, boolean hasNull,
+                                                 long offset, long length, DiskRangeList.CreateHelper list, boolean doMergeBuffers) {
+    for (int group = 0; group < includedRowGroups.length; ++group) {
+      if (!includedRowGroups[group]) continue;
+      int posn = getIndexPosition(
+          encoding.getKind(), type.getKind(), stream.getKind(), isCompressed, hasNull);
+      long start = index.getEntry(group).getPositions(posn);
+      final long nextGroupOffset;
+      boolean isLast = group == (includedRowGroups.length - 1);
+      nextGroupOffset = isLast ? length : index.getEntry(group + 1).getPositions(posn);
+
+      start += offset;
+      long end = offset + estimateRgEndOffset(
+          isCompressed, isLast, nextGroupOffset, length, compressionSize);
+      list.addOrMerge(start, end, doMergeBuffers, true);
+    }
+  }
+
+  public static long estimateRgEndOffset(boolean isCompressed, boolean isLast,
+                                         long nextGroupOffset, long streamLength, int bufferSize) {
+    // figure out the worst case last location
+    // if adjacent groups have the same compressed block offset then stretch the slop
+    // by a factor of 2 to safely accommodate the next compression block.
+    // One for the current compression block and another for the next compression block.
+    long slop = isCompressed ? 2 * (OutStream.HEADER_SIZE + bufferSize) : WORST_UNCOMPRESSED_SLOP;
+    return isLast ? streamLength : Math.min(streamLength, nextGroupOffset + slop);
+  }
+
+  private static final int BYTE_STREAM_POSITIONS = 1;
+  private static final int RUN_LENGTH_BYTE_POSITIONS = BYTE_STREAM_POSITIONS + 1;
+  private static final int BITFIELD_POSITIONS = RUN_LENGTH_BYTE_POSITIONS + 1;
+  private static final int RUN_LENGTH_INT_POSITIONS = BYTE_STREAM_POSITIONS + 1;
+
+  /**
+   * Get the offset, within the index positions recorded for the column, at
+   * which the positions of the given stream start.
+   * @param columnEncoding the encoding of the column
+   * @param columnType the type of the column
+   * @param streamType the kind of the stream
+   * @param isCompressed is the file compressed
+   * @param hasNulls does the column have a PRESENT stream?
+   * @return the index of the first position that belongs to that stream
+   */
+  public static int getIndexPosition(OrcProto.ColumnEncoding.Kind columnEncoding,
+                                     OrcProto.Type.Kind columnType,
+                                     OrcProto.Stream.Kind streamType,
+                                     boolean isCompressed,
+                                     boolean hasNulls) {
+    if (streamType == OrcProto.Stream.Kind.PRESENT) {
+      return 0;
+    }
+    int compressionValue = isCompressed ? 1 : 0;
+    int base = hasNulls ? (BITFIELD_POSITIONS + compressionValue) : 0;
+    switch (columnType) {
+      case BOOLEAN:
+      case BYTE:
+      case SHORT:
+      case INT:
+      case LONG:
+      case FLOAT:
+      case DOUBLE:
+      case DATE:
+      case STRUCT:
+      case MAP:
+      case LIST:
+      case UNION:
+        return base;
+      case CHAR:
+      case VARCHAR:
+      case STRING:
+        if (columnEncoding == OrcProto.ColumnEncoding.Kind.DICTIONARY ||
+            columnEncoding == OrcProto.ColumnEncoding.Kind.DICTIONARY_V2) {
+          return base;
+        } else {
+          if (streamType == OrcProto.Stream.Kind.DATA) {
+            return base;
+          } else {
+            return base + BYTE_STREAM_POSITIONS + compressionValue;
+          }
+        }
+      case BINARY:
+        if (streamType == OrcProto.Stream.Kind.DATA) {
+          return base;
+        }
+        return base + BYTE_STREAM_POSITIONS + compressionValue;
+      case DECIMAL:
+        if (streamType == OrcProto.Stream.Kind.DATA) {
+          return base;
+        }
+        return base + BYTE_STREAM_POSITIONS + compressionValue;
+      case TIMESTAMP:
+        if (streamType == OrcProto.Stream.Kind.DATA) {
+          return base;
+        }
+        return base + RUN_LENGTH_INT_POSITIONS + compressionValue;
+      default:
+        throw new IllegalArgumentException("Unknown type " + columnType);
+    }
+  }
+
+  // For uncompressed streams, the worst-case overlap with the following set
+  // of rows (a long vint literal group).
+  static final int WORST_UNCOMPRESSED_SLOP = 2 + 8 * 512;
+
+  /**
+   * Is this stream part of a dictionary?
+   * @return is this part of a dictionary?
+   */
+  public static boolean isDictionary(OrcProto.Stream.Kind kind,
+                                     OrcProto.ColumnEncoding encoding) {
+    assert kind != OrcProto.Stream.Kind.DICTIONARY_COUNT;
+    OrcProto.ColumnEncoding.Kind encodingKind = encoding.getKind();
+    return kind == OrcProto.Stream.Kind.DICTIONARY_DATA ||
+        (kind == OrcProto.Stream.Kind.LENGTH &&
+            (encodingKind == OrcProto.ColumnEncoding.Kind.DICTIONARY ||
+                encodingKind == OrcProto.ColumnEncoding.Kind.DICTIONARY_V2));
+  }
+
+  /**
+   * Build a string representation of a list of disk ranges.
+   * @param range ranges to stringify
+   * @return the resulting string
+   */
+  public static String stringifyDiskRanges(DiskRangeList range) {
+    StringBuilder buffer = new StringBuilder();
+    buffer.append("[");
+    boolean isFirst = true;
+    while (range != null) {
+      if (!isFirst) {
+        buffer.append(", {");
+      } else {
+        buffer.append("{");
+      }
+      isFirst = false;
+      buffer.append(range.toString());
+      buffer.append("}");
+      range = range.next;
+    }
+    buffer.append("]");
+    return buffer.toString();
+  }
+
+  public static List<DiskRange> getStreamBuffers(DiskRangeList range, long offset, long length) {
+    // This assumes sorted ranges (as do many other parts of the ORC code).
+    ArrayList<DiskRange> buffers = new ArrayList<DiskRange>();
+    if (length == 0) return buffers;
+    long streamEnd = offset + length;
+    boolean inRange = false;
+    while (range != null) {
+      if (!inRange) {
+        if (range.getEnd() <= offset) {
+          range = range.next;
+          continue; // Skip until we are in range.
+        }
+        inRange = true;
+        if (range.getOffset() < offset) {
+          // Partial first buffer, add a slice of it.
+          buffers.add(range.sliceAndShift(offset, Math.min(streamEnd, range.getEnd()), -offset));
+          if (range.getEnd() >= streamEnd) break; // Partial first buffer is also partial last buffer.
+          range = range.next;
+          continue;
+        }
+      } else if (range.getOffset() >= streamEnd) {
+        break;
+      }
+      if (range.getEnd() > streamEnd) {
+        // Partial last buffer (may also be the first buffer), add a slice of it.
+        buffers.add(range.sliceAndShift(range.getOffset(), streamEnd, -offset));
+        break;
+      }
+      // Buffer that belongs entirely to one stream.
+      // TODO: ideally we would want to reuse the object and remove it from the list, but we cannot
+      //       because bufferChunks is also used by clearStreams for zcr. Create a useless dup.
+      buffers.add(range.sliceAndShift(range.getOffset(), range.getEnd(), -offset));
+      if (range.getEnd() == streamEnd) break;
+      range = range.next;
+    }
+    return buffers;
+  }
+
+  static ZeroCopyAdapter createZeroCopyShim(FSDataInputStream file,
+                                            CompressionCodec codec, ByteBufferAllocatorPool pool) throws IOException {
+    if ((codec == null || ((codec instanceof DirectDecompressionCodec)
+        && ((DirectDecompressionCodec) codec).isAvailable()))) {
+      /* codec is null or is available */
+      return new ZeroCopyAdapter(file, pool);
+    }
+    return null;
+  }
+}

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/RedBlackTree.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/RedBlackTree.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/RedBlackTree.java
deleted file mode 100644
index 2482f93..0000000
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/RedBlackTree.java
+++ /dev/null
@@ -1,309 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.tajo.storage.thirdparty.orc;
-
-/**
- * A memory efficient red-black tree that does not allocate any objects per
- * an element. This class is abstract and assumes that the child class
- * handles the key and comparisons with the key.
- */
-abstract class RedBlackTree {
-  public static final int NULL = -1;
-
-  // Various values controlling the offset of the data within the array.
-  private static final int LEFT_OFFSET = 0;
-  private static final int RIGHT_OFFSET = 1;
-  private static final int ELEMENT_SIZE = 2;
-
-  protected int size = 0;
-  private final DynamicIntArray data;
-  protected int root = NULL;
-  protected int lastAdd = 0;
-  private boolean wasAdd = false;
-
-  /**
-   * Create a set with the given initial capacity.
-   */
-  public RedBlackTree(int initialCapacity) {
-    data = new DynamicIntArray(initialCapacity * ELEMENT_SIZE);
-  }
-
-  /**
-   * Insert a new node into the data array, growing the array as necessary.
-   *
-   * @return Returns the position of the new node.
-   */
-  private int insert(int left, int right, boolean isRed) {
-    int position = size;
-    size += 1;
-    setLeft(position, left, isRed);
-    setRight(position, right);
-    return position;
-  }
-
-  /**
-   * Compare the value at the given position to the new value.
-   * @return 0 if the values are the same, -1 if the new value is smaller and
-   *         1 if the new value is larger.
-   */
-  protected abstract int compareValue(int position);
-
-  /**
-   * Is the given node red as opposed to black? To prevent having an extra word
-   * in the data array, we just the low bit on the left child index.
-   */
-  protected boolean isRed(int position) {
-    return position != NULL &&
-        (data.get(position * ELEMENT_SIZE + LEFT_OFFSET) & 1) == 1;
-  }
-
-  /**
-   * Set the red bit true or false.
-   */
-  private void setRed(int position, boolean isRed) {
-    int offset = position * ELEMENT_SIZE + LEFT_OFFSET;
-    if (isRed) {
-      data.set(offset, data.get(offset) | 1);
-    } else {
-      data.set(offset, data.get(offset) & ~1);
-    }
-  }
-
-  /**
-   * Get the left field of the given position.
-   */
-  protected int getLeft(int position) {
-    return data.get(position * ELEMENT_SIZE + LEFT_OFFSET) >> 1;
-  }
-
-  /**
-   * Get the right field of the given position.
-   */
-  protected int getRight(int position) {
-    return data.get(position * ELEMENT_SIZE + RIGHT_OFFSET);
-  }
-
-  /**
-   * Set the left field of the given position.
-   * Note that we are storing the node color in the low bit of the left pointer.
-   */
-  private void setLeft(int position, int left) {
-    int offset = position * ELEMENT_SIZE + LEFT_OFFSET;
-    data.set(offset, (left << 1) | (data.get(offset) & 1));
-  }
-
-  /**
-   * Set the left field of the given position.
-   * Note that we are storing the node color in the low bit of the left pointer.
-   */
-  private void setLeft(int position, int left, boolean isRed) {
-    int offset = position * ELEMENT_SIZE + LEFT_OFFSET;
-    data.set(offset, (left << 1) | (isRed ? 1 : 0));
-  }
-
-  /**
-   * Set the right field of the given position.
-   */
-  private void setRight(int position, int right) {
-    data.set(position * ELEMENT_SIZE + RIGHT_OFFSET, right);
-  }
-
-  /**
-   * Insert or find a given key in the tree and rebalance the tree correctly.
-   * Rebalancing restores the red-black aspect of the tree to maintain the
-   * invariants:
-   *   1. If a node is red, both of its children are black.
-   *   2. Each child of a node has the same black height (the number of black
-   *      nodes between it and the leaves of the tree).
-   *
-   * Inserted nodes are at the leaves and are red, therefore there is at most a
-   * violation of rule 1 at the node we just put in. Instead of always keeping
-   * the parents, this routine passing down the context.
-   *
-   * The fix is broken down into 6 cases (1.{1,2,3} and 2.{1,2,3} that are
-   * left-right mirror images of each other). See Algorighms by Cormen,
-   * Leiserson, and Rivest for the explaination of the subcases.
-   *
-   * @param node The node that we are fixing right now.
-   * @param fromLeft Did we come down from the left?
-   * @param parent Nodes' parent
-   * @param grandparent Parent's parent
-   * @param greatGrandparent Grandparent's parent
-   * @return Does parent also need to be checked and/or fixed?
-   */
-  private boolean add(int node, boolean fromLeft, int parent,
-                      int grandparent, int greatGrandparent) {
-    if (node == NULL) {
-      if (root == NULL) {
-        lastAdd = insert(NULL, NULL, false);
-        root = lastAdd;
-        wasAdd = true;
-        return false;
-      } else {
-        lastAdd = insert(NULL, NULL, true);
-        node = lastAdd;
-        wasAdd = true;
-        // connect the new node into the tree
-        if (fromLeft) {
-          setLeft(parent, node);
-        } else {
-          setRight(parent, node);
-        }
-      }
-    } else {
-      int compare = compareValue(node);
-      boolean keepGoing;
-
-      // Recurse down to find where the node needs to be added
-      if (compare < 0) {
-        keepGoing = add(getLeft(node), true, node, parent, grandparent);
-      } else if (compare > 0) {
-        keepGoing = add(getRight(node), false, node, parent, grandparent);
-      } else {
-        lastAdd = node;
-        wasAdd = false;
-        return false;
-      }
-
-      // we don't need to fix the root (because it is always set to black)
-      if (node == root || !keepGoing) {
-        return false;
-      }
-    }
-
-
-    // Do we need to fix this node? Only if there are two reds right under each
-    // other.
-    if (isRed(node) && isRed(parent)) {
-      if (parent == getLeft(grandparent)) {
-        int uncle = getRight(grandparent);
-        if (isRed(uncle)) {
-          // case 1.1
-          setRed(parent, false);
-          setRed(uncle, false);
-          setRed(grandparent, true);
-          return true;
-        } else {
-          if (node == getRight(parent)) {
-            // case 1.2
-            // swap node and parent
-            int tmp = node;
-            node = parent;
-            parent = tmp;
-            // left-rotate on node
-            setLeft(grandparent, parent);
-            setRight(node, getLeft(parent));
-            setLeft(parent, node);
-          }
-
-          // case 1.2 and 1.3
-          setRed(parent, false);
-          setRed(grandparent, true);
-
-          // right-rotate on grandparent
-          if (greatGrandparent == NULL) {
-            root = parent;
-          } else if (getLeft(greatGrandparent) == grandparent) {
-            setLeft(greatGrandparent, parent);
-          } else {
-            setRight(greatGrandparent, parent);
-          }
-          setLeft(grandparent, getRight(parent));
-          setRight(parent, grandparent);
-          return false;
-        }
-      } else {
-        int uncle = getLeft(grandparent);
-        if (isRed(uncle)) {
-          // case 2.1
-          setRed(parent, false);
-          setRed(uncle, false);
-          setRed(grandparent, true);
-          return true;
-        } else {
-          if (node == getLeft(parent)) {
-            // case 2.2
-            // swap node and parent
-            int tmp = node;
-            node = parent;
-            parent = tmp;
-            // right-rotate on node
-            setRight(grandparent, parent);
-            setLeft(node, getRight(parent));
-            setRight(parent, node);
-          }
-          // case 2.2 and 2.3
-          setRed(parent, false);
-          setRed(grandparent, true);
-          // left-rotate on grandparent
-          if (greatGrandparent == NULL) {
-            root = parent;
-          } else if (getRight(greatGrandparent) == grandparent) {
-            setRight(greatGrandparent, parent);
-          } else {
-            setLeft(greatGrandparent, parent);
-          }
-          setRight(grandparent, getLeft(parent));
-          setLeft(parent, grandparent);
-          return false;
-        }
-      }
-    } else {
-      return true;
-    }
-  }
-
-  /**
-   * Add the new key to the tree.
-   * @return true if the element is a new one.
-   */
-  protected boolean add() {
-    add(root, false, NULL, NULL, NULL);
-    if (wasAdd) {
-      setRed(root, false);
-      return true;
-    } else {
-      return false;
-    }
-  }
-
-  /**
-   * Get the number of elements in the set.
-   */
-  public int size() {
-    return size;
-  }
-
-  /**
-   * Reset the table to empty.
-   */
-  public void clear() {
-    root = NULL;
-    size = 0;
-    data.clear();
-  }
-
-  /**
-   * Get the buffer size in bytes.
-   */
-  public long getSizeInBytes() {
-    return data.getSizeInBytes();
-  }
-}
-

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/RunLengthByteWriter.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/RunLengthByteWriter.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/RunLengthByteWriter.java
deleted file mode 100644
index 0953cdd..0000000
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/RunLengthByteWriter.java
+++ /dev/null
@@ -1,106 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tajo.storage.thirdparty.orc;
-
-import java.io.IOException;
-
-/**
- * A streamFactory that writes a sequence of bytes. A control byte is written before
- * each run with positive values 0 to 127 meaning 2 to 129 repetitions. If the
- * bytes is -1 to -128, 1 to 128 literal byte values follow.
- */
-class RunLengthByteWriter {
-  static final int MIN_REPEAT_SIZE = 3;
-  static final int MAX_LITERAL_SIZE = 128;
-  static final int MAX_REPEAT_SIZE= 127 + MIN_REPEAT_SIZE;
-  private final PositionedOutputStream output;
-  private final byte[] literals = new byte[MAX_LITERAL_SIZE];
-  private int numLiterals = 0;
-  private boolean repeat = false;
-  private int tailRunLength = 0;
-
-  RunLengthByteWriter(PositionedOutputStream output) {
-    this.output = output;
-  }
-
-  private void writeValues() throws IOException {
-    if (numLiterals != 0) {
-      if (repeat) {
-        output.write(numLiterals - MIN_REPEAT_SIZE);
-        output.write(literals, 0, 1);
-     } else {
-        output.write(-numLiterals);
-        output.write(literals, 0, numLiterals);
-      }
-      repeat = false;
-      tailRunLength = 0;
-      numLiterals = 0;
-    }
-  }
-
-  void flush() throws IOException {
-    writeValues();
-    output.flush();
-  }
-
-  void write(byte value) throws IOException {
-    if (numLiterals == 0) {
-      literals[numLiterals++] = value;
-      tailRunLength = 1;
-    } else if (repeat) {
-      if (value == literals[0]) {
-        numLiterals += 1;
-        if (numLiterals == MAX_REPEAT_SIZE) {
-          writeValues();
-        }
-      } else {
-        writeValues();
-        literals[numLiterals++] = value;
-        tailRunLength = 1;
-      }
-    } else {
-      if (value == literals[numLiterals - 1]) {
-        tailRunLength += 1;
-      } else {
-        tailRunLength = 1;
-      }
-      if (tailRunLength == MIN_REPEAT_SIZE) {
-        if (numLiterals + 1 == MIN_REPEAT_SIZE) {
-          repeat = true;
-          numLiterals += 1;
-        } else {
-          numLiterals -= MIN_REPEAT_SIZE - 1;
-          writeValues();
-          literals[0] = value;
-          repeat = true;
-          numLiterals = MIN_REPEAT_SIZE;
-        }
-      } else {
-        literals[numLiterals++] = value;
-        if (numLiterals == MAX_LITERAL_SIZE) {
-          writeValues();
-        }
-      }
-    }
-  }
-
-  void getPosition(PositionRecorder recorder) throws IOException {
-    output.getPosition(recorder);
-    recorder.addPosition(numLiterals);
-  }
-}

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/RunLengthIntegerWriter.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/RunLengthIntegerWriter.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/RunLengthIntegerWriter.java
deleted file mode 100644
index 867f041..0000000
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/RunLengthIntegerWriter.java
+++ /dev/null
@@ -1,143 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tajo.storage.thirdparty.orc;
-
-import java.io.IOException;
-
-/**
- * A streamFactory that writes a sequence of integers. A control byte is written before
- * each run with positive values 0 to 127 meaning 3 to 130 repetitions, each
- * repetition is offset by a delta. If the control byte is -1 to -128, 1 to 128
- * literal vint values follow.
- */
-class RunLengthIntegerWriter implements IntegerWriter {
-  static final int MIN_REPEAT_SIZE = 3;
-  static final int MAX_DELTA = 127;
-  static final int MIN_DELTA = -128;
-  static final int MAX_LITERAL_SIZE = 128;
-  private static final int MAX_REPEAT_SIZE = 127 + MIN_REPEAT_SIZE;
-  private final PositionedOutputStream output;
-  private final boolean signed;
-  private final long[] literals = new long[MAX_LITERAL_SIZE];
-  private int numLiterals = 0;
-  private long delta = 0;
-  private boolean repeat = false;
-  private int tailRunLength = 0;
-  private SerializationUtils utils;
-
-  RunLengthIntegerWriter(PositionedOutputStream output,
-                         boolean signed) {
-    this.output = output;
-    this.signed = signed;
-    this.utils = new SerializationUtils();
-  }
-
-  private void writeValues() throws IOException {
-    if (numLiterals != 0) {
-      if (repeat) {
-        output.write(numLiterals - MIN_REPEAT_SIZE);
-        output.write((byte) delta);
-        if (signed) {
-          utils.writeVslong(output, literals[0]);
-        } else {
-          utils.writeVulong(output, literals[0]);
-        }
-      } else {
-        output.write(-numLiterals);
-        for(int i=0; i < numLiterals; ++i) {
-          if (signed) {
-            utils.writeVslong(output, literals[i]);
-          } else {
-            utils.writeVulong(output, literals[i]);
-          }
-        }
-      }
-      repeat = false;
-      numLiterals = 0;
-      tailRunLength = 0;
-    }
-  }
-
-  @Override
-  public void flush() throws IOException {
-    writeValues();
-    output.flush();
-  }
-
-  @Override
-  public void write(long value) throws IOException {
-    if (numLiterals == 0) {
-      literals[numLiterals++] = value;
-      tailRunLength = 1;
-    } else if (repeat) {
-      if (value == literals[0] + delta * numLiterals) {
-        numLiterals += 1;
-        if (numLiterals == MAX_REPEAT_SIZE) {
-          writeValues();
-        }
-      } else {
-        writeValues();
-        literals[numLiterals++] = value;
-        tailRunLength = 1;
-      }
-    } else {
-      if (tailRunLength == 1) {
-        delta = value - literals[numLiterals - 1];
-        if (delta < MIN_DELTA || delta > MAX_DELTA) {
-          tailRunLength = 1;
-        } else {
-          tailRunLength = 2;
-        }
-      } else if (value == literals[numLiterals - 1] + delta) {
-        tailRunLength += 1;
-      } else {
-        delta = value - literals[numLiterals - 1];
-        if (delta < MIN_DELTA || delta > MAX_DELTA) {
-          tailRunLength = 1;
-        } else {
-          tailRunLength = 2;
-        }
-      }
-      if (tailRunLength == MIN_REPEAT_SIZE) {
-        if (numLiterals + 1 == MIN_REPEAT_SIZE) {
-          repeat = true;
-          numLiterals += 1;
-        } else {
-          numLiterals -= MIN_REPEAT_SIZE - 1;
-          long base = literals[numLiterals];
-          writeValues();
-          literals[0] = base;
-          repeat = true;
-          numLiterals = MIN_REPEAT_SIZE;
-        }
-      } else {
-        literals[numLiterals++] = value;
-        if (numLiterals == MAX_LITERAL_SIZE) {
-          writeValues();
-        }
-      }
-    }
-  }
-
-  @Override
-  public void getPosition(PositionRecorder recorder) throws IOException {
-    output.getPosition(recorder);
-    recorder.addPosition(numLiterals);
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/RunLengthIntegerWriterV2.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/RunLengthIntegerWriterV2.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/RunLengthIntegerWriterV2.java
deleted file mode 100644
index 7237b2e..0000000
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/RunLengthIntegerWriterV2.java
+++ /dev/null
@@ -1,832 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tajo.storage.thirdparty.orc;
-
-import java.io.IOException;
-
-/**
- * A writer that performs light weight compression over sequence of integers.
- * <p>
- * There are four types of lightweight integer compression
- * <ul>
- * <li>SHORT_REPEAT</li>
- * <li>DIRECT</li>
- * <li>PATCHED_BASE</li>
- * <li>DELTA</li>
- * </ul>
- * </p>
- * The description and format for these types are as below:
- * <p>
- * <b>SHORT_REPEAT:</b> Used for short repeated integer sequences.
- * <ul>
- * <li>1 byte header
- * <ul>
- * <li>2 bits for encoding type</li>
- * <li>3 bits for bytes required for repeating value</li>
- * <li>3 bits for repeat count (MIN_REPEAT + run length)</li>
- * </ul>
- * </li>
- * <li>Blob - repeat value (fixed bytes)</li>
- * </ul>
- * </p>
- * <p>
- * <b>DIRECT:</b> Used for random integer sequences whose number of bit
- * requirement doesn't vary a lot.
- * <ul>
- * <li>2 bytes header
- * <ul>
- * 1st byte
- * <li>2 bits for encoding type</li>
- * <li>5 bits for fixed bit width of values in blob</li>
- * <li>1 bit for storing MSB of run length</li>
- * </ul>
- * <ul>
- * 2nd byte
- * <li>8 bits for lower run length bits</li>
- * </ul>
- * </li>
- * <li>Blob - stores the direct values using fixed bit width. The length of the
- * data blob is (fixed width * run length) bits long</li>
- * </ul>
- * </p>
- * <p>
- * <b>PATCHED_BASE:</b> Used for random integer sequences whose number of bit
- * requirement varies beyond a threshold.
- * <ul>
- * <li>4 bytes header
- * <ul>
- * 1st byte
- * <li>2 bits for encoding type</li>
- * <li>5 bits for fixed bit width of values in blob</li>
- * <li>1 bit for storing MSB of run length</li>
- * </ul>
- * <ul>
- * 2nd byte
- * <li>8 bits for lower run length bits</li>
- * </ul>
- * <ul>
- * 3rd byte
- * <li>3 bits for bytes required to encode base value</li>
- * <li>5 bits for patch width</li>
- * </ul>
- * <ul>
- * 4th byte
- * <li>3 bits for patch gap width</li>
- * <li>5 bits for patch length</li>
- * </ul>
- * </li>
- * <li>Base value - Stored using fixed number of bytes. If MSB is set, base
- * value is negative else positive. Length of base value is (base width * 8)
- * bits.</li>
- * <li>Data blob - Base reduced values as stored using fixed bit width. Length
- * of data blob is (fixed width * run length) bits.</li>
- * <li>Patch blob - Patch blob is a list of gap and patch value. Each entry in
- * the patch list is (patch width + patch gap width) bits long. Gap between the
- * subsequent elements to be patched are stored in upper part of entry whereas
- * patch values are stored in lower part of entry. Length of patch blob is
- * ((patch width + patch gap width) * patch length) bits.</li>
- * </ul>
- * </p>
- * <p>
- * <b>DELTA</b> Used for monotonically increasing or decreasing sequences,
- * sequences with fixed delta values or long repeated sequences.
- * <ul>
- * <li>2 bytes header
- * <ul>
- * 1st byte
- * <li>2 bits for encoding type</li>
- * <li>5 bits for fixed bit width of values in blob</li>
- * <li>1 bit for storing MSB of run length</li>
- * </ul>
- * <ul>
- * 2nd byte
- * <li>8 bits for lower run length bits</li>
- * </ul>
- * </li>
- * <li>Base value - encoded as varint</li>
- * <li>Delta base - encoded as varint</li>
- * <li>Delta blob - only positive values. monotonicity and orderness are decided
- * based on the sign of the base value and delta base</li>
- * </ul>
- * </p>
- */
-class RunLengthIntegerWriterV2 implements IntegerWriter {
-
-  public enum EncodingType {
-    SHORT_REPEAT, DIRECT, PATCHED_BASE, DELTA
-  }
-
-  static final int MAX_SCOPE = 512;
-  static final int MIN_REPEAT = 3;
-  private static final int MAX_SHORT_REPEAT_LENGTH = 10;
-  private long prevDelta = 0;
-  private int fixedRunLength = 0;
-  private int variableRunLength = 0;
-  private final long[] literals = new long[MAX_SCOPE];
-  private final PositionedOutputStream output;
-  private final boolean signed;
-  private EncodingType encoding;
-  private int numLiterals;
-  private final long[] zigzagLiterals = new long[MAX_SCOPE];
-  private final long[] baseRedLiterals = new long[MAX_SCOPE];
-  private final long[] adjDeltas = new long[MAX_SCOPE];
-  private long fixedDelta;
-  private int zzBits90p;
-  private int zzBits100p;
-  private int brBits95p;
-  private int brBits100p;
-  private int bitsDeltaMax;
-  private int patchWidth;
-  private int patchGapWidth;
-  private int patchLength;
-  private long[] gapVsPatchList;
-  private long min;
-  private boolean isFixedDelta;
-  private SerializationUtils utils;
-  private boolean alignedBitpacking;
-
-  RunLengthIntegerWriterV2(PositionedOutputStream output, boolean signed) {
-    this(output, signed, true);
-  }
-
-  RunLengthIntegerWriterV2(PositionedOutputStream output, boolean signed,
-      boolean alignedBitpacking) {
-    this.output = output;
-    this.signed = signed;
-    this.alignedBitpacking = alignedBitpacking;
-    this.utils = new SerializationUtils();
-    clear();
-  }
-
-  private void writeValues() throws IOException {
-    if (numLiterals != 0) {
-
-      if (encoding.equals(EncodingType.SHORT_REPEAT)) {
-        writeShortRepeatValues();
-      } else if (encoding.equals(EncodingType.DIRECT)) {
-        writeDirectValues();
-      } else if (encoding.equals(EncodingType.PATCHED_BASE)) {
-        writePatchedBaseValues();
-      } else {
-        writeDeltaValues();
-      }
-
-      // clear all the variables
-      clear();
-    }
-  }
-
-  private void writeDeltaValues() throws IOException {
-    int len = 0;
-    int fb = bitsDeltaMax;
-    int efb = 0;
-
-    if (alignedBitpacking) {
-      fb = utils.getClosestAlignedFixedBits(fb);
-    }
-
-    if (isFixedDelta) {
-      // if fixed run length is greater than threshold then it will be fixed
-      // delta sequence with delta value 0 else fixed delta sequence with
-      // non-zero delta value
-      if (fixedRunLength > MIN_REPEAT) {
-        // ex. sequence: 2 2 2 2 2 2 2 2
-        len = fixedRunLength - 1;
-        fixedRunLength = 0;
-      } else {
-        // ex. sequence: 4 6 8 10 12 14 16
-        len = variableRunLength - 1;
-        variableRunLength = 0;
-      }
-    } else {
-      // fixed width 0 is used for long repeating values.
-      // sequences that require only 1 bit to encode will have an additional bit
-      if (fb == 1) {
-        fb = 2;
-      }
-      efb = utils.encodeBitWidth(fb);
-      efb = efb << 1;
-      len = variableRunLength - 1;
-      variableRunLength = 0;
-    }
-
-    // extract the 9th bit of run length
-    final int tailBits = (len & 0x100) >>> 8;
-
-    // create first byte of the header
-    final int headerFirstByte = getOpcode() | efb | tailBits;
-
-    // second byte of the header stores the remaining 8 bits of runlength
-    final int headerSecondByte = len & 0xff;
-
-    // write header
-    output.write(headerFirstByte);
-    output.write(headerSecondByte);
-
-    // store the first value from zigzag literal array
-    if (signed) {
-      utils.writeVslong(output, literals[0]);
-    } else {
-      utils.writeVulong(output, literals[0]);
-    }
-
-    if (isFixedDelta) {
-      // if delta is fixed then we don't need to store delta blob
-      utils.writeVslong(output, fixedDelta);
-    } else {
-      // store the first value as delta value using zigzag encoding
-      utils.writeVslong(output, adjDeltas[0]);
-
-      // adjacent delta values are bit packed. The length of adjDeltas array is
-      // always one less than the number of literals (delta difference for n
-      // elements is n-1). We have already written one element, write the
-      // remaining numLiterals - 2 elements here
-      utils.writeInts(adjDeltas, 1, numLiterals - 2, fb, output);
-    }
-  }
-
-  private void writePatchedBaseValues() throws IOException {
-
-    // NOTE: Aligned bit packing cannot be applied for PATCHED_BASE encoding
-    // because patch is applied to MSB bits. For example: If fixed bit width of
-    // base value is 7 bits and if patch is 3 bits, the actual value is
-    // constructed by shifting the patch to left by 7 positions.
-    // actual_value = patch << 7 | base_value
-    // So, if we align base_value then actual_value can not be reconstructed.
-
-    // write the number of fixed bits required in next 5 bits
-    final int fb = brBits95p;
-    final int efb = utils.encodeBitWidth(fb) << 1;
-
-    // adjust variable run length, they are one off
-    variableRunLength -= 1;
-
-    // extract the 9th bit of run length
-    final int tailBits = (variableRunLength & 0x100) >>> 8;
-
-    // create first byte of the header
-    final int headerFirstByte = getOpcode() | efb | tailBits;
-
-    // second byte of the header stores the remaining 8 bits of runlength
-    final int headerSecondByte = variableRunLength & 0xff;
-
-    // if the min value is negative toggle the sign
-    final boolean isNegative = min < 0 ? true : false;
-    if (isNegative) {
-      min = -min;
-    }
-
-    // find the number of bytes required for base and shift it by 5 bits
-    // to accommodate patch width. The additional bit is used to store the sign
-    // of the base value.
-    final int baseWidth = utils.findClosestNumBits(min) + 1;
-    final int baseBytes = baseWidth % 8 == 0 ? baseWidth / 8 : (baseWidth / 8) + 1;
-    final int bb = (baseBytes - 1) << 5;
-
-    // if the base value is negative then set MSB to 1
-    if (isNegative) {
-      min |= (1L << ((baseBytes * 8) - 1));
-    }
-
-    // third byte contains 3 bits for number of bytes occupied by base
-    // and 5 bits for patchWidth
-    final int headerThirdByte = bb | utils.encodeBitWidth(patchWidth);
-
-    // fourth byte contains 3 bits for page gap width and 5 bits for
-    // patch length
-    final int headerFourthByte = (patchGapWidth - 1) << 5 | patchLength;
-
-    // write header
-    output.write(headerFirstByte);
-    output.write(headerSecondByte);
-    output.write(headerThirdByte);
-    output.write(headerFourthByte);
-
-    // write the base value using fixed bytes in big endian order
-    for(int i = baseBytes - 1; i >= 0; i--) {
-      byte b = (byte) ((min >>> (i * 8)) & 0xff);
-      output.write(b);
-    }
-
-    // base reduced literals are bit packed
-    int closestFixedBits = utils.getClosestFixedBits(fb);
-
-    utils.writeInts(baseRedLiterals, 0, numLiterals, closestFixedBits,
-        output);
-
-    // write patch list
-    closestFixedBits = utils.getClosestFixedBits(patchGapWidth + patchWidth);
-
-    utils.writeInts(gapVsPatchList, 0, gapVsPatchList.length, closestFixedBits,
-        output);
-
-    // reset run length
-    variableRunLength = 0;
-  }
-
-  /**
-   * Store the opcode in 2 MSB bits
-   * @return opcode
-   */
-  private int getOpcode() {
-    return encoding.ordinal() << 6;
-  }
-
-  private void writeDirectValues() throws IOException {
-
-    // write the number of fixed bits required in next 5 bits
-    int fb = zzBits100p;
-
-    if (alignedBitpacking) {
-      fb = utils.getClosestAlignedFixedBits(fb);
-    }
-
-    final int efb = utils.encodeBitWidth(fb) << 1;
-
-    // adjust variable run length
-    variableRunLength -= 1;
-
-    // extract the 9th bit of run length
-    final int tailBits = (variableRunLength & 0x100) >>> 8;
-
-    // create first byte of the header
-    final int headerFirstByte = getOpcode() | efb | tailBits;
-
-    // second byte of the header stores the remaining 8 bits of runlength
-    final int headerSecondByte = variableRunLength & 0xff;
-
-    // write header
-    output.write(headerFirstByte);
-    output.write(headerSecondByte);
-
-    // bit packing the zigzag encoded literals
-    utils.writeInts(zigzagLiterals, 0, numLiterals, fb, output);
-
-    // reset run length
-    variableRunLength = 0;
-  }
-
-  private void writeShortRepeatValues() throws IOException {
-    // get the value that is repeating, compute the bits and bytes required
-    long repeatVal = 0;
-    if (signed) {
-      repeatVal = utils.zigzagEncode(literals[0]);
-    } else {
-      repeatVal = literals[0];
-    }
-
-    final int numBitsRepeatVal = utils.findClosestNumBits(repeatVal);
-    final int numBytesRepeatVal = numBitsRepeatVal % 8 == 0 ? numBitsRepeatVal >>> 3
-        : (numBitsRepeatVal >>> 3) + 1;
-
-    // write encoding type in top 2 bits
-    int header = getOpcode();
-
-    // write the number of bytes required for the value
-    header |= ((numBytesRepeatVal - 1) << 3);
-
-    // write the run length
-    fixedRunLength -= MIN_REPEAT;
-    header |= fixedRunLength;
-
-    // write the header
-    output.write(header);
-
-    // write the repeating value in big endian byte order
-    for(int i = numBytesRepeatVal - 1; i >= 0; i--) {
-      int b = (int) ((repeatVal >>> (i * 8)) & 0xff);
-      output.write(b);
-    }
-
-    fixedRunLength = 0;
-  }
-
-  private void determineEncoding() {
-
-    // we need to compute zigzag values for DIRECT encoding if we decide to
-    // break early for delta overflows or for shorter runs
-    computeZigZagLiterals();
-
-    zzBits100p = utils.percentileBits(zigzagLiterals, 0, numLiterals, 1.0);
-
-    // not a big win for shorter runs to determine encoding
-    if (numLiterals <= MIN_REPEAT) {
-      encoding = EncodingType.DIRECT;
-      return;
-    }
-
-    // DELTA encoding check
-
-    // for identifying monotonic sequences
-    boolean isIncreasing = true;
-    boolean isDecreasing = true;
-    this.isFixedDelta = true;
-
-    this.min = literals[0];
-    long max = literals[0];
-    final long initialDelta = literals[1] - literals[0];
-    long currDelta = initialDelta;
-    long deltaMax = initialDelta;
-    this.adjDeltas[0] = initialDelta;
-
-    for (int i = 1; i < numLiterals; i++) {
-      final long l1 = literals[i];
-      final long l0 = literals[i - 1];
-      currDelta = l1 - l0;
-      min = Math.min(min, l1);
-      max = Math.max(max, l1);
-
-      isIncreasing &= (l0 <= l1);
-      isDecreasing &= (l0 >= l1);
-
-      isFixedDelta &= (currDelta == initialDelta);
-      if (i > 1) {
-        adjDeltas[i - 1] = Math.abs(currDelta);
-        deltaMax = Math.max(deltaMax, adjDeltas[i - 1]);
-      }
-    }
-
-    // its faster to exit under delta overflow condition without checking for
-    // PATCHED_BASE condition as encoding using DIRECT is faster and has less
-    // overhead than PATCHED_BASE
-    if (!utils.isSafeSubtract(max, min)) {
-      encoding = EncodingType.DIRECT;
-      return;
-    }
-
-    // invariant - subtracting any number from any other in the literals after
-    // this point won't overflow
-
-    // if initialDelta is 0 then we cannot delta encode as we cannot identify
-    // the sign of deltas (increasing or decreasing)
-    if (initialDelta != 0) {
-
-      // if min is equal to max then the delta is 0, this condition happens for
-      // fixed values run >10 which cannot be encoded with SHORT_REPEAT
-      if (min == max) {
-        assert isFixedDelta : min + "==" + max +
-            ", isFixedDelta cannot be false";
-        assert currDelta == 0 : min + "==" + max + ", currDelta should be zero";
-        fixedDelta = 0;
-        encoding = EncodingType.DELTA;
-        return;
-      }
-
-      if (isFixedDelta) {
-        assert currDelta == initialDelta
-            : "currDelta should be equal to initialDelta for fixed delta encoding";
-        encoding = EncodingType.DELTA;
-        fixedDelta = currDelta;
-        return;
-      }
-
-      // stores the number of bits required for packing delta blob in
-      // delta encoding
-      bitsDeltaMax = utils.findClosestNumBits(deltaMax);
-
-      // monotonic condition
-      if (isIncreasing || isDecreasing) {
-        encoding = EncodingType.DELTA;
-        return;
-      }
-    }
-
-    // PATCHED_BASE encoding check
-
-    // percentile values are computed for the zigzag encoded values. if the
-    // number of bit requirement between 90th and 100th percentile varies
-    // beyond a threshold then we need to patch the values. if the variation
-    // is not significant then we can use direct encoding
-
-    zzBits90p = utils.percentileBits(zigzagLiterals, 0, numLiterals, 0.9);
-    int diffBitsLH = zzBits100p - zzBits90p;
-
-    // if the difference between 90th percentile and 100th percentile fixed
-    // bits is > 1 then we need patch the values
-    if (diffBitsLH > 1) {
-
-      // patching is done only on base reduced values.
-      // remove base from literals
-      for (int i = 0; i < numLiterals; i++) {
-        baseRedLiterals[i] = literals[i] - min;
-      }
-
-      // 95th percentile width is used to determine max allowed value
-      // after which patching will be done
-      brBits95p = utils.percentileBits(baseRedLiterals, 0, numLiterals, 0.95);
-
-      // 100th percentile is used to compute the max patch width
-      brBits100p = utils.percentileBits(baseRedLiterals, 0, numLiterals, 1.0);
-
-      // after base reducing the values, if the difference in bits between
-      // 95th percentile and 100th percentile value is zero then there
-      // is no point in patching the values, in which case we will
-      // fallback to DIRECT encoding.
-      // The decision to use patched base was based on zigzag values, but the
-      // actual patching is done on base reduced literals.
-      if ((brBits100p - brBits95p) != 0) {
-        encoding = EncodingType.PATCHED_BASE;
-        preparePatchedBlob();
-        return;
-      } else {
-        encoding = EncodingType.DIRECT;
-        return;
-      }
-    } else {
-      // if difference in bits between 95th percentile and 100th percentile is
-      // 0, then patch length will become 0. Hence we will fallback to direct
-      encoding = EncodingType.DIRECT;
-      return;
-    }
-  }
-
-  private void computeZigZagLiterals() {
-    // populate zigzag encoded literals
-    long zzEncVal = 0;
-    for (int i = 0; i < numLiterals; i++) {
-      if (signed) {
-        zzEncVal = utils.zigzagEncode(literals[i]);
-      } else {
-        zzEncVal = literals[i];
-      }
-      zigzagLiterals[i] = zzEncVal;
-    }
-  }
-
-  private void preparePatchedBlob() {
-    // mask will be max value beyond which patch will be generated
-    long mask = (1L << brBits95p) - 1;
-
-    // since we are considering only 95 percentile, the size of gap and
-    // patch array can contain only be 5% values
-    patchLength = (int) Math.ceil((numLiterals * 0.05));
-
-    int[] gapList = new int[patchLength];
-    long[] patchList = new long[patchLength];
-
-    // #bit for patch
-    patchWidth = brBits100p - brBits95p;
-    patchWidth = utils.getClosestFixedBits(patchWidth);
-
-    // if patch bit requirement is 64 then it will not possible to pack
-    // gap and patch together in a long. To make sure gap and patch can be
-    // packed together adjust the patch width
-    if (patchWidth == 64) {
-      patchWidth = 56;
-      brBits95p = 8;
-      mask = (1L << brBits95p) - 1;
-    }
-
-    int gapIdx = 0;
-    int patchIdx = 0;
-    int prev = 0;
-    int gap = 0;
-    int maxGap = 0;
-
-    for(int i = 0; i < numLiterals; i++) {
-      // if value is above mask then create the patch and record the gap
-      if (baseRedLiterals[i] > mask) {
-        gap = i - prev;
-        if (gap > maxGap) {
-          maxGap = gap;
-        }
-
-        // gaps are relative, so store the previous patched value index
-        prev = i;
-        gapList[gapIdx++] = gap;
-
-        // extract the most significant bits that are over mask bits
-        long patch = baseRedLiterals[i] >>> brBits95p;
-        patchList[patchIdx++] = patch;
-
-        // strip off the MSB to enable safe bit packing
-        baseRedLiterals[i] &= mask;
-      }
-    }
-
-    // adjust the patch length to number of entries in gap list
-    patchLength = gapIdx;
-
-    // if the element to be patched is the first and only element then
-    // max gap will be 0, but to store the gap as 0 we need atleast 1 bit
-    if (maxGap == 0 && patchLength != 0) {
-      patchGapWidth = 1;
-    } else {
-      patchGapWidth = utils.findClosestNumBits(maxGap);
-    }
-
-    // special case: if the patch gap width is greater than 256, then
-    // we need 9 bits to encode the gap width. But we only have 3 bits in
-    // header to record the gap width. To deal with this case, we will save
-    // two entries in patch list in the following way
-    // 256 gap width => 0 for patch value
-    // actual gap - 256 => actual patch value
-    // We will do the same for gap width = 511. If the element to be patched is
-    // the last element in the scope then gap width will be 511. In this case we
-    // will have 3 entries in the patch list in the following way
-    // 255 gap width => 0 for patch value
-    // 255 gap width => 0 for patch value
-    // 1 gap width => actual patch value
-    if (patchGapWidth > 8) {
-      patchGapWidth = 8;
-      // for gap = 511, we need two additional entries in patch list
-      if (maxGap == 511) {
-        patchLength += 2;
-      } else {
-        patchLength += 1;
-      }
-    }
-
-    // create gap vs patch list
-    gapIdx = 0;
-    patchIdx = 0;
-    gapVsPatchList = new long[patchLength];
-    for(int i = 0; i < patchLength; i++) {
-      long g = gapList[gapIdx++];
-      long p = patchList[patchIdx++];
-      while (g > 255) {
-        gapVsPatchList[i++] = (255L << patchWidth);
-        g -= 255;
-      }
-
-      // store patch value in LSBs and gap in MSBs
-      gapVsPatchList[i] = (g << patchWidth) | p;
-    }
-  }
-
-  /**
-   * clears all the variables
-   */
-  private void clear() {
-    numLiterals = 0;
-    encoding = null;
-    prevDelta = 0;
-    fixedDelta = 0;
-    zzBits90p = 0;
-    zzBits100p = 0;
-    brBits95p = 0;
-    brBits100p = 0;
-    bitsDeltaMax = 0;
-    patchGapWidth = 0;
-    patchLength = 0;
-    patchWidth = 0;
-    gapVsPatchList = null;
-    min = 0;
-    isFixedDelta = true;
-  }
-
-  @Override
-  public void flush() throws IOException {
-    if (numLiterals != 0) {
-      if (variableRunLength != 0) {
-        determineEncoding();
-        writeValues();
-      } else if (fixedRunLength != 0) {
-        if (fixedRunLength < MIN_REPEAT) {
-          variableRunLength = fixedRunLength;
-          fixedRunLength = 0;
-          determineEncoding();
-          writeValues();
-        } else if (fixedRunLength >= MIN_REPEAT
-            && fixedRunLength <= MAX_SHORT_REPEAT_LENGTH) {
-          encoding = EncodingType.SHORT_REPEAT;
-          writeValues();
-        } else {
-          encoding = EncodingType.DELTA;
-          isFixedDelta = true;
-          writeValues();
-        }
-      }
-    }
-    output.flush();
-  }
-
-  @Override
-  public void write(long val) throws IOException {
-    if (numLiterals == 0) {
-      initializeLiterals(val);
-    } else {
-      if (numLiterals == 1) {
-        prevDelta = val - literals[0];
-        literals[numLiterals++] = val;
-        // if both values are same count as fixed run else variable run
-        if (val == literals[0]) {
-          fixedRunLength = 2;
-          variableRunLength = 0;
-        } else {
-          fixedRunLength = 0;
-          variableRunLength = 2;
-        }
-      } else {
-        long currentDelta = val - literals[numLiterals - 1];
-        if (prevDelta == 0 && currentDelta == 0) {
-          // fixed delta run
-
-          literals[numLiterals++] = val;
-
-          // if variable run is non-zero then we are seeing repeating
-          // values at the end of variable run in which case keep
-          // updating variable and fixed runs
-          if (variableRunLength > 0) {
-            fixedRunLength = 2;
-          }
-          fixedRunLength += 1;
-
-          // if fixed run met the minimum condition and if variable
-          // run is non-zero then flush the variable run and shift the
-          // tail fixed runs to start of the buffer
-          if (fixedRunLength >= MIN_REPEAT && variableRunLength > 0) {
-            numLiterals -= MIN_REPEAT;
-            variableRunLength -= MIN_REPEAT - 1;
-            // copy the tail fixed runs
-            long[] tailVals = new long[MIN_REPEAT];
-            System.arraycopy(literals, numLiterals, tailVals, 0, MIN_REPEAT);
-
-            // determine variable encoding and flush values
-            determineEncoding();
-            writeValues();
-
-            // shift tail fixed runs to beginning of the buffer
-            for(long l : tailVals) {
-              literals[numLiterals++] = l;
-            }
-          }
-
-          // if fixed runs reached max repeat length then write values
-          if (fixedRunLength == MAX_SCOPE) {
-            determineEncoding();
-            writeValues();
-          }
-        } else {
-          // variable delta run
-
-          // if fixed run length is non-zero and if it satisfies the
-          // short repeat conditions then write the values as short repeats
-          // else use delta encoding
-          if (fixedRunLength >= MIN_REPEAT) {
-            if (fixedRunLength <= MAX_SHORT_REPEAT_LENGTH) {
-              encoding = EncodingType.SHORT_REPEAT;
-              writeValues();
-            } else {
-              encoding = EncodingType.DELTA;
-              isFixedDelta = true;
-              writeValues();
-            }
-          }
-
-          // if fixed run length is <MIN_REPEAT and current value is
-          // different from previous then treat it as variable run
-          if (fixedRunLength > 0 && fixedRunLength < MIN_REPEAT) {
-            if (val != literals[numLiterals - 1]) {
-              variableRunLength = fixedRunLength;
-              fixedRunLength = 0;
-            }
-          }
-
-          // after writing values re-initialize the variables
-          if (numLiterals == 0) {
-            initializeLiterals(val);
-          } else {
-            // keep updating variable run lengths
-            prevDelta = val - literals[numLiterals - 1];
-            literals[numLiterals++] = val;
-            variableRunLength += 1;
-
-            // if variable run length reach the max scope, write it
-            if (variableRunLength == MAX_SCOPE) {
-              determineEncoding();
-              writeValues();
-            }
-          }
-        }
-      }
-    }
-  }
-
-  private void initializeLiterals(long val) {
-    literals[numLiterals++] = val;
-    fixedRunLength = 1;
-    variableRunLength = 1;
-  }
-
-  @Override
-  public void getPosition(PositionRecorder recorder) throws IOException {
-    output.getPosition(recorder);
-    recorder.addPosition(numLiterals);
-  }
-}

[7/7] tajo git commit: TAJO-2102: Migrate to Apache Orc from Presto's one.

Posted by ji...@apache.org.

TAJO-2102: Migrate to Apache Orc from Presto's one.

Closes #985


Project: http://git-wip-us.apache.org/repos/asf/tajo/repo
Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/68263585
Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/68263585
Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/68263585

Branch: refs/heads/master
Commit: 68263585296e30f93e541c36908a652df7398b9e
Parents: 9fcc9fd
Author: Jihoon Son <ji...@apache.org>
Authored: Wed Mar 23 10:39:31 2016 +0900
Committer: Jihoon Son <ji...@apache.org>
Committed: Wed Mar 23 10:39:59 2016 +0900

----------------------------------------------------------------------
 .../java/org/apache/tajo/catalog/TypeDesc.java  |    4 +
 .../tajo-catalog-drivers/tajo-hive/pom.xml      |  198 ++-
 .../tajo/catalog/store/HiveCatalogStore.java    |   15 +-
 .../tajo/catalog/store/HiveCatalogUtil.java     |    3 +
 .../catalog/store/TestHiveCatalogStore.java     |    1 +
 .../org/apache/tajo/cli/tools/TajoDump.java     |    2 +-
 .../org/apache/tajo/datum/TimestampDatum.java   |    2 +-
 .../apache/tajo/storage/StorageConstants.java   |    6 +-
 .../tajo/engine/query/TestSelectQuery.java      |   19 -
 .../apache/tajo/storage/TestQueryOnOrcFile.java |   79 +
 .../TestQueryOnOrcFile/timezoned/timezoned1.tbl |    3 +
 .../TestSelectQuery/timezoned/table1.tbl        |    3 -
 .../TestSelectQuery/timezoned/timezoned1.tbl    |    3 +
 .../datetime_table_timezoned_ddl.sql            |    5 +
 .../datetime_table_timezoned_orc_ddl.sql        |    4 +
 .../TestQueryOnOrcFile/testTimezone1.sql        |    1 +
 .../datetime_table_timezoned_orc_ddl.sql        |    4 -
 .../TestSelectQuery/testTimezonedORCTable.sql   |    2 -
 .../TestQueryOnOrcFile/testTimezone1.result     |    5 +
 .../TestQueryOnOrcFile/testTimezone2.result     |    5 +
 .../TestQueryOnOrcFile/testTimezone3.result     |    5 +
 .../TestQueryOnOrcFile/testTimezone4.result     |    5 +
 .../testTimezonedORCTable.result                |    5 -
 tajo-dist/pom.xml                               |   14 +-
 tajo-dist/src/main/bin/tajo                     |   10 +-
 tajo-project/pom.xml                            |    3 +-
 .../src/main/resources/storage-default.xml      |    2 +-
 .../src/test/resources/storage-default.xml      |    2 +-
 tajo-storage/tajo-storage-hdfs/pom.xml          |   34 +-
 .../apache/tajo/storage/orc/ORCAppender.java    |   93 +-
 .../org/apache/tajo/storage/orc/ORCScanner.java |  332 ----
 .../org/apache/tajo/storage/orc/OrcScanner.java |  460 ++++++
 .../objectinspector/ObjectInspectorFactory.java |   91 -
 .../TajoBlobObjectInspector.java                |   82 -
 .../TajoBooleanObjectInspector.java             |   76 -
 .../TajoDateObjectInspector.java                |   73 -
 .../TajoDoubleObjectInspector.java              |   76 -
 .../TajoFloatObjectInspector.java               |   76 -
 .../objectinspector/TajoIntObjectInspector.java |   76 -
 .../TajoLongObjectInspector.java                |   76 -
 .../TajoNullObjectInspector.java                |   69 -
 .../TajoPrimitiveObjectInspector.java           |   38 -
 .../TajoShortObjectInspector.java               |   76 -
 .../TajoStringObjectInspector.java              |   71 -
 .../TajoStructObjectInspector.java              |  122 --
 .../TajoTimestampObjectInspector.java           |   73 -
 .../thirdparty/orc/BinaryColumnStatistics.java  |   25 -
 .../storage/thirdparty/orc/BitFieldWriter.java  |   69 -
 .../storage/thirdparty/orc/BloomFilterIO.java   |   42 -
 .../thirdparty/orc/BooleanColumnStatistics.java |   27 -
 .../thirdparty/orc/ByteBufferAllocatorPool.java |  102 ++
 .../thirdparty/orc/ByteBufferPoolAdapter.java   |   41 +
 .../thirdparty/orc/ColumnStatistics.java        |   36 -
 .../thirdparty/orc/ColumnStatisticsImpl.java    | 1017 ------------
 .../thirdparty/orc/CompressionCodec.java        |   68 -
 .../storage/thirdparty/orc/CompressionKind.java |   27 -
 .../thirdparty/orc/DateColumnStatistics.java    |   37 -
 .../thirdparty/orc/DecimalColumnStatistics.java |   45 -
 .../orc/DirectDecompressionCodec.java           |   26 -
 .../thirdparty/orc/DoubleColumnStatistics.java  |   44 -
 .../thirdparty/orc/DynamicByteArray.java        |  303 ----
 .../storage/thirdparty/orc/DynamicIntArray.java |  142 --
 .../thirdparty/orc/HdfsOrcDataSource.java       |  133 --
 .../thirdparty/orc/IntegerColumnStatistics.java |   50 -
 .../storage/thirdparty/orc/IntegerWriter.java   |   47 -
 .../storage/thirdparty/orc/MemoryManager.java   |  212 ---
 .../tajo/storage/thirdparty/orc/Metadata.java   |   45 -
 .../tajo/storage/thirdparty/orc/OrcFile.java    |  389 +++--
 .../storage/thirdparty/orc/OrcRecordReader.java |  454 +++++
 .../tajo/storage/thirdparty/orc/OrcUtils.java   |  242 +--
 .../tajo/storage/thirdparty/orc/OutStream.java  |  286 ----
 .../thirdparty/orc/PositionRecorder.java        |   25 -
 .../thirdparty/orc/PositionedOutputStream.java  |   38 -
 .../thirdparty/orc/RecordReaderUtils.java       |  393 +++++
 .../storage/thirdparty/orc/RedBlackTree.java    |  309 ----
 .../thirdparty/orc/RunLengthByteWriter.java     |  106 --
 .../thirdparty/orc/RunLengthIntegerWriter.java  |  143 --
 .../orc/RunLengthIntegerWriterV2.java           |  832 ----------
 .../thirdparty/orc/SerializationUtils.java      |  844 ----------
 .../storage/thirdparty/orc/SnappyCodec.java     |  109 --
 .../tajo/storage/thirdparty/orc/StreamName.java |   95 --
 .../thirdparty/orc/StringColumnStatistics.java  |   41 -
 .../thirdparty/orc/StringRedBlackTree.java      |  202 ---
 .../thirdparty/orc/StripeInformation.java       |   59 -
 .../thirdparty/orc/StripeStatistics.java        |   42 -
 .../orc/TimestampColumnStatistics.java          |   38 -
 .../thirdparty/orc/TreeReaderFactory.java       | 1557 ++++++++++++++++++
 .../tajo/storage/thirdparty/orc/Writer.java     |    2 +
 .../tajo/storage/thirdparty/orc/WriterImpl.java |  813 +++++----
 .../storage/thirdparty/orc/ZeroCopyAdapter.java |   57 +
 .../tajo/storage/thirdparty/orc/ZlibCodec.java  |  169 --
 .../src/main/proto/orc_proto.proto              |  217 ---
 .../tajo/storage/TestCompressionStorages.java   |   13 +-
 .../org/apache/tajo/storage/TestStorages.java   |   69 +-
 .../resources/dataset/testVariousTypes.avsc     |    3 +-
 .../src/test/resources/storage-default.xml      |    2 +-
 96 files changed, 4214 insertions(+), 8277 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/TypeDesc.java
----------------------------------------------------------------------
diff --git a/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/TypeDesc.java b/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/TypeDesc.java
index 3bd0f00..3ca83f9 100644
--- a/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/TypeDesc.java
+++ b/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/TypeDesc.java
@@ -55,6 +55,10 @@ public class TypeDesc {
     }
   }
 
+  public Schema getNestedSchema() {
+    return nestedRecordSchema;
+  }
+
   public int hashCode() {
     return Objects.hashCode(dataType.hashCode(), nestedRecordSchema);
   }

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-catalog/tajo-catalog-drivers/tajo-hive/pom.xml
----------------------------------------------------------------------
diff --git a/tajo-catalog/tajo-catalog-drivers/tajo-hive/pom.xml b/tajo-catalog/tajo-catalog-drivers/tajo-hive/pom.xml
index 1a8a188..d848461 100644
--- a/tajo-catalog/tajo-catalog-drivers/tajo-hive/pom.xml
+++ b/tajo-catalog/tajo-catalog-drivers/tajo-hive/pom.xml
@@ -33,8 +33,6 @@
   <properties>
     <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
     <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
-    <parquet.version>1.5.0</parquet.version>
-    <parquet.format.version>2.1.0</parquet.format.version>
   </properties>
 
   <build>
@@ -136,19 +134,35 @@
     </dependency>
     <dependency>
       <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-mapreduce-client-core</artifactId>
+      <artifactId>hadoop-common</artifactId>
       <version>${hadoop.version}</version>
       <scope>provided</scope>
+      <exclusions>
+        <exclusion>
+          <artifactId>zookeeper</artifactId>
+          <groupId>org.apache.zookeeper</groupId>
+        </exclusion>
+      </exclusions>
     </dependency>
     <dependency>
       <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-common</artifactId>
+      <artifactId>hadoop-mapreduce-client-core</artifactId>
       <version>${hadoop.version}</version>
       <scope>provided</scope>
+      <exclusions>
+        <exclusion>
+          <artifactId>hadoop-yarn-common</artifactId>
+          <groupId>org.apache.hadoop</groupId>
+        </exclusion>
+        <exclusion>
+          <artifactId>netty</artifactId>
+          <groupId>io.netty</groupId>
+        </exclusion>
+      </exclusions>
     </dependency>
     <dependency>
       <groupId>org.apache.hive</groupId>
-      <artifactId>hive-exec</artifactId>
+      <artifactId>hive-metastore</artifactId>
       <version>${hive.version}</version>
       <scope>provided</scope>
       <exclusions>
@@ -158,129 +172,201 @@
         </exclusion>
         <exclusion>
           <groupId>org.apache.hive</groupId>
-          <artifactId>hive-contrib</artifactId>
+          <artifactId>hive-serde</artifactId>
         </exclusion>
         <exclusion>
           <groupId>org.apache.hive</groupId>
-          <artifactId>hive-hbase-handler</artifactId>
+          <artifactId>hive-shimss</artifactId>
         </exclusion>
         <exclusion>
-          <groupId>org.apache.hive</groupId>
-          <artifactId>hive-metastore</artifactId>
+          <groupId>org.apache.thrift</groupId>
+          <artifactId>libfb303</artifactId>
         </exclusion>
         <exclusion>
-          <groupId>org.apache.hive</groupId>
-          <artifactId>hive-serde</artifactId>
+          <groupId>org.apache.thrift</groupId>
+          <artifactId>libthrift</artifactId>
         </exclusion>
         <exclusion>
-          <groupId>org.apache.hive</groupId>
-          <artifactId>hive-shims</artifactId>
+          <groupId>com.jolbox</groupId>
+          <artifactId>bonecp</artifactId>
         </exclusion>
         <exclusion>
-          <groupId>org.apache.hive</groupId>
-          <artifactId>hive-testutils</artifactId>
+          <artifactId>tephra-hbase-compat-1.0</artifactId>
+          <groupId>co.cask.tephra</groupId>
         </exclusion>
         <exclusion>
-          <groupId>org.apache.thrift</groupId>
-          <artifactId>libfb303</artifactId>
+          <artifactId>tephra-core</artifactId>
+          <groupId>co.cask.tephra</groupId>
         </exclusion>
         <exclusion>
-          <groupId>org.apache.thrift</groupId>
-          <artifactId>libthrift</artifactId>
+          <artifactId>tephra-api</artifactId>
+          <groupId>co.cask.tephra</groupId>
         </exclusion>
         <exclusion>
-          <groupId>com.jolbox</groupId>
-          <artifactId>bonecp</artifactId>
+          <artifactId>hbase-client</artifactId>
+          <groupId>org.apache.hbase</groupId>
         </exclusion>
         <exclusion>
-          <groupId>com.google.protobuf</groupId>
-          <artifactId>protobuf-java</artifactId>
+          <artifactId>hadoop-yarn-server-resourcemanager</artifactId>
+          <groupId>org.apache.hadoop</groupId>
         </exclusion>
         <exclusion>
-          <groupId>org.apache.calcite</groupId>
-          <artifactId>calcite-core</artifactId>
+          <artifactId>antlr-runtime</artifactId>
+          <groupId>org.antlr</groupId>
         </exclusion>
         <exclusion>
-          <groupId>org.apache.calcite</groupId>
-          <artifactId>calcite-avatica</artifactId>
+          <artifactId>log4j-slf4j-impl</artifactId>
+          <groupId>org.apache.logging.log4j</groupId>
+        </exclusion>
+        <exclusion>
+          <artifactId>zookeeper</artifactId>
+          <groupId>org.apache.zookeeper</groupId>
         </exclusion>
       </exclusions>
     </dependency>
     <dependency>
       <groupId>org.apache.hive</groupId>
-      <artifactId>hive-metastore</artifactId>
+      <artifactId>hive-common</artifactId>
       <version>${hive.version}</version>
       <scope>provided</scope>
       <exclusions>
         <exclusion>
-          <groupId>org.apache.hive</groupId>
-          <artifactId>hive-common</artifactId>
+          <artifactId>jetty-all</artifactId>
+          <groupId>org.eclipse.jetty.aggregate</groupId>
         </exclusion>
         <exclusion>
-          <groupId>org.apache.hive</groupId>
-          <artifactId>hive-serde</artifactId>
+          <artifactId>javax.servlet</artifactId>
+          <groupId>org.eclipse.jetty.orbit</groupId>
         </exclusion>
         <exclusion>
-          <groupId>org.apache.hive</groupId>
-          <artifactId>hive-shimss</artifactId>
+          <artifactId>joda-time</artifactId>
+          <groupId>joda-time</groupId>
         </exclusion>
         <exclusion>
-          <groupId>org.apache.thrift</groupId>
-          <artifactId>libfb303</artifactId>
+          <artifactId>jackson-databind</artifactId>
+          <groupId>com.fasterxml.jackson.core</groupId>
         </exclusion>
         <exclusion>
-          <groupId>org.apache.thrift</groupId>
-          <artifactId>libthrift</artifactId>
+          <artifactId>metrics-json</artifactId>
+          <groupId>io.dropwizard.metrics</groupId>
         </exclusion>
         <exclusion>
-          <groupId>com.jolbox</groupId>
-          <artifactId>bonecp</artifactId>
+          <artifactId>metrics-jvm</artifactId>
+          <groupId>io.dropwizard.metrics</groupId>
+        </exclusion>
+        <exclusion>
+          <artifactId>metrics-core</artifactId>
+          <groupId>io.dropwizard.metrics</groupId>
+        </exclusion>
+        <exclusion>
+          <artifactId>ant</artifactId>
+          <groupId>org.apache.ant</groupId>
+        </exclusion>
+        <exclusion>
+          <artifactId>json</artifactId>
+          <groupId>org.json</groupId>
+        </exclusion>
+        <exclusion>
+          <artifactId>log4j-slf4j-impl</artifactId>
+          <groupId>org.apache.logging.log4j</groupId>
+        </exclusion>
+        <exclusion>
+          <artifactId>log4j-web</artifactId>
+          <groupId>org.apache.logging.log4j</groupId>
+        </exclusion>
+        <exclusion>
+          <artifactId>log4j-1.2-api</artifactId>
+          <groupId>org.apache.logging.log4j</groupId>
         </exclusion>
       </exclusions>
     </dependency>
     <dependency>
       <groupId>org.apache.hive</groupId>
-      <artifactId>hive-cli</artifactId>
+      <artifactId>hive-exec</artifactId>
       <version>${hive.version}</version>
       <scope>provided</scope>
       <exclusions>
         <exclusion>
+          <artifactId>hive-ant</artifactId>
           <groupId>org.apache.hive</groupId>
-          <artifactId>hive-common</artifactId>
         </exclusion>
         <exclusion>
+          <artifactId>hive-llap-tez</artifactId>
           <groupId>org.apache.hive</groupId>
-          <artifactId>hive-exec</artifactId>
         </exclusion>
         <exclusion>
-          <groupId>org.apache.hive</groupId>
-          <artifactId>hive-metastore</artifactId>
+          <artifactId>ST4</artifactId>
+          <groupId>org.antlr</groupId>
         </exclusion>
         <exclusion>
-          <groupId>org.apache.hive</groupId>
-          <artifactId>hive-serde</artifactId>
+          <artifactId>ivy</artifactId>
+          <groupId>org.apache.ivy</groupId>
         </exclusion>
         <exclusion>
-          <groupId>org.apache.hive</groupId>
-          <artifactId>hive-service</artifactId>
+          <artifactId>curator-framework</artifactId>
+          <groupId>org.apache.curator</groupId>
         </exclusion>
         <exclusion>
-          <groupId>org.apache.hive</groupId>
-          <artifactId>hive-shims</artifactId>
+          <artifactId>apache-curator</artifactId>
+          <groupId>org.apache.curator</groupId>
         </exclusion>
         <exclusion>
-          <groupId>com.jolbox</groupId>
-          <artifactId>bonecp</artifactId>
+          <artifactId>groovy-all</artifactId>
+          <groupId>org.codehaus.groovy</groupId>
+        </exclusion>
+        <exclusion>
+          <artifactId>calcite-core</artifactId>
+          <groupId>org.apache.calcite</groupId>
+        </exclusion>
+        <exclusion>
+          <artifactId>calcite-avatica</artifactId>
+          <groupId>org.apache.calcite</groupId>
+        </exclusion>
+        <exclusion>
+          <artifactId>stax-api</artifactId>
+          <groupId>stax</groupId>
         </exclusion>
         <exclusion>
-          <groupId>jline</groupId>
           <artifactId>jline</artifactId>
+          <groupId>jline</groupId>
+        </exclusion>
+        <exclusion>
+          <artifactId>log4j-1.2-api</artifactId>
+          <groupId>org.apache.logging.log4j</groupId>
+        </exclusion>
+        <exclusion>
+          <artifactId>log4j-slf4j-impl</artifactId>
+          <groupId>org.apache.logging.log4j</groupId>
+        </exclusion>
+        <exclusion>
+          <artifactId>ant</artifactId>
+          <groupId>org.apache.ant</groupId>
+        </exclusion>
+        <exclusion>
+          <artifactId>zookeeper</artifactId>
+          <groupId>org.apache.zookeeper</groupId>
+        </exclusion>
+        <exclusion>
+          <artifactId>antlr-runtime</artifactId>
+          <groupId>org.antlr</groupId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hive</groupId>
+      <artifactId>hive-serde</artifactId>
+      <version>${hive.version}</version>
+      <scope>provided</scope>
+      <exclusions>
+        <exclusion>
+          <artifactId>opencsv</artifactId>
+          <groupId>net.sf.opencsv</groupId>
         </exclusion>
       </exclusions>
     </dependency>
     <dependency>
-      <groupId>com.twitter</groupId>
-      <artifactId>parquet-hive-bundle</artifactId>
+      <groupId>org.apache.parquet</groupId>
+      <artifactId>parquet-hadoop-bundle</artifactId>
       <version>${parquet.version}</version>
     </dependency>
   </dependencies>

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-catalog/tajo-catalog-drivers/tajo-hive/src/main/java/org/apache/tajo/catalog/store/HiveCatalogStore.java
----------------------------------------------------------------------
diff --git a/tajo-catalog/tajo-catalog-drivers/tajo-hive/src/main/java/org/apache/tajo/catalog/store/HiveCatalogStore.java b/tajo-catalog/tajo-catalog-drivers/tajo-hive/src/main/java/org/apache/tajo/catalog/store/HiveCatalogStore.java
index 63f18b6..95cbf18 100644
--- a/tajo-catalog/tajo-catalog-drivers/tajo-hive/src/main/java/org/apache/tajo/catalog/store/HiveCatalogStore.java
+++ b/tajo-catalog/tajo-catalog-drivers/tajo-hive/src/main/java/org/apache/tajo/catalog/store/HiveCatalogStore.java
@@ -38,12 +38,15 @@ import org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe;
 import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
 import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe;
 import org.apache.hadoop.mapred.TextInputFormat;
+import org.apache.orc.OrcConf;
+import org.apache.parquet.hadoop.ParquetOutputFormat;
 import org.apache.tajo.BuiltinStorages;
 import org.apache.tajo.TajoConstants;
 import org.apache.tajo.algebra.Expr;
 import org.apache.tajo.algebra.IsNullPredicate;
 import org.apache.tajo.algebra.JsonHelper;
 import org.apache.tajo.catalog.*;
+import org.apache.tajo.catalog.TableMeta;
 import org.apache.tajo.catalog.partition.PartitionMethodDesc;
 import org.apache.tajo.catalog.proto.CatalogProtos;
 import org.apache.tajo.catalog.proto.CatalogProtos.*;
@@ -56,10 +59,8 @@ import org.apache.tajo.plan.util.PartitionFilterAlgebraVisitor;
 import org.apache.tajo.storage.StorageConstants;
 import org.apache.tajo.util.KeyValueSet;
 import org.apache.thrift.TException;
-import parquet.hadoop.ParquetOutputFormat;
 
 import java.io.File;
-import java.io.IOException;
 import java.util.*;
 
 public class HiveCatalogStore extends CatalogConstants implements CatalogStore {
@@ -564,6 +565,16 @@ public class HiveCatalogStore extends CatalogConstants implements CatalogStore {
           table.putToParameters(ParquetOutputFormat.COMPRESSION,
               tableDesc.getMeta().getProperty(ParquetOutputFormat.COMPRESSION));
         }
+      } else if (tableDesc.getMeta().getDataFormat().equalsIgnoreCase(BuiltinStorages.ORC)) {
+        StorageFormatDescriptor descriptor = storageFormatFactory.get(IOConstants.ORC);
+        sd.setInputFormat(descriptor.getInputFormat());
+        sd.setOutputFormat(descriptor.getOutputFormat());
+        sd.getSerdeInfo().setSerializationLib(descriptor.getSerde());
+
+        if (tableDesc.getMeta().containsProperty(OrcConf.COMPRESS.getAttribute())) {
+          table.putToParameters(OrcConf.COMPRESS.getAttribute(),
+              tableDesc.getMeta().getProperty(OrcConf.COMPRESS.getAttribute()));
+        }
       } else {
         throw new UnsupportedException(tableDesc.getMeta().getDataFormat() + " in HivecatalogStore");
       }

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-catalog/tajo-catalog-drivers/tajo-hive/src/main/java/org/apache/tajo/catalog/store/HiveCatalogUtil.java
----------------------------------------------------------------------
diff --git a/tajo-catalog/tajo-catalog-drivers/tajo-hive/src/main/java/org/apache/tajo/catalog/store/HiveCatalogUtil.java b/tajo-catalog/tajo-catalog-drivers/tajo-hive/src/main/java/org/apache/tajo/catalog/store/HiveCatalogUtil.java
index bbb7ade..87b391e 100644
--- a/tajo-catalog/tajo-catalog-drivers/tajo-hive/src/main/java/org/apache/tajo/catalog/store/HiveCatalogUtil.java
+++ b/tajo-catalog/tajo-catalog-drivers/tajo-hive/src/main/java/org/apache/tajo/catalog/store/HiveCatalogUtil.java
@@ -22,6 +22,7 @@ import org.apache.hadoop.hive.metastore.IMetaStoreClient;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
 import org.apache.hadoop.hive.ql.io.RCFileInputFormat;
+import org.apache.hadoop.hive.ql.io.orc.OrcSerde;
 import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.serde.serdeConstants;
@@ -137,6 +138,8 @@ public class HiveCatalogUtil {
       return BuiltinStorages.PARQUET;
     } else if (AvroSerDe.class.getName().equals(serde)) {
       return BuiltinStorages.AVRO;
+    } else if (OrcSerde.class.getName().equals(serde)) {
+      return BuiltinStorages.ORC;
     } else {
       throw new TajoRuntimeException(new UnknownDataFormatException(inputFormat));
     }

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-catalog/tajo-catalog-drivers/tajo-hive/src/test/java/org/apache/tajo/catalog/store/TestHiveCatalogStore.java
----------------------------------------------------------------------
diff --git a/tajo-catalog/tajo-catalog-drivers/tajo-hive/src/test/java/org/apache/tajo/catalog/store/TestHiveCatalogStore.java b/tajo-catalog/tajo-catalog-drivers/tajo-hive/src/test/java/org/apache/tajo/catalog/store/TestHiveCatalogStore.java
index 7e1a3a4..46935fc 100644
--- a/tajo-catalog/tajo-catalog-drivers/tajo-hive/src/test/java/org/apache/tajo/catalog/store/TestHiveCatalogStore.java
+++ b/tajo-catalog/tajo-catalog-drivers/tajo-hive/src/test/java/org/apache/tajo/catalog/store/TestHiveCatalogStore.java
@@ -78,6 +78,7 @@ public class TestHiveCatalogStore {
     conf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, warehousePath.toUri().toString());
     conf.set(HiveConf.ConfVars.METASTORECONNECTURLKEY.varname, jdbcUri);
     conf.set(TajoConf.ConfVars.WAREHOUSE_DIR.varname, warehousePath.toUri().toString());
+    conf.setBoolean("datanucleus.schema.autoCreateAll", true);
 
     // create local HiveCatalogStore.
     TajoConf tajoConf = new TajoConf(conf);

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-cli/src/main/java/org/apache/tajo/cli/tools/TajoDump.java
----------------------------------------------------------------------
diff --git a/tajo-cli/src/main/java/org/apache/tajo/cli/tools/TajoDump.java b/tajo-cli/src/main/java/org/apache/tajo/cli/tools/TajoDump.java
index 4df418f..c9fa2b4 100644
--- a/tajo-cli/src/main/java/org/apache/tajo/cli/tools/TajoDump.java
+++ b/tajo-cli/src/main/java/org/apache/tajo/cli/tools/TajoDump.java
@@ -208,7 +208,7 @@ public class TajoDump {
           }
         }
         writer.write("\n\n");
-      } catch (Exception e) {
+      } catch (Throwable e) {
         // dump for each table can throw any exception. We need to skip the exception case.
         // here, the error message prints out via stderr.
         System.err.println("ERROR:" + tableName + "," + e.getMessage());

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-common/src/main/java/org/apache/tajo/datum/TimestampDatum.java
----------------------------------------------------------------------
diff --git a/tajo-common/src/main/java/org/apache/tajo/datum/TimestampDatum.java b/tajo-common/src/main/java/org/apache/tajo/datum/TimestampDatum.java
index 5b4c152..f69e7da 100644
--- a/tajo-common/src/main/java/org/apache/tajo/datum/TimestampDatum.java
+++ b/tajo-common/src/main/java/org/apache/tajo/datum/TimestampDatum.java
@@ -125,7 +125,7 @@ public class TimestampDatum extends Datum {
 
   /**
    *
-   * @param tm TimeMEta
+   * @param tm TimeMeta
    * @param timeZone Timezone
    * @param includeTimeZone Add timezone if it is true. It is usually used for TIMEZONEZ
    * @return A timestamp string

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-common/src/main/java/org/apache/tajo/storage/StorageConstants.java
----------------------------------------------------------------------
diff --git a/tajo-common/src/main/java/org/apache/tajo/storage/StorageConstants.java b/tajo-common/src/main/java/org/apache/tajo/storage/StorageConstants.java
index 097963c..4612323 100644
--- a/tajo-common/src/main/java/org/apache/tajo/storage/StorageConstants.java
+++ b/tajo-common/src/main/java/org/apache/tajo/storage/StorageConstants.java
@@ -89,11 +89,7 @@ public class StorageConstants {
   public static final String DEFAULT_ORC_STRIPE_SIZE = "67108864"; // 64MB
 
   public static final String ORC_COMPRESSION = "orc.compress";
-  public static final String ORC_COMPRESSION_KIND_NONE = "none";
-  public static final String ORC_COMPRESSION_KIND_SNAPPY = "snappy";
-  public static final String ORC_COMPRESSION_KIND_LZO = "lzo";
-  public static final String ORC_COMPRESSION_KIND_ZIP = "zlip";
-  public static final String DEFAULT_ORC_COMPRESSION_KIND = ORC_COMPRESSION_KIND_NONE;
+  public static final String DEFAULT_ORC_COMPRESSION_KIND = "none";
 
   public static final String ORC_BUFFER_SIZE = "orc.buffer.size";
   public static final String DEFAULT_ORC_BUFFER_SIZE = "262144"; // 256KB

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-core-tests/src/test/java/org/apache/tajo/engine/query/TestSelectQuery.java
----------------------------------------------------------------------
diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/engine/query/TestSelectQuery.java b/tajo-core-tests/src/test/java/org/apache/tajo/engine/query/TestSelectQuery.java
index e55acf1..a2dec50 100644
--- a/tajo-core-tests/src/test/java/org/apache/tajo/engine/query/TestSelectQuery.java
+++ b/tajo-core-tests/src/test/java/org/apache/tajo/engine/query/TestSelectQuery.java
@@ -682,25 +682,6 @@ public class TestSelectQuery extends QueryTestCaseBase {
       executeString("DROP TABLE IF EXISTS timezoned_load2 PURGE");
     }
   }
-
-  @Test
-  public void testTimezonedORCTable() throws Exception {
-    try {
-
-      executeDDL("datetime_table_timezoned_ddl.sql", "timezoned", "timezoned");
-      executeDDL("datetime_table_timezoned_orc_ddl.sql", null, "timezoned_orc");
-
-      executeString("INSERT OVERWRITE INTO timezoned_orc SELECT t_timestamp, t_date FROM timezoned");
-
-      ResultSet res = executeQuery();
-      assertResultSet(res, "testTimezonedORCTable.result");
-      executeString("SET TIME ZONE 'GMT'");
-      cleanupQuery(res);
-    } finally {
-      executeString("DROP TABLE IF EXISTS timezoned");
-      executeString("DROP TABLE IF EXISTS timezoned_orc PURGE");
-    }
-  }
   
   @Test
   public void testMultiBytesDelimiter1() throws Exception {

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-core-tests/src/test/java/org/apache/tajo/storage/TestQueryOnOrcFile.java
----------------------------------------------------------------------
diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/storage/TestQueryOnOrcFile.java b/tajo-core-tests/src/test/java/org/apache/tajo/storage/TestQueryOnOrcFile.java
new file mode 100644
index 0000000..29d132e
--- /dev/null
+++ b/tajo-core-tests/src/test/java/org/apache/tajo/storage/TestQueryOnOrcFile.java
@@ -0,0 +1,122 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tajo.storage;
+
+import org.apache.tajo.IntegrationTest;
+import org.apache.tajo.QueryTestCaseBase;
+import org.junit.*;
+import org.junit.experimental.categories.Category;
+
+import java.sql.ResultSet;
+
+/**
+ * Integration tests that read an ORC table through sessions with different time zones.
+ *
+ * <p>Before each test a text table ({@code timezoned}) and an ORC table ({@code timezoned_orc})
+ * are created and the ORC table is populated from the text table; both are dropped afterwards.
+ * Every test restores the session time zone to GMT and releases its result set in a
+ * {@code finally} block so that a failing assertion does not leave session state behind.
+ */
+@Category(IntegrationTest.class)
+public class TestQueryOnOrcFile extends QueryTestCaseBase {
+
+  /** Creates the source text table and the ORC table, then copies the rows into the ORC table. */
+  @Before
+  public void setup() throws Exception {
+    executeDDL("datetime_table_timezoned_ddl.sql", "timezoned", "timezoned");
+    executeDDL("datetime_table_timezoned_orc_ddl.sql", null, "timezoned_orc");
+
+    executeString("INSERT OVERWRITE INTO timezoned_orc SELECT t_timestamp, t_date FROM timezoned");
+  }
+
+  /** Drops both tables created by {@link #setup()}. */
+  @After
+  public void teardown() throws Exception {
+    executeString("DROP TABLE IF EXISTS timezoned");
+    executeString("DROP TABLE IF EXISTS timezoned_orc PURGE");
+  }
+
+  /** Runs the query file for this test with the session time zone set to GMT+9. */
+  @Test
+  public void testTimezone1() throws Exception {
+    ResultSet res = null;
+    try {
+      executeString("SET TIME ZONE 'GMT+9'");
+      res = executeQuery();
+      assertResultSet(res);
+    } finally {
+      restoreSession(res);
+    }
+  }
+
+  /** Scans the ORC table with the session time zone set to GMT+1. */
+  @Test
+  public void testTimezone2() throws Exception {
+    ResultSet res = null;
+    try {
+      executeString("SET TIME ZONE 'GMT+1'");
+      res = executeString("select * from timezoned_orc");
+      assertResultSet(res);
+    } finally {
+      restoreSession(res);
+    }
+  }
+
+  /** Scans the ORC table with the session time zone set to GMT. */
+  @Test
+  public void testTimezone3() throws Exception {
+    ResultSet res = null;
+    try {
+      executeString("SET TIME ZONE 'GMT'");
+      res = executeString("select * from timezoned_orc");
+      assertResultSet(res);
+    } finally {
+      restoreSession(res);
+    }
+  }
+
+  /** Scans the ORC table after issuing {@code \set TIMEZONE 'GMT-5'}. */
+  @Test
+  public void testTimezone4() throws Exception {
+    ResultSet res = null;
+    try {
+      executeString("\\set TIMEZONE 'GMT-5'");
+      res = executeString("select * from timezoned_orc");
+      assertResultSet(res);
+    } finally {
+      restoreSession(res);
+    }
+  }
+
+  /**
+   * Resets the session time zone to GMT and releases the given result set.
+   *
+   * @param res result set to release; may be null when the query itself failed
+   * @throws Exception if resetting the time zone or releasing the result set fails
+   */
+  private void restoreSession(ResultSet res) throws Exception {
+    try {
+      executeString("SET TIME ZONE 'GMT'");
+    } finally {
+      if (res != null) {
+        cleanupQuery(res);
+      }
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-core-tests/src/test/resources/dataset/TestQueryOnOrcFile/timezoned/timezoned1.tbl
----------------------------------------------------------------------
diff --git a/tajo-core-tests/src/test/resources/dataset/TestQueryOnOrcFile/timezoned/timezoned1.tbl b/tajo-core-tests/src/test/resources/dataset/TestQueryOnOrcFile/timezoned/timezoned1.tbl
new file mode 100644
index 0000000..74b2e1b
--- /dev/null
+++ b/tajo-core-tests/src/test/resources/dataset/TestQueryOnOrcFile/timezoned/timezoned1.tbl
@@ -0,0 +1,3 @@
+1980-4-1 01:50:30.010|01:50:30.010|1980-04-01
+80/4/1 1:50:30 AM|1:50:30 AM|80/4/1
+1980 April 1 1:50:30|1:50:30|1980-04-01
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-core-tests/src/test/resources/dataset/TestSelectQuery/timezoned/table1.tbl
----------------------------------------------------------------------
diff --git a/tajo-core-tests/src/test/resources/dataset/TestSelectQuery/timezoned/table1.tbl b/tajo-core-tests/src/test/resources/dataset/TestSelectQuery/timezoned/table1.tbl
deleted file mode 100644
index 74b2e1b..0000000
--- a/tajo-core-tests/src/test/resources/dataset/TestSelectQuery/timezoned/table1.tbl
+++ /dev/null
@@ -1,3 +0,0 @@
-1980-4-1 01:50:30.010|01:50:30.010|1980-04-01
-80/4/1 1:50:30 AM|1:50:30 AM|80/4/1
-1980 April 1 1:50:30|1:50:30|1980-04-01
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-core-tests/src/test/resources/dataset/TestSelectQuery/timezoned/timezoned1.tbl
----------------------------------------------------------------------
diff --git a/tajo-core-tests/src/test/resources/dataset/TestSelectQuery/timezoned/timezoned1.tbl b/tajo-core-tests/src/test/resources/dataset/TestSelectQuery/timezoned/timezoned1.tbl
new file mode 100644
index 0000000..74b2e1b
--- /dev/null
+++ b/tajo-core-tests/src/test/resources/dataset/TestSelectQuery/timezoned/timezoned1.tbl
@@ -0,0 +1,3 @@
+1980-4-1 01:50:30.010|01:50:30.010|1980-04-01
+80/4/1 1:50:30 AM|1:50:30 AM|80/4/1
+1980 April 1 1:50:30|1:50:30|1980-04-01
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-core-tests/src/test/resources/queries/TestQueryOnOrcFile/datetime_table_timezoned_ddl.sql
----------------------------------------------------------------------
diff --git a/tajo-core-tests/src/test/resources/queries/TestQueryOnOrcFile/datetime_table_timezoned_ddl.sql b/tajo-core-tests/src/test/resources/queries/TestQueryOnOrcFile/datetime_table_timezoned_ddl.sql
new file mode 100644
index 0000000..9c5d30d
--- /dev/null
+++ b/tajo-core-tests/src/test/resources/queries/TestQueryOnOrcFile/datetime_table_timezoned_ddl.sql
@@ -0,0 +1,5 @@
+CREATE EXTERNAL TABLE ${0} (
+  t_timestamp  TIMESTAMP,
+  t_time       TIME,
+  t_date       DATE
+) USING TEXT WITH ('timezone' = 'GMT+9') LOCATION ${table.path}

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-core-tests/src/test/resources/queries/TestQueryOnOrcFile/datetime_table_timezoned_orc_ddl.sql
----------------------------------------------------------------------
diff --git a/tajo-core-tests/src/test/resources/queries/TestQueryOnOrcFile/datetime_table_timezoned_orc_ddl.sql b/tajo-core-tests/src/test/resources/queries/TestQueryOnOrcFile/datetime_table_timezoned_orc_ddl.sql
new file mode 100644
index 0000000..49e1f7e
--- /dev/null
+++ b/tajo-core-tests/src/test/resources/queries/TestQueryOnOrcFile/datetime_table_timezoned_orc_ddl.sql
@@ -0,0 +1,4 @@
+CREATE TABLE ${0} (
+  t_timestamp  TIMESTAMP,
+  t_date    DATE
+) USING ORC WITH ('timezone' = 'GMT+9')

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-core-tests/src/test/resources/queries/TestQueryOnOrcFile/testTimezone1.sql
----------------------------------------------------------------------
diff --git a/tajo-core-tests/src/test/resources/queries/TestQueryOnOrcFile/testTimezone1.sql b/tajo-core-tests/src/test/resources/queries/TestQueryOnOrcFile/testTimezone1.sql
new file mode 100644
index 0000000..2464c97
--- /dev/null
+++ b/tajo-core-tests/src/test/resources/queries/TestQueryOnOrcFile/testTimezone1.sql
@@ -0,0 +1 @@
+SELECT * FROM timezoned_orc;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-core-tests/src/test/resources/queries/TestSelectQuery/datetime_table_timezoned_orc_ddl.sql
----------------------------------------------------------------------
diff --git a/tajo-core-tests/src/test/resources/queries/TestSelectQuery/datetime_table_timezoned_orc_ddl.sql b/tajo-core-tests/src/test/resources/queries/TestSelectQuery/datetime_table_timezoned_orc_ddl.sql
deleted file mode 100644
index 49e1f7e..0000000
--- a/tajo-core-tests/src/test/resources/queries/TestSelectQuery/datetime_table_timezoned_orc_ddl.sql
+++ /dev/null
@@ -1,4 +0,0 @@
-CREATE TABLE ${0} (
-  t_timestamp  TIMESTAMP,
-  t_date    DATE
-) USING ORC WITH ('timezone' = 'GMT+9')

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-core-tests/src/test/resources/queries/TestSelectQuery/testTimezonedORCTable.sql
----------------------------------------------------------------------
diff --git a/tajo-core-tests/src/test/resources/queries/TestSelectQuery/testTimezonedORCTable.sql b/tajo-core-tests/src/test/resources/queries/TestSelectQuery/testTimezonedORCTable.sql
deleted file mode 100644
index 1d898bd..0000000
--- a/tajo-core-tests/src/test/resources/queries/TestSelectQuery/testTimezonedORCTable.sql
+++ /dev/null
@@ -1,2 +0,0 @@
-SET SESSION TIMEZONE = 'GMT+9';
-SELECT * FROM timezoned_orc;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-core-tests/src/test/resources/results/TestQueryOnOrcFile/testTimezone1.result
----------------------------------------------------------------------
diff --git a/tajo-core-tests/src/test/resources/results/TestQueryOnOrcFile/testTimezone1.result b/tajo-core-tests/src/test/resources/results/TestQueryOnOrcFile/testTimezone1.result
new file mode 100644
index 0000000..39f593b
--- /dev/null
+++ b/tajo-core-tests/src/test/resources/results/TestQueryOnOrcFile/testTimezone1.result
@@ -0,0 +1,5 @@
+t_timestamp,t_date
+-------------------------------
+1980-04-01 01:50:30.01,1980-04-01
+1980-04-01 01:50:30,1980-04-01
+1980-04-01 01:50:30,1980-04-01
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-core-tests/src/test/resources/results/TestQueryOnOrcFile/testTimezone2.result
----------------------------------------------------------------------
diff --git a/tajo-core-tests/src/test/resources/results/TestQueryOnOrcFile/testTimezone2.result b/tajo-core-tests/src/test/resources/results/TestQueryOnOrcFile/testTimezone2.result
new file mode 100644
index 0000000..c0e5cef
--- /dev/null
+++ b/tajo-core-tests/src/test/resources/results/TestQueryOnOrcFile/testTimezone2.result
@@ -0,0 +1,5 @@
+t_timestamp,t_date
+-------------------------------
+1980-03-31 17:50:30.01,1980-04-01
+1980-03-31 17:50:30,1980-04-01
+1980-03-31 17:50:30,1980-04-01
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-core-tests/src/test/resources/results/TestQueryOnOrcFile/testTimezone3.result
----------------------------------------------------------------------
diff --git a/tajo-core-tests/src/test/resources/results/TestQueryOnOrcFile/testTimezone3.result b/tajo-core-tests/src/test/resources/results/TestQueryOnOrcFile/testTimezone3.result
new file mode 100644
index 0000000..916f4be
--- /dev/null
+++ b/tajo-core-tests/src/test/resources/results/TestQueryOnOrcFile/testTimezone3.result
@@ -0,0 +1,5 @@
+t_timestamp,t_date
+-------------------------------
+1980-03-31 16:50:30.01,1980-04-01
+1980-03-31 16:50:30,1980-04-01
+1980-03-31 16:50:30,1980-04-01
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-core-tests/src/test/resources/results/TestQueryOnOrcFile/testTimezone4.result
----------------------------------------------------------------------
diff --git a/tajo-core-tests/src/test/resources/results/TestQueryOnOrcFile/testTimezone4.result b/tajo-core-tests/src/test/resources/results/TestQueryOnOrcFile/testTimezone4.result
new file mode 100644
index 0000000..98e0918
--- /dev/null
+++ b/tajo-core-tests/src/test/resources/results/TestQueryOnOrcFile/testTimezone4.result
@@ -0,0 +1,5 @@
+t_timestamp,t_date
+-------------------------------
+1980-03-31 11:50:30.01,1980-04-01
+1980-03-31 11:50:30,1980-04-01
+1980-03-31 11:50:30,1980-04-01
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-core-tests/src/test/resources/results/TestSelectQuery/testTimezonedORCTable.result
----------------------------------------------------------------------
diff --git a/tajo-core-tests/src/test/resources/results/TestSelectQuery/testTimezonedORCTable.result b/tajo-core-tests/src/test/resources/results/TestSelectQuery/testTimezonedORCTable.result
deleted file mode 100644
index 39f593b..0000000
--- a/tajo-core-tests/src/test/resources/results/TestSelectQuery/testTimezonedORCTable.result
+++ /dev/null
@@ -1,5 +0,0 @@
-t_timestamp,t_date
--------------------------------
-1980-04-01 01:50:30.01,1980-04-01
-1980-04-01 01:50:30,1980-04-01
-1980-04-01 01:50:30,1980-04-01
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-dist/pom.xml
----------------------------------------------------------------------
diff --git a/tajo-dist/pom.xml b/tajo-dist/pom.xml
index 095f128..652ab84 100644
--- a/tajo-dist/pom.xml
+++ b/tajo-dist/pom.xml
@@ -154,22 +154,14 @@
                       run cp -r ${project.basedir}/src/main/conf .
                       run rm -rf lib/tajo-*-${project.version}.jar
 
-                      run mkdir hive
-                      run mv lib/hive-*.jar hive/
-
+                      run mkdir -p lib
+                      run cp -r $ROOT/tajo-storage/tajo-storage-hdfs/target/lib/hive-*.jar lib/
+                      
                       run mkdir -p share/jdbc-dist
                       run cp -r $ROOT/tajo-jdbc/target/tajo-jdbc-${project.version}-jar-with-dependencies.jar ./share/jdbc-dist/tajo-jdbc-${project.version}.jar
 
                       run mkdir -p extlib
 
-                      if [ -f $ROOT/tajo-catalog/tajo-catalog-drivers/tajo-hive/target/lib/parquet-hive-bundle-*.jar ]
-                      then
-                      run cp -r $ROOT/tajo-catalog/tajo-catalog-drivers/tajo-hive/target/lib/parquet-hive-bundle-*.jar lib/
-                      echo
-                      echo "Tajo installed parquet-hive-bundle library at: ${project.build.directory}/tajo-${project.version}"
-                      echo
-                      fi
-
                       echo
                       echo "Tajo dist layout available at: ${project.build.directory}/tajo-${project.version}"
                       echo

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-dist/src/main/bin/tajo
----------------------------------------------------------------------
diff --git a/tajo-dist/src/main/bin/tajo b/tajo-dist/src/main/bin/tajo
index c08c538..007e960 100755
--- a/tajo-dist/src/main/bin/tajo
+++ b/tajo-dist/src/main/bin/tajo
@@ -300,11 +300,15 @@ if [ ! -z ${HIVE_HOME} ] && [ -d ${HIVE_HOME} ] && [ -d ${HIVE_LIB} ]; then
     CLASSPATH=${CLASSPATH}:$f;
   done
 
-  for f in ${HIVE_LIB}/datanucleus-*.jar; do
+  for f in ${HIVE_LIB}/javax.jdo-*.jar; do
     CLASSPATH=${CLASSPATH}:$f;
   done
-else
-  for f in $TAJO_HOME/hive/*.jar; do
+
+  for f in ${HIVE_LIB}/log4j-core-*.jar; do
+    CLASSPATH=${CLASSPATH}:$f;
+  done
+
+  for f in ${HIVE_LIB}/datanucleus-*.jar; do
     CLASSPATH=${CLASSPATH}:$f;
   done
 fi

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-project/pom.xml
----------------------------------------------------------------------
diff --git a/tajo-project/pom.xml b/tajo-project/pom.xml
index cd86d3b..27fa66b 100644
--- a/tajo-project/pom.xml
+++ b/tajo-project/pom.xml
@@ -36,10 +36,11 @@
     <hadoop.version>2.7.2</hadoop.version>
     <protobuf.version>2.5.0</protobuf.version>
     <hbase.version>1.1.1</hbase.version>
-    <hive.version>1.1.0</hive.version>
+    <hive.version>2.0.0</hive.version>
     <netty.version>4.0.34.Final</netty.version>
     <jersey.version>2.6</jersey.version>
     <jetty.version>6.1.26</jetty.version>
+    <parquet.version>1.8.1</parquet.version>
     <tajo.root>${project.parent.relativePath}/..</tajo.root>
     <extra.source.path>src/main/hadoop-${hadoop.version}</extra.source.path>
   </properties>

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-common/src/main/resources/storage-default.xml
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-common/src/main/resources/storage-default.xml b/tajo-storage/tajo-storage-common/src/main/resources/storage-default.xml
index 7f4661b..2454714 100644
--- a/tajo-storage/tajo-storage-common/src/main/resources/storage-default.xml
+++ b/tajo-storage/tajo-storage-common/src/main/resources/storage-default.xml
@@ -130,7 +130,7 @@
 
   <property>
     <name>tajo.storage.scanner-handler.orc.class</name>
-    <value>org.apache.tajo.storage.orc.ORCScanner</value>
+    <value>org.apache.tajo.storage.orc.OrcScanner</value>
   </property>
 
   <property>

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-common/src/test/resources/storage-default.xml
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-common/src/test/resources/storage-default.xml b/tajo-storage/tajo-storage-common/src/test/resources/storage-default.xml
index 934dd01..1c4530a 100644
--- a/tajo-storage/tajo-storage-common/src/test/resources/storage-default.xml
+++ b/tajo-storage/tajo-storage-common/src/test/resources/storage-default.xml
@@ -132,7 +132,7 @@
 
   <property>
     <name>tajo.storage.scanner-handler.orc.class</name>
-    <value>org.apache.tajo.storage.orc.ORCScanner</value>
+    <value>org.apache.tajo.storage.orc.OrcScanner</value>
   </property>
 
   <property>

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/pom.xml
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/pom.xml b/tajo-storage/tajo-storage-hdfs/pom.xml
index 5f66395..aa6e6a6 100644
--- a/tajo-storage/tajo-storage-hdfs/pom.xml
+++ b/tajo-storage/tajo-storage-hdfs/pom.xml
@@ -34,7 +34,6 @@
   <properties>
     <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
     <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
-    <parquet.version>1.8.1</parquet.version>
   </properties>
 
   <repositories>
@@ -129,7 +128,6 @@
                 <argument>--proto_path=../../tajo-catalog/tajo-catalog-common/src/main/proto</argument>
                 <argument>--java_out=target/generated-sources/proto</argument>
                 <argument>src/main/proto/StorageFragmentProtos.proto</argument>
-                <argument>src/main/proto/orc_proto.proto</argument>
               </arguments>
             </configuration>
             <goals>
@@ -161,6 +159,26 @@
         <groupId>org.apache.maven.plugins</groupId>
         <artifactId>maven-surefire-report-plugin</artifactId>
       </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-dependency-plugin</artifactId>
+        <executions>
+          <execution>
+            <id>copy-dependencies</id>
+            <phase>package</phase>
+            <goals>
+              <goal>copy-dependencies</goal>
+            </goals>
+            <configuration>
+              <includeScope>runtime</includeScope>
+              <outputDirectory>${project.build.directory}/lib</outputDirectory>
+              <overWriteReleases>false</overWriteReleases>
+              <overWriteSnapshots>false</overWriteSnapshots>
+              <overWriteIfNewer>true</overWriteIfNewer>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
     </plugins>
   </build>
 
@@ -345,10 +363,16 @@
       <artifactId>netty-buffer</artifactId>
     </dependency>
     <dependency>
-      <groupId>com.facebook.presto</groupId>
-      <artifactId>presto-orc</artifactId>
-      <version>0.141</version>
+      <groupId>org.apache.hive</groupId>
+      <artifactId>hive-orc</artifactId>
+      <version>${hive.version}</version>
     </dependency>
+    <dependency>
+      <groupId>org.apache.hive</groupId>
+      <artifactId>hive-storage-api</artifactId>
+      <version>${hive.version}</version>
+    </dependency>
+
   </dependencies>
 
   <profiles>

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/ORCAppender.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/ORCAppender.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/ORCAppender.java
index 7999d02..b27c640 100644
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/ORCAppender.java
+++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/ORCAppender.java
@@ -20,6 +20,9 @@ package org.apache.tajo.storage.orc;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
+import org.apache.orc.CompressionKind;
+import org.apache.orc.OrcConf;
+import org.apache.orc.TypeDescription;
 import org.apache.tajo.TajoConstants;
 import org.apache.tajo.TaskAttemptId;
 import org.apache.tajo.catalog.Schema;
@@ -29,12 +32,13 @@ import org.apache.tajo.storage.FileAppender;
 import org.apache.tajo.storage.StorageConstants;
 import org.apache.tajo.storage.TableStatistics;
 import org.apache.tajo.storage.Tuple;
-import org.apache.tajo.storage.orc.objectinspector.ObjectInspectorFactory;
-import org.apache.tajo.storage.thirdparty.orc.CompressionKind;
 import org.apache.tajo.storage.thirdparty.orc.OrcFile;
+import org.apache.tajo.storage.thirdparty.orc.OrcFile.EncodingStrategy;
+import org.apache.tajo.storage.thirdparty.orc.OrcUtils;
 import org.apache.tajo.storage.thirdparty.orc.Writer;
 
 import java.io.IOException;
+import java.util.Properties;
 import java.util.TimeZone;
 
 public class ORCAppender extends FileAppender {
@@ -46,21 +50,14 @@ public class ORCAppender extends FileAppender {
                      TableMeta meta, Path workDir) {
     super(conf, taskAttemptId, schema, meta, workDir);
 
-    timezone = TimeZone.getTimeZone(meta.getProperty(StorageConstants.TIMEZONE,
-        TajoConstants.DEFAULT_SYSTEM_TIMEZONE));
+    timezone = meta.containsProperty(StorageConstants.TIMEZONE) ?
+        TimeZone.getTimeZone(meta.getProperty(StorageConstants.TIMEZONE)) :
+        TimeZone.getDefault();
   }
 
   @Override
   public void init() throws IOException {
-    writer = OrcFile.createWriter(workDir.getFileSystem(conf), path, conf,
-      ObjectInspectorFactory.buildStructObjectInspector(schema),
-      Long.parseLong(meta.getProperty(StorageConstants.ORC_STRIPE_SIZE,
-        StorageConstants.DEFAULT_ORC_STRIPE_SIZE)), getCompressionKind(),
-      Integer.parseInt(meta.getProperty(StorageConstants.ORC_BUFFER_SIZE,
-        StorageConstants.DEFAULT_ORC_BUFFER_SIZE)),
-      Integer.parseInt(meta.getProperty(StorageConstants.ORC_ROW_INDEX_STRIDE,
-        StorageConstants.DEFAULT_ORC_ROW_INDEX_STRIDE)),
-      timezone);
+    writer = OrcFile.createWriter(path, buildWriterOptions(conf, meta, schema), timezone);
 
     if (tableStatsEnabled) {
       this.stats = new TableStatistics(schema, columnStatsEnabled);
@@ -90,7 +87,6 @@ public class ORCAppender extends FileAppender {
   public void close() throws IOException {
     writer.close();
 
-    // TODO: getOffset is not implemented yet
 //    if (tableStatsEnabled) {
 //      stats.setNumBytes(getOffset());
 //    }
@@ -107,24 +103,81 @@ public class ORCAppender extends FileAppender {
 
   @Override
   public long getEstimatedOutputSize() throws IOException {
-    return writer.getRawDataSize() * writer.getNumberOfRows();
+    return writer.getRawDataSize();
   }
 
-  private CompressionKind getCompressionKind() {
-    String kindstr = meta.getProperty(StorageConstants.ORC_COMPRESSION, StorageConstants.DEFAULT_ORC_COMPRESSION_KIND);
+  private static OrcFile.WriterOptions buildWriterOptions(Configuration conf, TableMeta meta, Schema schema) {
+    return OrcFile.writerOptions(conf)
+        .setSchema(OrcUtils.convertSchema(schema))
+        .compress(getCompressionKind(meta))
+        .stripeSize(Long.parseLong(meta.getProperty(OrcConf.STRIPE_SIZE.getAttribute(),
+            String.valueOf(OrcConf.STRIPE_SIZE.getDefaultValue()))))
+        .blockSize(Long.parseLong(meta.getProperty(OrcConf.BLOCK_SIZE.getAttribute(),
+            String.valueOf(OrcConf.BLOCK_SIZE.getDefaultValue()))))
+        .rowIndexStride(Integer.parseInt(meta.getProperty(OrcConf.ROW_INDEX_STRIDE.getAttribute(),
+            String.valueOf(OrcConf.ROW_INDEX_STRIDE.getDefaultValue()))))
+        .bufferSize(Integer.parseInt(meta.getProperty(OrcConf.BUFFER_SIZE.getAttribute(),
+            String.valueOf(OrcConf.BUFFER_SIZE.getDefaultValue()))))
+        .blockPadding(Boolean.parseBoolean(meta.getProperty(OrcConf.BLOCK_PADDING.getAttribute(),
+            String.valueOf(OrcConf.BLOCK_PADDING.getDefaultValue()))))
+        .encodingStrategy(EncodingStrategy.valueOf(meta.getProperty(OrcConf.ENCODING_STRATEGY.getAttribute(),
+            String.valueOf(OrcConf.ENCODING_STRATEGY.getDefaultValue()))))
+        .bloomFilterFpp(Double.parseDouble(meta.getProperty(OrcConf.BLOOM_FILTER_FPP.getAttribute(),
+            String.valueOf(OrcConf.BLOOM_FILTER_FPP.getDefaultValue()))))
+        .bloomFilterColumns(meta.getProperty(OrcConf.BLOOM_FILTER_COLUMNS.getAttribute(),
+            String.valueOf(OrcConf.BLOOM_FILTER_COLUMNS.getDefaultValue())));
+  }
+
+  private static CompressionKind getCompressionKind(TableMeta meta) {
+    String kindstr = meta.getProperty(OrcConf.COMPRESS.getAttribute(),
+        String.valueOf(OrcConf.COMPRESS.getDefaultValue()));
 
-    if (kindstr.equalsIgnoreCase(StorageConstants.ORC_COMPRESSION_KIND_ZIP)) {
+    if (kindstr.equalsIgnoreCase(CompressionKind.ZLIB.name())) {
       return CompressionKind.ZLIB;
     }
 
-    if (kindstr.equalsIgnoreCase(StorageConstants.ORC_COMPRESSION_KIND_SNAPPY)) {
+    if (kindstr.equalsIgnoreCase(CompressionKind.SNAPPY.name())) {
       return CompressionKind.SNAPPY;
     }
 
-    if (kindstr.equalsIgnoreCase(StorageConstants.ORC_COMPRESSION_KIND_LZO)) {
+    if (kindstr.equalsIgnoreCase(CompressionKind.LZO.name())) {
       return CompressionKind.LZO;
     }
 
     return CompressionKind.NONE;
   }
+
+  /**
+   * Options for creating ORC file writers.
+   */
+  public static class WriterOptions extends OrcFile.WriterOptions {
+    // Setting the default batch size to 1000 makes the memory check at 5000
+    // rows work the same as the row by row writer. (If it was the default 1024,
+    // the smallest stripe size would be 5120 rows, which changes the output
+    // of some of the tests.)
+    private int batchSize = 1000;
+
+    public WriterOptions(Properties tableProperties, Configuration conf) {
+      super(tableProperties, conf);
+    }
+
+    /**
+     * Set the schema for the file. This is a required parameter.
+     * @param schema the schema for the file.
+     * @return this
+     */
+    public WriterOptions setSchema(TypeDescription schema) {
+      super.setSchema(schema);
+      return this;
+    }
+
+    protected WriterOptions batchSize(int maxSize) {
+      batchSize = maxSize;
+      return this;
+    }
+
+    int getBatchSize() {
+      return batchSize;
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/ORCScanner.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/ORCScanner.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/ORCScanner.java
deleted file mode 100644
index 0a4ebc6..0000000
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/ORCScanner.java
+++ /dev/null
@@ -1,332 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.tajo.storage.orc;
-
-import com.facebook.presto.orc.OrcDataSource;
-import com.facebook.presto.orc.OrcPredicate;
-import com.facebook.presto.orc.OrcReader;
-import com.facebook.presto.orc.OrcRecordReader;
-import com.facebook.presto.orc.memory.AggregatedMemoryContext;
-import com.facebook.presto.orc.metadata.OrcMetadataReader;
-import com.facebook.presto.spi.block.Block;
-import com.facebook.presto.spi.type.*;
-import com.google.protobuf.InvalidProtocolBufferException;
-import io.airlift.units.DataSize;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.tajo.TajoConstants;
-import org.apache.tajo.catalog.Schema;
-import org.apache.tajo.catalog.TableMeta;
-import org.apache.tajo.common.TajoDataTypes;
-import org.apache.tajo.conf.TajoConf;
-import org.apache.tajo.datum.*;
-import org.apache.tajo.exception.NotImplementedException;
-import org.apache.tajo.exception.TajoRuntimeException;
-import org.apache.tajo.plan.expr.EvalNode;
-import org.apache.tajo.storage.FileScanner;
-import org.apache.tajo.storage.StorageConstants;
-import org.apache.tajo.storage.Tuple;
-import org.apache.tajo.storage.VTuple;
-import org.apache.tajo.storage.fragment.Fragment;
-import org.apache.tajo.storage.thirdparty.orc.HdfsOrcDataSource;
-import org.apache.tajo.util.datetime.DateTimeUtil;
-import org.joda.time.DateTimeZone;
-
-import java.io.IOException;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.TimeZone;
-
-/**
- * OrcScanner for reading ORC files
- */
-public class ORCScanner extends FileScanner {
-  private static final Log LOG = LogFactory.getLog(ORCScanner.class);
-  private OrcRecordReader recordReader;
-  private Block[] blocks;
-  private int currentPosInBatch = 0;
-  private int batchSize = 0;
-  private Tuple outTuple;
-  private AggregatedMemoryContext aggrMemoryContext = new AggregatedMemoryContext();
-
-  public ORCScanner(Configuration conf, final Schema schema, final TableMeta meta, final Fragment fragment) {
-    super(conf, schema, meta, fragment);
-  }
-
-  private FileSystem fs;
-  private FSDataInputStream fis;
-
-  private static class ColumnInfo {
-    TajoDataTypes.DataType type;
-    int id;
-  }
-
-  /**
-   * Temporary array for caching column info
-   */
-  private ColumnInfo [] targetColInfo;
-
-  @Override
-  public void init() throws IOException {
-    OrcReader orcReader;
-    DataSize maxMergeDistance = new DataSize(Double.parseDouble(meta.getProperty(StorageConstants.ORC_MAX_MERGE_DISTANCE,
-            StorageConstants.DEFAULT_ORC_MAX_MERGE_DISTANCE)), DataSize.Unit.BYTE);
-    DataSize maxReadSize = new DataSize(Double.parseDouble(meta.getProperty(StorageConstants.ORC_MAX_READ_BUFFER_SIZE,
-        StorageConstants.DEFAULT_ORC_MAX_READ_BUFFER_SIZE)), DataSize.Unit.BYTE);
-
-    if (targets == null) {
-      targets = schema.toArray();
-    }
-
-    outTuple = new VTuple(targets.length);
-
-    Path path = fragment.getPath();
-
-    if(fs == null) {
-      fs = FileScanner.getFileSystem((TajoConf)conf, path);
-    }
-
-    if(fis == null) {
-      fis = fs.open(path);
-    }
-
-    OrcDataSource orcDataSource = new HdfsOrcDataSource(
-        this.fragment.getPath().toString(),
-        fis,
-        fs.getFileStatus(path).getLen(),
-        maxMergeDistance,
-        maxReadSize);
-
-    targetColInfo = new ColumnInfo[targets.length];
-    for (int i=0; i<targets.length; i++) {
-      ColumnInfo cinfo = new ColumnInfo();
-      cinfo.type = targets[i].getDataType();
-      cinfo.id = schema.getColumnId(targets[i].getQualifiedName());
-      targetColInfo[i] = cinfo;
-    }
-
-    // creating blocks for buffering
-    blocks = new Block[targetColInfo.length];
-
-    Map<Integer, Type> columnMap = new HashMap<>();
-    for (ColumnInfo colInfo: targetColInfo) {
-      columnMap.put(colInfo.id, createFBtypeByTajoType(colInfo.type));
-    }
-
-    orcReader = new OrcReader(orcDataSource, new OrcMetadataReader(), maxMergeDistance, maxReadSize);
-
-    TimeZone timezone = TimeZone.getTimeZone(meta.getProperty(StorageConstants.TIMEZONE,
-      TajoConstants.DEFAULT_SYSTEM_TIMEZONE));
-
-    // TODO: make OrcPredicate useful
-    // presto-orc uses joda timezone, so it needs to be converted.
-    recordReader = orcReader.createRecordReader(columnMap, OrcPredicate.TRUE,
-        fragment.getStartKey(), fragment.getLength(), DateTimeZone.forTimeZone(timezone), aggrMemoryContext);
-
-    super.init();
-    LOG.debug("file fragment { path: " + fragment.getPath() +
-      ", start offset: " + fragment.getStartKey() +
-      ", length: " + fragment.getLength() + "}");
-  }
-
-  @Override
-  public Tuple next() throws IOException {
-    if (currentPosInBatch == batchSize) {
-      getNextBatch();
-
-      // EOF
-      if (batchSize == -1) {
-        return null;
-      }
-    }
-
-    for (int i=0; i<targetColInfo.length; i++) {
-      outTuple.put(i, createValueDatum(blocks[i], targetColInfo[i].type));
-    }
-
-    currentPosInBatch++;
-
-    return outTuple;
-  }
-
-  private Type createFBtypeByTajoType(TajoDataTypes.DataType type) {
-    switch(type.getType()) {
-      case BOOLEAN:
-        return BooleanType.BOOLEAN;
-
-      case INT1:
-      case INT2:
-      case INT4:
-      case INT8:
-      case INET4:
-      case NULL_TYPE: // meaningless
-        return BigintType.BIGINT;
-
-      case TIMESTAMP:
-        return TimestampType.TIMESTAMP;
-
-      case DATE:
-        return DateType.DATE;
-
-      case FLOAT4:
-      case FLOAT8:
-        return DoubleType.DOUBLE;
-
-      case CHAR:
-      case TEXT:
-        return VarcharType.VARCHAR;
-
-      case BLOB:
-      case PROTOBUF:
-        return VarbinaryType.VARBINARY;
-
-      default:
-        throw new TajoRuntimeException(new NotImplementedException(type.getType().name() + " for orc"));
-    }
-  }
-
-  // TODO: support more types
-  private Datum createValueDatum(Block block, TajoDataTypes.DataType type) {
-    if (block.isNull(currentPosInBatch))
-      return NullDatum.get();
-
-    // NOTE: block.get***() methods are determined by the type size wich is in createFBtypeByTajoType()
-    switch (type.getType()) {
-      case INT1:
-        return DatumFactory.createInt2((short)block.getLong(currentPosInBatch, 0));
-
-      case INT2:
-        return DatumFactory.createInt2((short)block.getLong(currentPosInBatch, 0));
-
-      case INT4:
-        return DatumFactory.createInt4((int)block.getLong(currentPosInBatch, 0));
-
-      case INT8:
-        return DatumFactory.createInt8(block.getLong(currentPosInBatch, 0));
-
-      case FLOAT4:
-        return DatumFactory.createFloat4((float)block.getDouble(currentPosInBatch, 0));
-
-      case FLOAT8:
-        return DatumFactory.createFloat8(block.getDouble(currentPosInBatch, 0));
-
-      case BOOLEAN:
-        return DatumFactory.createBool(block.getByte(currentPosInBatch, 0) != 0);
-
-      case CHAR:
-        return DatumFactory.createChar(block.getSlice(currentPosInBatch, 0,
-            block.getLength(currentPosInBatch)).getBytes());
-
-      case TEXT:
-        return DatumFactory.createText(block.getSlice(currentPosInBatch, 0,
-            block.getLength(currentPosInBatch)).getBytes());
-
-      case BLOB:
-        return DatumFactory.createBlob(block.getSlice(currentPosInBatch, 0,
-            block.getLength(currentPosInBatch)).getBytes());
-
-      case PROTOBUF:
-        try {
-          return ProtobufDatumFactory.createDatum(type, block.getSlice(currentPosInBatch, 0,
-              block.getLength(currentPosInBatch)).getBytes());
-        } catch (InvalidProtocolBufferException e) {
-          LOG.error("ERROR", e);
-          return NullDatum.get();
-        }
-
-      case TIMESTAMP:
-        return DatumFactory.createTimestamp(
-            DateTimeUtil.javaTimeToJulianTime(block.getLong(currentPosInBatch, 0)));
-
-      case DATE:
-        return DatumFactory.createDate(
-            block.getInt(currentPosInBatch, 0) + DateTimeUtil.DAYS_FROM_JULIAN_TO_EPOCH);
-
-      case INET4:
-        return DatumFactory.createInet4((int)block.getLong(currentPosInBatch, 0));
-
-      case NULL_TYPE:
-        return NullDatum.get();
-
-      default:
-        throw new TajoRuntimeException(new NotImplementedException(type.getType().name() + " for orc"));
-    }
-  }
-
-  /**
-   * Fetch next batch from ORC file and write to block data structure as many as batch size
-   *
-   * @throws IOException
-   */
-  private void getNextBatch() throws IOException {
-    batchSize = recordReader.nextBatch();
-
-    // end of file
-    if (batchSize == -1)
-      return;
-
-    for (int i=0; i<targetColInfo.length; i++) {
-      blocks[i] = recordReader.readBlock(createFBtypeByTajoType(targetColInfo[i].type), targetColInfo[i].id);
-    }
-
-    currentPosInBatch = 0;
-  }
-
-  @Override
-  public float getProgress() {
-    if(!inited) return super.getProgress();
-
-    return recordReader.getProgress();
-  }
-
-  @Override
-  public void reset() throws IOException {
-  }
-
-  @Override
-  public void close() throws IOException {
-    if (recordReader != null) {
-      recordReader.close();
-    }
-  }
-
-  @Override
-  public boolean isProjectable() {
-    return true;
-  }
-
-  @Override
-  public boolean isSelectable() {
-    return false;
-  }
-
-  @Override
-  public void setFilter(EvalNode filter) {
-    // TODO: implement it
-  }
-
-  @Override
-  public boolean isSplittable() {
-    return true;
-  }
-}

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/OrcScanner.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/OrcScanner.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/OrcScanner.java
new file mode 100644
index 0000000..c8aa67b
--- /dev/null
+++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/OrcScanner.java
@@ -0,0 +1,460 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tajo.storage.orc;
+
+import com.google.common.collect.Lists;
+import com.google.protobuf.CodedInputStream;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.Text;
+import org.apache.orc.*;
+import org.apache.orc.Reader.Options;
+import org.apache.orc.impl.BufferChunk;
+import org.apache.orc.impl.InStream;
+import org.apache.tajo.TajoConstants;
+import org.apache.tajo.catalog.Schema;
+import org.apache.tajo.catalog.TableMeta;
+import org.apache.tajo.plan.expr.EvalNode;
+import org.apache.tajo.storage.FileScanner;
+import org.apache.tajo.storage.StorageConstants;
+import org.apache.tajo.storage.Tuple;
+import org.apache.tajo.storage.fragment.Fragment;
+import org.apache.tajo.storage.thirdparty.orc.OrcRecordReader;
+import org.apache.tajo.storage.thirdparty.orc.OrcUtils;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.TimeZone;
+
+public class OrcScanner extends FileScanner {
+  private static final Log LOG = LogFactory.getLog(OrcScanner.class);
+
+  private static final int DIRECTORY_SIZE_GUESS = 16 * 1024;
+
+  protected final FileSystem fileSystem;
+  private final long maxLength = Long.MAX_VALUE;
+  protected final Path path;
+  protected org.apache.orc.CompressionKind compressionKind;
+  protected CompressionCodec codec;
+  protected int bufferSize;
+  private List<OrcProto.StripeStatistics> stripeStats;
+  private int metadataSize;
+  protected List<OrcProto.Type> types;
+  private List<OrcProto.UserMetadataItem> userMetadata;
+  private List<OrcProto.ColumnStatistics> fileStats;
+  private List<StripeInformation> stripes;
+  protected int rowIndexStride;
+  private long contentLength, numberOfRows;
+
+  private List<Integer> versionList;
+
+  //serialized footer - Keeping this around for use by getFileMetaInfo()
+  // will help avoid cpu cycles spent in deserializing at cost of increased
+  // memory footprint.
+  private ByteBuffer footerByteBuffer;
+  // Same for metastore cache - maintains the same background buffer, but includes postscript.
+  // This will only be set if the file footer/metadata was read from disk.
+  private ByteBuffer footerMetaAndPsBuffer;
+
+  private OrcRecordReader recordReader;
+
+  private long recordCount = 0;
+
+  /**
+   * Ensure this is an ORC file to prevent users from trying to read text
+   * files or RC files as ORC files.
+   * @param in the file being read
+   * @param path the filename for error messages
+   * @param psLen the postscript length
+   * @param buffer the tail of the file
+   * @throws IOException
+   */
+  static void ensureOrcFooter(FSDataInputStream in,
+                              Path path,
+                              int psLen,
+                              ByteBuffer buffer) throws IOException {
+    int len = OrcFile.MAGIC.length();
+    if (psLen < len + 1) {
+      throw new IOException("Malformed ORC file " + path +
+          ". Invalid postscript length " + psLen);
+    }
+    int offset = buffer.arrayOffset() + buffer.position() + buffer.limit() - 1 - len;
+    byte[] array = buffer.array();
+    // now look for the magic string at the end of the postscript.
+    if (!Text.decode(array, offset, len).equals(OrcFile.MAGIC)) {
+      // If it isn't there, this may be the 0.11.0 version of ORC.
+      // Read the first 3 bytes of the file to check for the header
+      byte[] header = new byte[len];
+      in.readFully(0, header, 0, len);
+      // if it isn't there, this isn't an ORC file
+      if (!Text.decode(header, 0 , len).equals(OrcFile.MAGIC)) {
+        throw new IOException("Malformed ORC file " + path +
+            ". Invalid postscript.");
+      }
+    }
+  }
+
+  /**
+   * Build a version string out of an array.
+   * @param version the version number as a list
+   * @return the human readable form of the version string
+   */
+  private static String versionString(List<Integer> version) {
+    StringBuilder buffer = new StringBuilder();
+    for(int i=0; i < version.size(); ++i) {
+      if (i != 0) {
+        buffer.append('.');
+      }
+      buffer.append(version.get(i));
+    }
+    return buffer.toString();
+  }
+
+  /**
+   * Check to see if this ORC file is from a future version and if so,
+   * warn the user that we may not be able to read all of the column encodings.
+   * @param log the logger to write any error message to
+   * @param path the data source path for error messages
+   * @param version the version of hive that wrote the file.
+   */
+  static void checkOrcVersion(Log log, Path path, List<Integer> version) {
+    if (version.size() >= 1) {
+      int major = version.get(0);
+      int minor = 0;
+      if (version.size() >= 2) {
+        minor = version.get(1);
+      }
+      if (major > OrcFile.Version.CURRENT.getMajor() ||
+          (major == OrcFile.Version.CURRENT.getMajor() &&
+              minor > OrcFile.Version.CURRENT.getMinor())) {
+        log.warn(path + " was written by a future Hive version " +
+            versionString(version) +
+            ". This file may not be readable by this version of Hive.");
+      }
+    }
+  }
+
+  public OrcScanner(Configuration conf, Schema schema, TableMeta meta, Fragment fragment) throws IOException {
+    super(conf, schema, meta, fragment);
+
+    this.path = this.fragment.getPath();
+    this.fileSystem = this.path.getFileSystem(conf);
+  }
+
+  private static FileMetaInfo extractMetaInfoFromFooter(FileSystem fs,
+                                                        Path path,
+                                                        long maxFileLength
+  ) throws IOException {
+    FSDataInputStream file = fs.open(path);
+
+    // figure out the size of the file using the option or filesystem
+    long size;
+    if (maxFileLength == Long.MAX_VALUE) {
+      size = fs.getFileStatus(path).getLen();
+    } else {
+      size = maxFileLength;
+    }
+
+    //read last bytes into buffer to get PostScript
+    int readSize = (int) Math.min(size, DIRECTORY_SIZE_GUESS);
+    ByteBuffer buffer = ByteBuffer.allocate(readSize);
+    assert buffer.position() == 0;
+    file.readFully((size - readSize),
+        buffer.array(), buffer.arrayOffset(), readSize);
+    buffer.position(0);
+
+    //read the PostScript
+    //get length of PostScript
+    int psLen = buffer.get(readSize - 1) & 0xff;
+    ensureOrcFooter(file, path, psLen, buffer);
+    int psOffset = readSize - 1 - psLen;
+    OrcProto.PostScript ps = extractPostScript(buffer, path, psLen, psOffset);
+
+    int footerSize = (int) ps.getFooterLength();
+    int metadataSize = (int) ps.getMetadataLength();
+
+    //check if extra bytes need to be read
+    ByteBuffer fullFooterBuffer = null;
+    int extra = Math.max(0, psLen + 1 + footerSize + metadataSize - readSize);
+    if (extra > 0) {
+      //more bytes need to be read, seek back to the right place and read extra bytes
+      ByteBuffer extraBuf = ByteBuffer.allocate(extra + readSize);
+      file.readFully((size - readSize - extra), extraBuf.array(),
+          extraBuf.arrayOffset() + extraBuf.position(), extra);
+      extraBuf.position(extra);
+      //append with already read bytes
+      extraBuf.put(buffer);
+      buffer = extraBuf;
+      buffer.position(0);
+      fullFooterBuffer = buffer.slice();
+      buffer.limit(footerSize + metadataSize);
+    } else {
+      //footer is already in the bytes in buffer, just adjust position, length
+      buffer.position(psOffset - footerSize - metadataSize);
+      fullFooterBuffer = buffer.slice();
+      buffer.limit(psOffset);
+    }
+
+    // remember position for later
+    buffer.mark();
+
+    file.close();
+
+    return new FileMetaInfo(
+        ps.getCompression().toString(),
+        (int) ps.getCompressionBlockSize(),
+        (int) ps.getMetadataLength(),
+        buffer,
+        ps.getVersionList(),
+        org.apache.orc.OrcFile.WriterVersion.FUTURE,
+        fullFooterBuffer
+    );
+  }
+
+  public OrcRecordReader createRecordReader() throws IOException {
+    return new OrcRecordReader(this.stripes, fileSystem, schema, targets, fragment, types, codec, bufferSize,
+        rowIndexStride, buildReaderOptions(meta), conf,
+        TimeZone.getTimeZone(meta.getProperty(StorageConstants.TIMEZONE, TajoConstants.DEFAULT_SYSTEM_TIMEZONE)));
+  }
+
+  private static Options buildReaderOptions(TableMeta meta) {
+    return new Options()
+        .useZeroCopy(Boolean.parseBoolean(meta.getProperty(OrcConf.USE_ZEROCOPY.getAttribute(),
+            String.valueOf(OrcConf.USE_ZEROCOPY.getDefaultValue()))))
+        .skipCorruptRecords(Boolean.parseBoolean(meta.getProperty(OrcConf.SKIP_CORRUPT_DATA.getAttribute(),
+            String.valueOf(OrcConf.SKIP_CORRUPT_DATA.getDefaultValue()))));
+  }
+
+  @Override
+  public void init() throws IOException {
+    FileMetaInfo footerMetaData = extractMetaInfoFromFooter(fileSystem, path, maxLength);
+    this.footerMetaAndPsBuffer = footerMetaData.footerMetaAndPsBuffer;
+    MetaInfoObjExtractor rInfo =
+        new MetaInfoObjExtractor(footerMetaData.compressionType,
+            footerMetaData.bufferSize,
+            footerMetaData.metadataSize,
+            footerMetaData.footerBuffer
+        );
+    this.footerByteBuffer = footerMetaData.footerBuffer;
+    this.compressionKind = rInfo.compressionKind;
+    this.codec = rInfo.codec;
+    this.bufferSize = rInfo.bufferSize;
+    this.metadataSize = rInfo.metadataSize;
+    this.stripeStats = rInfo.metadata.getStripeStatsList();
+    this.types = rInfo.footer.getTypesList();
+    this.rowIndexStride = rInfo.footer.getRowIndexStride();
+    this.contentLength = rInfo.footer.getContentLength();
+    this.numberOfRows = rInfo.footer.getNumberOfRows();
+    this.userMetadata = rInfo.footer.getMetadataList();
+    this.fileStats = rInfo.footer.getStatisticsList();
+    this.versionList = footerMetaData.versionList;
+    this.stripes = convertProtoStripesToStripes(rInfo.footer.getStripesList());
+
+    recordReader = createRecordReader();
+
+    super.init();
+  }
+
+  @Override
+  public Tuple next() throws IOException {
+    Tuple next = recordReader.next();
+    if (next != null) {
+      recordCount++;
+    }
+    return next;
+  }
+
+  @Override
+  public void reset() throws IOException {
+    // TODO: improve this
+    this.close();
+    recordReader = createRecordReader();
+  }
+
+  @Override
+  public void close() throws IOException {
+    if (recordReader != null) {
+      recordReader.close();
+      tableStats.setNumBytes(recordReader.getNumBytes());
+      tableStats.setNumRows(recordCount);
+    }
+  }
+
+  @Override
+  public boolean isProjectable() {
+    return true;
+  }
+
+  @Override
+  public boolean isSelectable() {
+    return false;
+  }
+
+  @Override
+  public void setFilter(EvalNode filter) {
+    // TODO: implement this
+  }
+
+  @Override
+  public float getProgress() {
+    return inited ? recordReader.getProgress() : super.getProgress();
+  }
+
+  @Override
+  public boolean isSplittable() {
+    return true;
+  }
+
+  private static OrcProto.PostScript extractPostScript(ByteBuffer bb, Path path,
+                                                       int psLen, int psAbsOffset) throws IOException {
+    // TODO: when PB is upgraded to 2.6, newInstance(ByteBuffer) method should be used here.
+    assert bb.hasArray();
+    CodedInputStream in = CodedInputStream.newInstance(
+        bb.array(), bb.arrayOffset() + psAbsOffset, psLen);
+    OrcProto.PostScript ps = OrcProto.PostScript.parseFrom(in);
+    checkOrcVersion(LOG, path, ps.getVersionList());
+
+    // Check compression codec.
+    switch (ps.getCompression()) {
+      case NONE:
+        break;
+      case ZLIB:
+        break;
+      case SNAPPY:
+        break;
+      case LZO:
+        break;
+      default:
+        throw new IllegalArgumentException("Unknown compression");
+    }
+    return ps;
+  }
+
+  private static OrcProto.Footer extractFooter(ByteBuffer bb, int footerAbsPos,
+                                               int footerSize, CompressionCodec codec, int bufferSize) throws IOException {
+    bb.position(footerAbsPos);
+    bb.limit(footerAbsPos + footerSize);
+    return OrcProto.Footer.parseFrom(InStream.createCodedInputStream("footer",
+        Lists.newArrayList(new BufferChunk(bb, 0)), footerSize, codec, bufferSize));
+  }
+
+  private static OrcProto.Metadata extractMetadata(ByteBuffer bb, int metadataAbsPos,
+                                                   int metadataSize, CompressionCodec codec, int bufferSize) throws IOException {
+    bb.position(metadataAbsPos);
+    bb.limit(metadataAbsPos + metadataSize);
+    return OrcProto.Metadata.parseFrom(InStream.createCodedInputStream("metadata",
+        Lists.newArrayList(new BufferChunk(bb, 0)), metadataSize, codec, bufferSize));
+  }
+
+  /**
+   * MetaInfoObjExtractor - has logic to create the values for the fields in OrcScanner
+   *  from serialized fields.
+   * As the fields are final, the fields need to be initialized in the constructor and
+   *  can't be done in some helper function. So this helper class is used instead.
+   *
+   */
+  private static class MetaInfoObjExtractor{
+    final org.apache.orc.CompressionKind compressionKind;
+    final CompressionCodec codec;
+    final int bufferSize;
+    final int metadataSize;
+    final OrcProto.Metadata metadata;
+    final OrcProto.Footer footer;
+
+    MetaInfoObjExtractor(String codecStr, int bufferSize, int metadataSize,
+                         ByteBuffer footerBuffer) throws IOException {
+
+      this.compressionKind = org.apache.orc.CompressionKind.valueOf(codecStr);
+      this.bufferSize = bufferSize;
+      this.codec = OrcUtils.createCodec(compressionKind);
+      this.metadataSize = metadataSize;
+
+      int position = footerBuffer.position();
+      int footerBufferSize = footerBuffer.limit() - footerBuffer.position() - metadataSize;
+
+      this.metadata = extractMetadata(footerBuffer, position, metadataSize, codec, bufferSize);
+      this.footer = extractFooter(
+          footerBuffer, position + metadataSize, footerBufferSize, codec, bufferSize);
+
+      footerBuffer.position(position);
+    }
+  }
+
+  public static class StripeInformationImpl
+      implements org.apache.orc.StripeInformation {
+    private final OrcProto.StripeInformation stripe;
+
+    public StripeInformationImpl(OrcProto.StripeInformation stripe) {
+      this.stripe = stripe;
+    }
+
+    @Override
+    public long getOffset() {
+      return stripe.getOffset();
+    }
+
+    @Override
+    public long getLength() {
+      return stripe.getDataLength() + getIndexLength() + getFooterLength();
+    }
+
+    @Override
+    public long getDataLength() {
+      return stripe.getDataLength();
+    }
+
+    @Override
+    public long getFooterLength() {
+      return stripe.getFooterLength();
+    }
+
+    @Override
+    public long getIndexLength() {
+      return stripe.getIndexLength();
+    }
+
+    @Override
+    public long getNumberOfRows() {
+      return stripe.getNumberOfRows();
+    }
+
+    @Override
+    public String toString() {
+      return "offset: " + getOffset() + " data: " + getDataLength() +
+          " rows: " + getNumberOfRows() + " tail: " + getFooterLength() +
+          " index: " + getIndexLength();
+    }
+  }
+
+  private static List<StripeInformation> convertProtoStripesToStripes(
+      List<OrcProto.StripeInformation> stripes) {
+    List<StripeInformation> result = new ArrayList<>(stripes.size());
+    for (OrcProto.StripeInformation info : stripes) {
+      result.add(new StripeInformationImpl(info));
+    }
+    return result;
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/ObjectInspectorFactory.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/ObjectInspectorFactory.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/ObjectInspectorFactory.java
deleted file mode 100644
index 061ba0d..0000000
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/ObjectInspectorFactory.java
+++ /dev/null
@@ -1,91 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.tajo.storage.orc.objectinspector;
-
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.tajo.catalog.Schema;
-import org.apache.tajo.common.TajoDataTypes;
-import org.apache.tajo.exception.UnsupportedException;
-
-public class ObjectInspectorFactory {
-
-  public static StructObjectInspector buildStructObjectInspector(Schema schema) {
-    StructObjectInspector structOI = new TajoStructObjectInspector(schema);
-    return structOI;
-  }
-
-  public static ObjectInspector buildObjectInspectorByType(TajoDataTypes.Type dataType) throws UnsupportedException {
-    ObjectInspector oi = null;
-
-    switch(dataType) {
-      case BOOLEAN:
-        oi = new TajoBooleanObjectInspector();
-        break;
-
-      case INT2:
-        oi = new TajoShortObjectInspector();
-        break;
-
-      case INET4:
-      case INT4:
-        oi = new TajoIntObjectInspector();
-        break;
-
-      case INT8:
-        oi = new TajoLongObjectInspector();
-        break;
-
-      case FLOAT4:
-        oi = new TajoFloatObjectInspector();
-        break;
-
-      case FLOAT8:
-        oi = new TajoDoubleObjectInspector();
-        break;
-
-      case TEXT:
-      case CHAR:
-        oi = new TajoStringObjectInspector();
-        break;
-
-      case TIMESTAMP:
-        oi = new TajoTimestampObjectInspector();
-        break;
-
-      case DATE:
-        oi = new TajoDateObjectInspector();
-        break;
-
-      case BLOB:
-      case PROTOBUF:
-        oi = new TajoBlobObjectInspector();
-        break;
-
-      case NULL_TYPE:
-        oi = new TajoNullObjectInspector();
-        break;
-
-      default:
-        throw new UnsupportedException(dataType.name()+" is not supported yet in OrcAppender");
-    }
-
-    return oi;
-  }
-}

[5/7] tajo git commit: TAJO-2102: Migrate to Apache Orc from Presto's one.

Posted by ji...@apache.org.

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/DecimalColumnStatistics.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/DecimalColumnStatistics.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/DecimalColumnStatistics.java
deleted file mode 100644
index 27cdac2..0000000
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/DecimalColumnStatistics.java
+++ /dev/null
@@ -1,45 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tajo.storage.thirdparty.orc;
-
-import org.apache.hadoop.hive.common.type.HiveDecimal;
-
-/**
- * Statistics for decimal columns.
- */
-public interface DecimalColumnStatistics extends ColumnStatistics {
-
-  /**
-   * Get the minimum value for the column.
-   * @return the minimum value
-   */
-  HiveDecimal getMinimum();
-
-  /**
-   * Get the maximum value for the column.
-   * @return the maximum value
-   */
-  HiveDecimal getMaximum();
-
-  /**
-   * Get the sum of the values of the column.
-   * @return the sum
-   */
-  HiveDecimal getSum();
-
-}

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/DirectDecompressionCodec.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/DirectDecompressionCodec.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/DirectDecompressionCodec.java
deleted file mode 100644
index 5333052..0000000
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/DirectDecompressionCodec.java
+++ /dev/null
@@ -1,26 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tajo.storage.thirdparty.orc;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-
-public interface DirectDecompressionCodec extends CompressionCodec {
-  public boolean isAvailable();
-  public void directDecompress(ByteBuffer in, ByteBuffer out) throws IOException;
-}

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/DoubleColumnStatistics.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/DoubleColumnStatistics.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/DoubleColumnStatistics.java
deleted file mode 100644
index ddce8f7..0000000
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/DoubleColumnStatistics.java
+++ /dev/null
@@ -1,44 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tajo.storage.thirdparty.orc;
-
-/**
- * Statistics for float and double columns.
- */
-public interface DoubleColumnStatistics extends ColumnStatistics {
-
-  /**
-   * Get the smallest value in the column. Only defined if getNumberOfValues
-   * is non-zero.
-   * @return the minimum
-   */
-  double getMinimum();
-
-  /**
-   * Get the largest value in the column. Only defined if getNumberOfValues
-   * is non-zero.
-   * @return the maximum
-   */
-  double getMaximum();
-
-  /**
-   * Get the sum of the values in the column.
-   * @return the sum
-   */
-  double getSum();
-}

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/DynamicByteArray.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/DynamicByteArray.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/DynamicByteArray.java
deleted file mode 100644
index 1d44f77..0000000
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/DynamicByteArray.java
+++ /dev/null
@@ -1,303 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tajo.storage.thirdparty.orc;
-
-import org.apache.hadoop.io.Text;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.nio.ByteBuffer;
-
-/**
- * A class that is a growable array of bytes. Growth is managed in terms of
- * chunks that are allocated when needed.
- */
-final class DynamicByteArray {
-  static final int DEFAULT_CHUNKSIZE = 32 * 1024;
-  static final int DEFAULT_NUM_CHUNKS = 128;
-
-  private final int chunkSize;        // our allocation sizes
-  private byte[][] data;              // the real data
-  private int length;                 // max set element index +1
-  private int initializedChunks = 0;  // the number of chunks created
-
-  public DynamicByteArray() {
-    this(DEFAULT_NUM_CHUNKS, DEFAULT_CHUNKSIZE);
-  }
-
-  public DynamicByteArray(int numChunks, int chunkSize) {
-    if (chunkSize == 0) {
-      throw new IllegalArgumentException("bad chunksize");
-    }
-    this.chunkSize = chunkSize;
-    data = new byte[numChunks][];
-  }
-
-  /**
-   * Ensure that the given index is valid.
-   */
-  private void grow(int chunkIndex) {
-    if (chunkIndex >= initializedChunks) {
-      if (chunkIndex >= data.length) {
-        int newSize = Math.max(chunkIndex + 1, 2 * data.length);
-        byte[][] newChunk = new byte[newSize][];
-        System.arraycopy(data, 0, newChunk, 0, data.length);
-        data = newChunk;
-      }
-      for(int i=initializedChunks; i <= chunkIndex; ++i) {
-        data[i] = new byte[chunkSize];
-      }
-      initializedChunks = chunkIndex + 1;
-    }
-  }
-
-  public byte get(int index) {
-    if (index >= length) {
-      throw new IndexOutOfBoundsException("Index " + index +
-                                            " is outside of 0.." +
-                                            (length - 1));
-    }
-    int i = index / chunkSize;
-    int j = index % chunkSize;
-    return data[i][j];
-  }
-
-  public void set(int index, byte value) {
-    int i = index / chunkSize;
-    int j = index % chunkSize;
-    grow(i);
-    if (index >= length) {
-      length = index + 1;
-    }
-    data[i][j] = value;
-  }
-
-  public int add(byte value) {
-    int i = length / chunkSize;
-    int j = length % chunkSize;
-    grow(i);
-    data[i][j] = value;
-    int result = length;
-    length += 1;
-    return result;
-  }
-
-  /**
-   * Copy a slice of a byte array into our buffer.
-   * @param value the array to copy from
-   * @param valueOffset the first location to copy from value
-   * @param valueLength the number of bytes to copy from value
-   * @return the offset of the start of the value
-   */
-  public int add(byte[] value, int valueOffset, int valueLength) {
-    int i = length / chunkSize;
-    int j = length % chunkSize;
-    grow((length + valueLength) / chunkSize);
-    int remaining = valueLength;
-    while (remaining > 0) {
-      int size = Math.min(remaining, chunkSize - j);
-      System.arraycopy(value, valueOffset, data[i], j, size);
-      remaining -= size;
-      valueOffset += size;
-      i += 1;
-      j = 0;
-    }
-    int result = length;
-    length += valueLength;
-    return result;
-  }
-
-  /**
-   * Read the entire stream into this array.
-   * @param in the stream to read from
-   * @throws IOException
-   */
-  public void readAll(InputStream in) throws IOException {
-    int currentChunk = length / chunkSize;
-    int currentOffset = length % chunkSize;
-    grow(currentChunk);
-    int currentLength = in.read(data[currentChunk], currentOffset,
-      chunkSize - currentOffset);
-    while (currentLength > 0) {
-      length += currentLength;
-      currentOffset = length % chunkSize;
-      if (currentOffset == 0) {
-        currentChunk = length / chunkSize;
-        grow(currentChunk);
-      }
-      currentLength = in.read(data[currentChunk], currentOffset,
-        chunkSize - currentOffset);
-    }
-  }
-
-  /**
-   * Byte compare a set of bytes against the bytes in this dynamic array.
-   * @param other source of the other bytes
-   * @param otherOffset start offset in the other array
-   * @param otherLength number of bytes in the other array
-   * @param ourOffset the offset in our array
-   * @param ourLength the number of bytes in our array
-   * @return negative for less, 0 for equal, positive for greater
-   */
-  public int compare(byte[] other, int otherOffset, int otherLength,
-                     int ourOffset, int ourLength) {
-    int currentChunk = ourOffset / chunkSize;
-    int currentOffset = ourOffset % chunkSize;
-    int maxLength = Math.min(otherLength, ourLength);
-    while (maxLength > 0 &&
-      other[otherOffset] == data[currentChunk][currentOffset]) {
-      otherOffset += 1;
-      currentOffset += 1;
-      if (currentOffset == chunkSize) {
-        currentChunk += 1;
-        currentOffset = 0;
-      }
-      maxLength -= 1;
-    }
-    if (maxLength == 0) {
-      return otherLength - ourLength;
-    }
-    int otherByte = 0xff & other[otherOffset];
-    int ourByte = 0xff & data[currentChunk][currentOffset];
-    return otherByte > ourByte ? 1 : -1;
-  }
-
-  /**
-   * Get the size of the array.
-   * @return the number of bytes in the array
-   */
-  public int size() {
-    return length;
-  }
-
-  /**
-   * Clear the array to its original pristine state.
-   */
-  public void clear() {
-    length = 0;
-    for(int i=0; i < data.length; ++i) {
-      data[i] = null;
-    }
-    initializedChunks = 0;
-  }
-
-  /**
-   * Set a text value from the bytes in this dynamic array.
-   * @param result the value to set
-   * @param offset the start of the bytes to copy
-   * @param length the number of bytes to copy
-   */
-  public void setText(Text result, int offset, int length) {
-    result.clear();
-    int currentChunk = offset / chunkSize;
-    int currentOffset = offset % chunkSize;
-    int currentLength = Math.min(length, chunkSize - currentOffset);
-    while (length > 0) {
-      result.append(data[currentChunk], currentOffset, currentLength);
-      length -= currentLength;
-      currentChunk += 1;
-      currentOffset = 0;
-      currentLength = Math.min(length, chunkSize - currentOffset);
-    }
-  }
-
-  /**
-   * Write out a range of this dynamic array to an output stream.
-   * @param out the stream to write to
-   * @param offset the first offset to write
-   * @param length the number of bytes to write
-   * @throws IOException
-   */
-  public void write(OutputStream out, int offset,
-                    int length) throws IOException {
-    int currentChunk = offset / chunkSize;
-    int currentOffset = offset % chunkSize;
-    while (length > 0) {
-      int currentLength = Math.min(length, chunkSize - currentOffset);
-      out.write(data[currentChunk], currentOffset, currentLength);
-      length -= currentLength;
-      currentChunk += 1;
-      currentOffset = 0;
-    }
-  }
-
-  @Override
-  public String toString() {
-    int i;
-    StringBuilder sb = new StringBuilder(length * 3);
-
-    sb.append('{');
-    int l = length - 1;
-    for (i=0; i<l; i++) {
-      sb.append(Integer.toHexString(get(i)));
-      sb.append(',');
-    }
-    sb.append(get(i));
-    sb.append('}');
-
-    return sb.toString();
-  }
-
-  public void setByteBuffer(ByteBuffer result, int offset, int length) {
-    result.clear();
-    int currentChunk = offset / chunkSize;
-    int currentOffset = offset % chunkSize;
-    int currentLength = Math.min(length, chunkSize - currentOffset);
-    while (length > 0) {
-      result.put(data[currentChunk], currentOffset, currentLength);
-      length -= currentLength;
-      currentChunk += 1;
-      currentOffset = 0;
-      currentLength = Math.min(length, chunkSize - currentOffset);
-    }
-  }
-
-  /**
-   * Gets all the bytes of the array.
-   *
-   * @return Bytes of the array
-   */
-  public byte[] get() {
-    byte[] result = null;
-    if (length > 0) {
-      int currentChunk = 0;
-      int currentOffset = 0;
-      int currentLength = Math.min(length, chunkSize);
-      int destOffset = 0;
-      result = new byte[length];
-      int totalLength = length;
-      while (totalLength > 0) {
-        System.arraycopy(data[currentChunk], currentOffset, result, destOffset, currentLength);
-        destOffset += currentLength;
-        totalLength -= currentLength;
-        currentChunk += 1;
-        currentOffset = 0;
-        currentLength = Math.min(totalLength, chunkSize - currentOffset);
-      }
-    }
-    return result;
-  }
-
-  /**
-   * Get the size of the buffers.
-   */
-  public long getSizeInBytes() {
-    return initializedChunks * chunkSize;
-  }
-}

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/DynamicIntArray.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/DynamicIntArray.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/DynamicIntArray.java
deleted file mode 100644
index a347706..0000000
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/DynamicIntArray.java
+++ /dev/null
@@ -1,142 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tajo.storage.thirdparty.orc;
-
-/**
- * Dynamic int array that uses primitive types and chunks to avoid copying
- * large number of integers when it resizes.
- *
- * The motivation for this class is memory optimization, i.e. space efficient
- * storage of potentially huge arrays without good a-priori size guesses.
- *
- * The API of this class is between a primitive array and a AbstractList. It's
- * not a Collection implementation because it handles primitive types, but the
- * API could be extended to support iterators and the like.
- *
- * NOTE: Like standard Collection implementations/arrays, this class is not
- * synchronized.
- */
-final class DynamicIntArray {
-  static final int DEFAULT_CHUNKSIZE = 8 * 1024;
-  static final int INIT_CHUNKS = 128;
-
-  private final int chunkSize;       // our allocation size
-  private int[][] data;              // the real data
-  private int length;                // max set element index +1
-  private int initializedChunks = 0; // the number of created chunks
-
-  public DynamicIntArray() {
-    this(DEFAULT_CHUNKSIZE);
-  }
-
-  public DynamicIntArray(int chunkSize) {
-    this.chunkSize = chunkSize;
-
-    data = new int[INIT_CHUNKS][];
-  }
-
-  /**
-   * Ensure that the given index is valid.
-   */
-  private void grow(int chunkIndex) {
-    if (chunkIndex >= initializedChunks) {
-      if (chunkIndex >= data.length) {
-        int newSize = Math.max(chunkIndex + 1, 2 * data.length);
-        int[][] newChunk = new int[newSize][];
-        System.arraycopy(data, 0, newChunk, 0, data.length);
-        data = newChunk;
-      }
-      for (int i=initializedChunks; i <= chunkIndex; ++i) {
-        data[i] = new int[chunkSize];
-      }
-      initializedChunks = chunkIndex + 1;
-    }
-  }
-
-  public int get(int index) {
-    if (index >= length) {
-      throw new IndexOutOfBoundsException("Index " + index +
-                                            " is outside of 0.." +
-                                            (length - 1));
-    }
-    int i = index / chunkSize;
-    int j = index % chunkSize;
-    return data[i][j];
-  }
-
-  public void set(int index, int value) {
-    int i = index / chunkSize;
-    int j = index % chunkSize;
-    grow(i);
-    if (index >= length) {
-      length = index + 1;
-    }
-    data[i][j] = value;
-  }
-
-  public void increment(int index, int value) {
-    int i = index / chunkSize;
-    int j = index % chunkSize;
-    grow(i);
-    if (index >= length) {
-      length = index + 1;
-    }
-    data[i][j] += value;
-  }
-
-  public void add(int value) {
-    int i = length / chunkSize;
-    int j = length % chunkSize;
-    grow(i);
-    data[i][j] = value;
-    length += 1;
-  }
-
-  public int size() {
-    return length;
-  }
-
-  public void clear() {
-    length = 0;
-    for(int i=0; i < data.length; ++i) {
-      data[i] = null;
-    }
-    initializedChunks = 0;
-  }
-
-  public String toString() {
-    int i;
-    StringBuilder sb = new StringBuilder(length * 4);
-
-    sb.append('{');
-    int l = length - 1;
-    for (i=0; i<l; i++) {
-      sb.append(get(i));
-      sb.append(',');
-    }
-    sb.append(get(i));
-    sb.append('}');
-
-    return sb.toString();
-  }
-
-  public int getSizeInBytes() {
-    return 4 * initializedChunks * chunkSize;
-  }
-}
-

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/HdfsOrcDataSource.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/HdfsOrcDataSource.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/HdfsOrcDataSource.java
deleted file mode 100644
index 5357f51..0000000
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/HdfsOrcDataSource.java
+++ /dev/null
@@ -1,133 +0,0 @@
-
-/*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tajo.storage.thirdparty.orc;
-
-import com.facebook.presto.orc.DiskRange;
-import com.facebook.presto.orc.OrcDataSource;
-import com.google.common.collect.ImmutableMap;
-import io.airlift.slice.BasicSliceInput;
-import io.airlift.slice.FixedLengthSliceInput;
-import io.airlift.units.DataSize;
-import org.apache.hadoop.fs.FSDataInputStream;
-
-import java.io.IOException;
-import java.util.LinkedHashMap;
-import java.util.Map;
-import java.util.Map.Entry;
-
-import static com.facebook.presto.orc.OrcDataSourceUtils.getDiskRangeSlice;
-import static com.facebook.presto.orc.OrcDataSourceUtils.mergeAdjacentDiskRanges;
-import static com.google.common.base.Preconditions.checkArgument;
-import static com.google.common.base.Preconditions.checkNotNull;
-
-/**
- * HDFS File data source class for Orc Reader
- *
- * Most of code is from Presto
- */
-public class HdfsOrcDataSource
-  implements OrcDataSource
-{
-  private final FSDataInputStream inputStream;
-  private final String path;
-  private final long size;
-  private final DataSize maxMergeDistance;
-  private final DataSize maxReadSize;
-  private long readTimeNanos;
-
-  public HdfsOrcDataSource(String path, FSDataInputStream inputStream, long size,
-                           DataSize maxMergeDistance, DataSize maxReadSize)
-  {
-    this.path = checkNotNull(path, "path is null");
-    this.inputStream = checkNotNull(inputStream, "inputStream is null");
-    this.size = size;
-    checkArgument(size >= 0, "size is negative");
-
-    this.maxMergeDistance = checkNotNull(maxMergeDistance, "maxMergeDistance is null");
-    this.maxReadSize = checkNotNull(maxReadSize, "maxMergeDistance is null");
-  }
-
-  @Override
-  public void close()
-    throws IOException
-  {
-    inputStream.close();
-  }
-
-  @Override
-  public long getReadTimeNanos()
-  {
-    return readTimeNanos;
-  }
-
-  @Override
-  public long getSize()
-  {
-    return size;
-  }
-
-  @Override
-  public void readFully(long position, byte[] buffer)
-    throws IOException
-  {
-    readFully(position, buffer, 0, buffer.length);
-  }
-
-  @Override
-  public void readFully(long position, byte[] buffer, int bufferOffset, int bufferLength)
-    throws IOException
-  {
-    long start = System.nanoTime();
-
-    inputStream.readFully(position, buffer, bufferOffset, bufferLength);
-    readTimeNanos += System.nanoTime() - start;
-  }
-
-  @Override
-  public <K> Map<K, FixedLengthSliceInput> readFully(Map<K, DiskRange> diskRanges)
-    throws IOException
-  {
-    checkNotNull(diskRanges, "diskRanges is null");
-
-    if (diskRanges.isEmpty()) {
-      return ImmutableMap.of();
-    }
-
-    Iterable<DiskRange> mergedRanges = mergeAdjacentDiskRanges(diskRanges.values(), maxMergeDistance, maxReadSize);
-
-    // read ranges
-    Map<DiskRange, byte[]> buffers = new LinkedHashMap<>();
-    for (DiskRange mergedRange : mergedRanges) {
-      // read full range in one request
-      byte[] buffer = new byte[mergedRange.getLength()];
-      readFully(mergedRange.getOffset(), buffer);
-      buffers.put(mergedRange, buffer);
-    }
-
-    ImmutableMap.Builder<K, FixedLengthSliceInput> slices = ImmutableMap.builder();
-    diskRanges.forEach((K key, DiskRange range) ->
-        slices.put(key, new BasicSliceInput(getDiskRangeSlice(range, buffers))));
-
-    return slices.build();
-  }
-
-  @Override
-  public String toString()
-  {
-    return path;
-  }
-}
-
-

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/IntegerColumnStatistics.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/IntegerColumnStatistics.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/IntegerColumnStatistics.java
deleted file mode 100644
index 208454f..0000000
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/IntegerColumnStatistics.java
+++ /dev/null
@@ -1,50 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tajo.storage.thirdparty.orc;
-
-/**
- * Statistics for all of the integer columns, such as byte, short, int, and
- * long.
- */
-public interface IntegerColumnStatistics extends ColumnStatistics {
-  /**
-   * Get the smallest value in the column. Only defined if getNumberOfValues
-   * is non-zero.
-   * @return the minimum
-   */
-  long getMinimum();
-
-  /**
-   * Get the largest value in the column. Only defined if getNumberOfValues
-   * is non-zero.
-   * @return the maximum
-   */
-  long getMaximum();
-
-  /**
-   * Is the sum defined? If the sum overflowed the counter this will be false.
-   * @return is the sum available
-   */
-  boolean isSumDefined();
-
-  /**
-   * Get the sum of the column. Only valid if isSumDefined returns true.
-   * @return the sum of the column
-   */
-  long getSum();
-}

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/IntegerWriter.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/IntegerWriter.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/IntegerWriter.java
deleted file mode 100644
index 6872882..0000000
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/IntegerWriter.java
+++ /dev/null
@@ -1,47 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.tajo.storage.thirdparty.orc;
-
-import java.io.IOException;
-
-/**
- * Interface for writing integers.
- */
-interface IntegerWriter {
-
-  /**
-   * Get position from the stream.
-   * @param recorder
-   * @throws IOException
-   */
-  void getPosition(PositionRecorder recorder) throws IOException;
-
-  /**
-   * Write the integer value
-   * @param value
-   * @throws IOException
-   */
-  void write(long value) throws IOException;
-
-  /**
-   * Flush the buffer
-   * @throws IOException
-   */
-  void flush() throws IOException;
-}

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/MemoryManager.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/MemoryManager.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/MemoryManager.java
deleted file mode 100644
index 79af80f..0000000
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/MemoryManager.java
+++ /dev/null
@@ -1,212 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.tajo.storage.thirdparty.orc;
-
-import com.google.common.base.Preconditions;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.conf.HiveConf;
-
-import java.io.IOException;
-import java.lang.management.ManagementFactory;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.concurrent.locks.ReentrantLock;
-
-/**
- * Implements a memory manager that keeps a global context of how many ORC
- * writers there are and manages the memory between them. For use cases with
- * dynamic partitions, it is easy to end up with many writers in the same task.
- * By managing the size of each allocation, we try to cut down the size of each
- * allocation and keep the task from running out of memory.
- *
- * This class is not thread safe, but is re-entrant - ensure creation and all
- * invocations are triggered from the same thread.
- */
-class MemoryManager {
-
-  private static final Log LOG = LogFactory.getLog(MemoryManager.class);
-
-  /**
-   * How often should we check the memory sizes? Measured in rows added
-   * to all of the writers.
-   */
-  private static final int ROWS_BETWEEN_CHECKS = 5000;
-  private final long totalMemoryPool;
-  private final Map<Path, WriterInfo> writerList =
-          new HashMap<>();
-  private long totalAllocation = 0;
-  private double currentScale = 1;
-  private int rowsAddedSinceCheck = 0;
-  private final OwnedLock ownerLock = new OwnedLock();
-
-  @SuppressWarnings("serial")
-  private static class OwnedLock extends ReentrantLock {
-    public Thread getOwner() {
-      return super.getOwner();
-    }
-  }
-
-  private static class WriterInfo {
-    long allocation;
-    Callback callback;
-    WriterInfo(long allocation, Callback callback) {
-      this.allocation = allocation;
-      this.callback = callback;
-    }
-  }
-
-  public interface Callback {
-    /**
-     * The writer needs to check its memory usage
-     * @param newScale the current scale factor for memory allocations
-     * @return true if the writer was over the limit
-     * @throws IOException
-     */
-    boolean checkMemory(double newScale) throws IOException;
-  }
-
-  /**
-   * Create the memory manager.
-   * @param conf use the configuration to find the maximum size of the memory
-   *             pool.
-   */
-  MemoryManager(Configuration conf) {
-    HiveConf.ConfVars poolVar = HiveConf.ConfVars.HIVE_ORC_FILE_MEMORY_POOL;
-    double maxLoad = conf.getFloat(poolVar.varname, poolVar.defaultFloatVal);
-    totalMemoryPool = Math.round(ManagementFactory.getMemoryMXBean().
-        getHeapMemoryUsage().getMax() * maxLoad);
-    ownerLock.lock();
-  }
-
-  /**
-   * Light weight thread-safety check for multi-threaded access patterns
-   */
-  private void checkOwner() {
-    Preconditions.checkArgument(ownerLock.isHeldByCurrentThread(),
-      "Owner thread expected %s, got %s",
-      ownerLock.getOwner(),
-      Thread.currentThread());
-  }
-
-  /**
-   * Add a new writer's memory allocation to the pool. We use the path
-   * as a unique key to ensure that we don't get duplicates.
-   * @param path the file that is being written
-   * @param requestedAllocation the requested buffer size
-   */
-  void addWriter(Path path, long requestedAllocation,
-                              Callback callback) throws IOException {
-    checkOwner();
-    WriterInfo oldVal = writerList.get(path);
-    // this should always be null, but we handle the case where the memory
-    // manager wasn't told that a writer wasn't still in use and the task
-    // starts writing to the same path.
-    if (oldVal == null) {
-      oldVal = new WriterInfo(requestedAllocation, callback);
-      writerList.put(path, oldVal);
-      totalAllocation += requestedAllocation;
-    } else {
-      // handle a new writer that is writing to the same path
-      totalAllocation += requestedAllocation - oldVal.allocation;
-      oldVal.allocation = requestedAllocation;
-      oldVal.callback = callback;
-    }
-    updateScale(true);
-  }
-
-  /**
-   * Remove the given writer from the pool.
-   * @param path the file that has been closed
-   */
-  void removeWriter(Path path) throws IOException {
-    checkOwner();
-    WriterInfo val = writerList.get(path);
-    if (val != null) {
-      writerList.remove(path);
-      totalAllocation -= val.allocation;
-      if (writerList.isEmpty()) {
-        rowsAddedSinceCheck = 0;
-      }
-      updateScale(false);
-    }
-    if(writerList.isEmpty()) {
-      rowsAddedSinceCheck = 0;
-    }
-  }
-
-  /**
-   * Get the total pool size that is available for ORC writers.
-   * @return the number of bytes in the pool
-   */
-  long getTotalMemoryPool() {
-    return totalMemoryPool;
-  }
-
-  /**
-   * The scaling factor for each allocation to ensure that the pool isn't
-   * oversubscribed.
-   * @return a fraction between 0.0 and 1.0 of the requested size that is
-   * available for each writer.
-   */
-  double getAllocationScale() {
-    return currentScale;
-  }
-
-  /**
-   * Give the memory manager an opportunity for doing a memory check.
-   * @throws IOException
-   */
-  void addedRow() throws IOException {
-    if (++rowsAddedSinceCheck >= ROWS_BETWEEN_CHECKS) {
-      notifyWriters();
-    }
-  }
-
-  /**
-   * Notify all of the writers that they should check their memory usage.
-   * @throws IOException
-   */
-  void notifyWriters() throws IOException {
-    checkOwner();
-    LOG.debug("Notifying writers after " + rowsAddedSinceCheck);
-    for(WriterInfo writer: writerList.values()) {
-      boolean flushed = writer.callback.checkMemory(currentScale);
-      if (LOG.isDebugEnabled() && flushed) {
-        LOG.debug("flushed " + writer.toString());
-      }
-    }
-    rowsAddedSinceCheck = 0;
-  }
-
-  /**
-   * Update the currentScale based on the current allocation and pool size.
-   * This also updates the notificationTrigger.
-   * @param isAllocate is this an allocation?
-   */
-  private void updateScale(boolean isAllocate) throws IOException {
-    if (totalAllocation <= totalMemoryPool) {
-      currentScale = 1;
-    } else {
-      currentScale = (double) totalMemoryPool / totalAllocation;
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/Metadata.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/Metadata.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/Metadata.java
deleted file mode 100644
index dfa4c36..0000000
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/Metadata.java
+++ /dev/null
@@ -1,45 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.tajo.storage.thirdparty.orc;
-
-import com.google.common.collect.Lists;
-
-import java.util.List;
-
-public class Metadata {
-
-  private final OrcProto.Metadata metadata;
-
-  Metadata(OrcProto.Metadata m) {
-    this.metadata = m;
-  }
-
-  /**
-   * Return list of stripe level column statistics
-   *
-   * @return list of stripe statistics
-   */
-  public List<StripeStatistics> getStripeStatistics() {
-    List<StripeStatistics> result = Lists.newArrayList();
-    for (OrcProto.StripeStatistics ss : metadata.getStripeStatsList()) {
-      result.add(new StripeStatistics(ss.getColStatsList()));
-    }
-    return result;
-  }
-}

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/OrcFile.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/OrcFile.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/OrcFile.java
index a291953..8f26d21 100644
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/OrcFile.java
+++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/OrcFile.java
@@ -21,11 +21,15 @@ package org.apache.tajo.storage.thirdparty.orc;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-
-import static org.apache.tajo.storage.thirdparty.orc.OrcConf.ConfVars.*;
+import org.apache.orc.CompressionKind;
+import org.apache.orc.FileMetaInfo;
+import org.apache.orc.FileMetadata;
+import org.apache.orc.TypeDescription;
+import org.apache.orc.impl.MemoryManager;
+import org.apache.tajo.storage.orc.ORCAppender;
 
 import java.io.IOException;
+import java.util.Properties;
 import java.util.TimeZone;
 
 /**
@@ -50,9 +54,9 @@ public final class OrcFile {
    * prevent the new reader from reading ORC files generated by any released
    * version of Hive.
    */
-  public static enum Version {
+  public enum Version {
     V_0_11("0.11", 0, 11),
-      V_0_12("0.12", 0, 12);
+    V_0_12("0.12", 0, 12);
 
     public static final Version CURRENT = V_0_12;
 
@@ -102,9 +106,14 @@ public final class OrcFile {
    * For bugs in the writer, but the old readers already read the new data
    * correctly, bump this version instead of the Version.
    */
-  public static enum WriterVersion {
+  public enum WriterVersion {
     ORIGINAL(0),
-      HIVE_8732(1); // corrupted stripe/file maximum column statistics
+    HIVE_8732(1), // corrupted stripe/file maximum column statistics
+    HIVE_4243(2), // use real column names from Hive tables
+    HIVE_12055(3), // vectorized writer
+
+    // Don't use any magic numbers here except for the below:
+    FUTURE(Integer.MAX_VALUE); // a version from a future writer
 
     private final int id;
 
@@ -112,67 +121,111 @@ public final class OrcFile {
       return id;
     }
 
-    private WriterVersion(int id) {
+    WriterVersion(int id) {
       this.id = id;
     }
+
+    private static final WriterVersion[] values;
+    static {
+      // Assumes few non-negative values close to zero.
+      int max = Integer.MIN_VALUE;
+      for (WriterVersion v : WriterVersion.values()) {
+        if (v.id < 0) throw new AssertionError();
+        if (v.id > max && FUTURE.id != v.id) {
+          max = v.id;
+        }
+      }
+      values = new WriterVersion[max + 1];
+      for (WriterVersion v : WriterVersion.values()) {
+        if (v.id < values.length) {
+          values[v.id] = v;
+        }
+      }
+    }
+
+    public static WriterVersion from(int val) {
+      if (val == FUTURE.id) return FUTURE; // Special handling for the magic value.
+      return values[val];
+    }
   }
+  public static final WriterVersion CURRENT_WRITER = WriterVersion.HIVE_12055;
 
-  public static enum EncodingStrategy {
+  public enum EncodingStrategy {
     SPEED, COMPRESSION;
   }
 
-  public static enum CompressionStrategy {
+  public enum CompressionStrategy {
     SPEED, COMPRESSION;
   }
 
-  // Note : these string definitions for table properties are deprecated,
-  // and retained only for backward compatibility, please do not add to
-  // them, add to OrcTableProperties below instead
-  @Deprecated public static final String COMPRESSION = "orc.compress";
-  @Deprecated public static final String COMPRESSION_BLOCK_SIZE = "orc.compress.size";
-  @Deprecated public static final String STRIPE_SIZE = "orc.stripe.size";
-  @Deprecated public static final String ROW_INDEX_STRIDE = "orc.row.index.stride";
-  @Deprecated public static final String ENABLE_INDEXES = "orc.create.index";
-  @Deprecated public static final String BLOCK_PADDING = "orc.block.padding";
+  // unused
+  private OrcFile() {}
 
-  /**
-   * Enum container for all orc table properties.
-   * If introducing a new orc-specific table property,
-   * add it here.
-   */
-  public static enum OrcTableProperties {
-    COMPRESSION("orc.compress"),
-    COMPRESSION_BLOCK_SIZE("orc.compress.size"),
-    STRIPE_SIZE("orc.stripe.size"),
-    BLOCK_SIZE("orc.block.size"),
-    ROW_INDEX_STRIDE("orc.row.index.stride"),
-    ENABLE_INDEXES("orc.create.index"),
-    BLOCK_PADDING("orc.block.padding"),
-    ENCODING_STRATEGY("orc.encoding.strategy"),
-    BLOOM_FILTER_COLUMNS("orc.bloom.filter.columns"),
-    BLOOM_FILTER_FPP("orc.bloom.filter.fpp");
+  public static class ReaderOptions {
+    private final Configuration conf;
+    private FileSystem filesystem;
+    private FileMetaInfo fileMetaInfo; // TODO: this comes from some place.
+    private long maxLength = Long.MAX_VALUE;
+    private FileMetadata fullFileMetadata; // Propagate from LLAP cache.
+
+    public ReaderOptions(Configuration conf) {
+      this.conf = conf;
+    }
+
+    public ReaderOptions fileMetaInfo(FileMetaInfo info) {
+      fileMetaInfo = info;
+      return this;
+    }
+
+    public ReaderOptions filesystem(FileSystem fs) {
+      this.filesystem = fs;
+      return this;
+    }
+
+    public ReaderOptions maxLength(long val) {
+      maxLength = val;
+      return this;
+    }
 
-    private final String propName;
+    public ReaderOptions fileMetadata(FileMetadata metadata) {
+      this.fullFileMetadata = metadata;
+      return this;
+    }
+
+    public Configuration getConfiguration() {
+      return conf;
+    }
 
-    OrcTableProperties(String propName) {
-      this.propName = propName;
+    public FileSystem getFilesystem() {
+      return filesystem;
     }
 
-    public String getPropName(){
-      return this.propName;
+    public FileMetaInfo getFileMetaInfo() {
+      return fileMetaInfo;
+    }
+
+    public long getMaxLength() {
+      return maxLength;
+    }
+
+    public FileMetadata getFileMetadata() {
+      return fullFileMetadata;
     }
   }
 
-  // unused
-  private OrcFile() {}
+  public static ReaderOptions readerOptions(Configuration conf) {
+    return new ReaderOptions(conf);
+  }
+
+
 
-  public static interface WriterContext {
+  public interface WriterContext {
     Writer getWriter();
   }
 
-  public static interface WriterCallback {
-    public void preStripeWrite(WriterContext context) throws IOException;
-    public void preFooterWrite(WriterContext context) throws IOException;
+  public interface WriterCallback {
+    void preStripeWrite(WriterContext context) throws IOException;
+    void preFooterWrite(WriterContext context) throws IOException;
   }
 
   /**
@@ -181,7 +234,7 @@ public final class OrcFile {
   public static class WriterOptions {
     private final Configuration configuration;
     private FileSystem fileSystemValue = null;
-    private ObjectInspector inspectorValue = null;
+    private TypeDescription schema = null;
     private long stripeSizeValue;
     private long blockSizeValue;
     private int rowIndexStrideValue;
@@ -193,45 +246,42 @@ public final class OrcFile {
     private WriterCallback callback;
     private EncodingStrategy encodingStrategy;
     private CompressionStrategy compressionStrategy;
-    private float paddingTolerance;
+    private double paddingTolerance;
     private String bloomFilterColumns;
     private double bloomFilterFpp;
-    private TimeZone timezone;
 
-    WriterOptions(Configuration conf) {
+    protected WriterOptions(Properties tableProperties, Configuration conf) {
       configuration = conf;
-      memoryManagerValue = getMemoryManager(conf);
-      stripeSizeValue = OrcConf.getLongVar(conf, HIVE_ORC_DEFAULT_STRIPE_SIZE);
-      blockSizeValue = OrcConf.getLongVar(conf, HIVE_ORC_DEFAULT_BLOCK_SIZE);
-      rowIndexStrideValue = OrcConf.getIntVar(conf, HIVE_ORC_DEFAULT_ROW_INDEX_STRIDE);
-      bufferSizeValue = OrcConf.getIntVar(conf, HIVE_ORC_DEFAULT_BUFFER_SIZE);
-      blockPaddingValue = OrcConf.getBoolVar(conf, HIVE_ORC_DEFAULT_BLOCK_PADDING);
-      compressValue = CompressionKind.valueOf(OrcConf.getVar(conf, HIVE_ORC_DEFAULT_COMPRESS));
-      String versionName = OrcConf.getVar(conf, HIVE_ORC_WRITE_FORMAT);
-      if (versionName == null) {
-        versionValue = Version.CURRENT;
-      } else {
-        versionValue = Version.byName(versionName);
-      }
-      String enString =
-          conf.get(OrcConf.ConfVars.HIVE_ORC_ENCODING_STRATEGY.varname);
-      if (enString == null) {
-        encodingStrategy = EncodingStrategy.SPEED;
-      } else {
-        encodingStrategy = EncodingStrategy.valueOf(enString);
-      }
-
-      String compString = conf
-          .get(OrcConf.ConfVars.HIVE_ORC_COMPRESSION_STRATEGY.varname);
-      if (compString == null) {
-        compressionStrategy = CompressionStrategy.SPEED;
-      } else {
-        compressionStrategy = CompressionStrategy.valueOf(compString);
-      }
-
-      paddingTolerance = conf.getFloat(OrcConf.ConfVars.HIVE_ORC_BLOCK_PADDING_TOLERANCE.varname,
-          OrcConf.ConfVars.HIVE_ORC_BLOCK_PADDING_TOLERANCE.defaultFloatVal);
-      bloomFilterFpp = BloomFilterIO.DEFAULT_FPP;
+      memoryManagerValue = getStaticMemoryManager(conf);
+      stripeSizeValue = org.apache.orc.OrcConf.STRIPE_SIZE.getLong(tableProperties, conf);
+      blockSizeValue = org.apache.orc.OrcConf.BLOCK_SIZE.getLong(tableProperties, conf);
+      rowIndexStrideValue =
+          (int) org.apache.orc.OrcConf.ROW_INDEX_STRIDE.getLong(tableProperties, conf);
+      bufferSizeValue = (int) org.apache.orc.OrcConf.BUFFER_SIZE.getLong(tableProperties,
+          conf);
+      blockPaddingValue =
+          org.apache.orc.OrcConf.BLOCK_PADDING.getBoolean(tableProperties, conf);
+      compressValue =
+          CompressionKind.valueOf(org.apache.orc.OrcConf.COMPRESS.getString(tableProperties,
+              conf));
+      String versionName = org.apache.orc.OrcConf.WRITE_FORMAT.getString(tableProperties,
+          conf);
+      versionValue = Version.byName(versionName);
+      String enString = org.apache.orc.OrcConf.ENCODING_STRATEGY.getString(tableProperties,
+          conf);
+      encodingStrategy = EncodingStrategy.valueOf(enString);
+
+      String compString =
+          org.apache.orc.OrcConf.COMPRESSION_STRATEGY.getString(tableProperties, conf);
+      compressionStrategy = CompressionStrategy.valueOf(compString);
+
+      paddingTolerance =
+          org.apache.orc.OrcConf.BLOCK_PADDING_TOLERANCE.getDouble(tableProperties, conf);
+
+      bloomFilterColumns = org.apache.orc.OrcConf.BLOOM_FILTER_COLUMNS.getString(tableProperties,
+          conf);
+      bloomFilterFpp = org.apache.orc.OrcConf.BLOOM_FILTER_FPP.getDouble(tableProperties,
+          conf);
     }
 
     /**
@@ -302,7 +352,7 @@ public final class OrcFile {
     /**
      * Sets the tolerance for block padding as a percentage of stripe size.
      */
-    public WriterOptions paddingTolerance(float value) {
+    public WriterOptions paddingTolerance(double value) {
       paddingTolerance = value;
       return this;
     }
@@ -318,7 +368,7 @@ public final class OrcFile {
     /**
      * Specify the false positive probability for bloom filter.
      * @param fpp - false positive probability
-     * @return
+     * @return this
      */
     public WriterOptions bloomFilterFpp(double fpp) {
       bloomFilterFpp = fpp;
@@ -334,11 +384,12 @@ public final class OrcFile {
     }
 
     /**
-     * A required option that sets the object inspector for the rows. Used
-     * to determine the schema for the file.
+     * Set the schema for the file. This is a required parameter.
+     * @param schema the schema for the file.
+     * @return this
      */
-    public WriterOptions inspector(ObjectInspector value) {
-      inspectorValue = value;
+    public WriterOptions setSchema(TypeDescription schema) {
+      this.schema = schema;
       return this;
     }
 
@@ -353,7 +404,7 @@ public final class OrcFile {
     /**
      * Add a listener for when the stripe and file are about to be closed.
      * @param callback the object to be called when the stripe is closed
-     * @return
+     * @return this
      */
     public WriterOptions callback(WriterCallback callback) {
       this.callback = callback;
@@ -363,25 +414,112 @@ public final class OrcFile {
     /**
      * A package local option to set the memory manager.
      */
-    WriterOptions memory(MemoryManager value) {
+    protected WriterOptions memory(MemoryManager value) {
       memoryManagerValue = value;
       return this;
     }
 
-    /**
-     * Tajo-specific
-     */
-    WriterOptions timezone(TimeZone value) {
-      timezone = value;
-      return this;
+    public boolean getBlockPadding() {
+      return blockPaddingValue;
+    }
+
+    public long getBlockSize() {
+      return blockSizeValue;
+    }
+
+    public String getBloomFilterColumns() {
+      return bloomFilterColumns;
     }
+
+    public FileSystem getFileSystem() {
+      return fileSystemValue;
+    }
+
+    public Configuration getConfiguration() {
+      return configuration;
+    }
+
+    public TypeDescription getSchema() {
+      return schema;
+    }
+
+    public long getStripeSize() {
+      return stripeSizeValue;
+    }
+
+    public CompressionKind getCompress() {
+      return compressValue;
+    }
+
+    public WriterCallback getCallback() {
+      return callback;
+    }
+
+    public Version getVersion() {
+      return versionValue;
+    }
+
+    public MemoryManager getMemoryManager() {
+      return memoryManagerValue;
+    }
+
+    public int getBufferSize() {
+      return bufferSizeValue;
+    }
+
+    public int getRowIndexStride() {
+      return rowIndexStrideValue;
+    }
+
+    public CompressionStrategy getCompressionStrategy() {
+      return compressionStrategy;
+    }
+
+    public EncodingStrategy getEncodingStrategy() {
+      return encodingStrategy;
+    }
+
+    public double getPaddingTolerance() {
+      return paddingTolerance;
+    }
+
+    public double getBloomFilterFpp() {
+      return bloomFilterFpp;
+    }
+  }
+
+  /**
+   * Create a set of writer options based on a configuration.
+   * @param conf the configuration to use for values
+   * @return A WriterOptions object that can be modified
+   */
+  public static ORCAppender.WriterOptions writerOptions(Configuration conf) {
+    return new ORCAppender.WriterOptions(null, conf);
   }
 
   /**
-   * Create a default set of write options that can be modified.
+   * Create a set of write options based on a set of table properties and
+   * configuration.
+   * @param tableProperties the properties of the table
+   * @param conf the configuration of the query
+   * @return a WriterOptions object that can be modified
    */
-  public static WriterOptions writerOptions(Configuration conf) {
-    return new WriterOptions(conf);
+  public static WriterOptions writerOptions(Properties tableProperties,
+                                            Configuration conf) {
+    return new WriterOptions(tableProperties, conf);
+  }
+
+  private static synchronized MemoryManager getStaticMemoryManager(
+      final Configuration conf) {
+    if (memoryManager == null) {
+      memoryManager = new ThreadLocal<MemoryManager>() {
+        @Override
+        protected MemoryManager initialValue() {
+          return new MemoryManager(conf);
+        }
+      };
+    }
+    return memoryManager.get();
   }
 
   /**
@@ -393,54 +531,13 @@ public final class OrcFile {
    * @throws IOException
    */
   public static Writer createWriter(Path path,
-                                    WriterOptions opts
-                                    ) throws IOException {
-    FileSystem fs = opts.fileSystemValue == null ?
-      path.getFileSystem(opts.configuration) : opts.fileSystemValue;
-
-    return new WriterImpl(fs, path, opts.configuration, opts.inspectorValue,
-                          opts.stripeSizeValue, opts.compressValue,
-                          opts.bufferSizeValue, opts.rowIndexStrideValue,
-                          opts.memoryManagerValue, opts.blockPaddingValue,
-                          opts.versionValue, opts.callback,
-                          opts.encodingStrategy, opts.compressionStrategy,
-                          opts.paddingTolerance, opts.blockSizeValue,
-                          opts.bloomFilterColumns, opts.bloomFilterFpp,
-                          opts.timezone);
-  }
+                                    WriterOptions opts,
+                                    TimeZone timeZone
+  ) throws IOException {
+    FileSystem fs = opts.getFileSystem() == null ?
+        path.getFileSystem(opts.getConfiguration()) : opts.getFileSystem();
 
-  /**
-   * Create an ORC file writer. This method is provided for API backward
-   * compatability with Hive 0.11.
-   * @param fs file system
-   * @param path filename to write to
-   * @param inspector the ObjectInspector that inspects the rows
-   * @param stripeSize the number of bytes in a stripe
-   * @param compress how to compress the file
-   * @param bufferSize the number of bytes to compress at once
-   * @param rowIndexStride the number of rows between row index entries or
-   *                       0 to suppress all indexes
-   * @return a new ORC file writer
-   * @throws IOException
-   */
-  public static Writer createWriter(FileSystem fs,
-                                    Path path,
-                                    Configuration conf,
-                                    ObjectInspector inspector,
-                                    long stripeSize,
-                                    CompressionKind compress,
-                                    int bufferSize,
-                                    int rowIndexStride,
-                                    TimeZone timeZone) throws IOException {
-    return createWriter(path,
-                        writerOptions(conf)
-                        .fileSystem(fs)
-                        .inspector(inspector)
-                        .stripeSize(stripeSize)
-                        .compress(compress)
-                        .bufferSize(bufferSize)
-                        .rowIndexStride(rowIndexStride)
-                        .timezone(timeZone));
+    return new WriterImpl(fs, path, opts, timeZone);
   }
 
   private static ThreadLocal<MemoryManager> memoryManager = null;

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/OrcRecordReader.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/OrcRecordReader.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/OrcRecordReader.java
new file mode 100644
index 0000000..7194bf4
--- /dev/null
+++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/OrcRecordReader.java
@@ -0,0 +1,454 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tajo.storage.thirdparty.orc;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.io.DiskRange;
+import org.apache.hadoop.hive.common.io.DiskRangeList;
+import org.apache.orc.*;
+import org.apache.orc.OrcProto;
+import org.apache.orc.impl.*;
+import org.apache.orc.impl.StreamName;
+import org.apache.tajo.catalog.Column;
+import org.apache.tajo.catalog.Schema;
+import org.apache.tajo.catalog.TableMeta;
+import org.apache.tajo.storage.Tuple;
+import org.apache.tajo.storage.VTuple;
+import org.apache.tajo.storage.fragment.FileFragment;
+import org.apache.tajo.storage.thirdparty.orc.TreeReaderFactory.DatumTreeReader;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.util.*;
+
+public class OrcRecordReader implements Closeable {
+
+  private final Log LOG = LogFactory.getLog(OrcRecordReader.class);
+
+  private final Path path; // file to read, taken from the fragment
+  private final long firstRow; // rows of the stripes that start before the fragment's start offset
+  private final List<StripeInformation> stripes = new ArrayList<>(); // stripes selected for this fragment
+  private OrcProto.StripeFooter stripeFooter; // footer of the stripe loaded most recently
+  private final long totalRowCount; // sum of the row counts of the selected stripes
+  private final CompressionCodec codec;
+  private final List<OrcProto.Type> types;
+  private final int bufferSize;
+  private final boolean[] included; // index 0 is the root column, index i is schema column i - 1
+  private final long rowIndexStride; // rows per row index entry; 0 is treated as "no row index"
+  private long rowInStripe = 0; // position inside the current stripe
+  private int currentStripe = -1; // index into stripes; -1 until the first stripe is loaded
+  private long rowBaseInStripe = 0; // rows of the selected stripes that precede the current one
+  private long rowCountInStripe = 0; // number of rows of the current stripe
+  private final Map<org.apache.orc.impl.StreamName, InStream> streams = new HashMap<>(); // streams of the current stripe
+  DiskRangeList bufferChunks = null; // disk ranges read for the current stripe, null when nothing is loaded
+  private final TreeReaderFactory.DatumTreeReader[] reader; // one tree reader per target column
+  private final OrcProto.RowIndex[] indexes; // cleared every time a stripe is loaded
+  private final OrcProto.BloomFilterIndex[] bloomFilterIndices;
+  private final Configuration conf;
+  private final MetadataReader metadata;
+  private final DataReader dataReader;
+  private final Tuple result; // single tuple instance that next() fills and returns
+
+  /**
+   * Creates a reader over the stripes whose start offset lies inside the byte
+   * range of the given fragment and positions it on the first row.
+   *
+   * @param stripes    candidate stripes; only those starting inside the fragment are read
+   * @param fileSystem file system used to open the file
+   * @param schema     schema of the file; decides the size of the included-column mask
+   * @param target     columns to read; one tree reader is created per entry
+   * @param fragment   supplies the path, the start offset and the length of the range
+   * @param types      ORC types; their count sizes the index arrays
+   * @param codec      compression codec handed to the metadata and data readers
+   * @param bufferSize buffer size handed to the metadata reader and the streams
+   * @param strideRate number of rows per row index entry
+   * @param options    supplies the zero-copy and skip-corrupt-records settings
+   * @param conf       configuration, only stored by this constructor
+   * @param timeZone   time zone handed to the tree readers
+   * @throws IOException if opening the data reader or loading the first stripe fails
+   */
+  public OrcRecordReader(List<StripeInformation> stripes,
+                         FileSystem fileSystem,
+                         Schema schema,
+                         Column[] target,
+                         FileFragment fragment,
+                         List<OrcProto.Type> types,
+                         CompressionCodec codec,
+                         int bufferSize,
+                         long strideRate,
+                         Reader.Options options,
+                         Configuration conf,
+                         TimeZone timeZone) throws IOException {
+
+    result = new VTuple(target.length);
+
+    this.conf = conf;
+    this.path = fragment.getPath();
+    this.codec = codec;
+    this.types = types;
+    this.bufferSize = bufferSize;
+    this.included = new boolean[schema.size() + 1];
+    included[0] = target.length > 0; // always include root column except when target schema size is 0
+    Schema targetSchema = new Schema(target);
+    for (int i = 1; i < included.length; i++) {
+      included[i] = targetSchema.contains(schema.getColumn(i - 1));
+    }
+    this.rowIndexStride = strideRate;
+    this.metadata = new MetadataReaderImpl(fileSystem, path, codec, bufferSize, types.size());
+
+    long rows = 0;
+    long skippedRows = 0;
+    long offset = fragment.getStartKey();
+    long maxOffset = fragment.getStartKey() + fragment.getLength();
+    // Stripes starting before the fragment only contribute to the skipped row
+    // count, stripes starting inside [offset, maxOffset) are selected, and
+    // stripes starting at or after maxOffset are ignored.
+    for(StripeInformation stripe: stripes) {
+      long stripeStart = stripe.getOffset();
+      if (offset > stripeStart) {
+        skippedRows += stripe.getNumberOfRows();
+      } else if (stripeStart < maxOffset) {
+        this.stripes.add(stripe);
+        rows += stripe.getNumberOfRows();
+      }
+    }
+
+    // TODO: we could change the ctor to pass this externally
+    this.dataReader = RecordReaderUtils.createDefaultDataReader(fileSystem, path, options.getUseZeroCopy(), codec);
+    this.dataReader.open();
+
+    firstRow = skippedRows;
+    totalRowCount = rows;
+
+    reader = new DatumTreeReader[target.length];
+    for (int i = 0; i < reader.length; i++) {
+      reader[i] = TreeReaderFactory.createTreeReader(timeZone, schema.getColumnId(target[i].getQualifiedName()), target[i],
+          options.getSkipCorruptRecords());
+    }
+
+    indexes = new OrcProto.RowIndex[types.size()];
+    bloomFilterIndices = new OrcProto.BloomFilterIndex[types.size()];
+    // rowCountInStripe is still 0 here, so this call loads the first stripe
+    advanceToNextRow(reader, 0L, true);
+  }
+
+  /**
+   * Builds the list of disk ranges covering every data-area stream that
+   * belongs to an included column. Stream offsets are accumulated from the
+   * stream lengths in the order the streams are listed.
+   *
+   * @param streamList        the streams of the stripe, in file order
+   * @param includedColumns   mask indexed by column id; true for columns to read
+   * @param doMergeBuffers    passed through to
+   *                          {@code RecordReaderUtils.addEntireStreamToRanges}
+   * @return the list of disk ranges that will be loaded
+   */
+  static DiskRangeList planReadPartialDataStreams(List<OrcProto.Stream> streamList,
+                                                  boolean[] includedColumns,
+                                                  boolean doMergeBuffers) {
+    DiskRangeList.CreateHelper ranges = new DiskRangeList.CreateHelper();
+    long streamStart = 0;
+    for (OrcProto.Stream stream : streamList) {
+      long streamLength = stream.getLength();
+      // the stream kind is optional, so it is checked before being interpreted
+      boolean isDataStream = stream.hasKind()
+          && StreamName.getArea(stream.getKind()) == StreamName.Area.DATA;
+      if (isDataStream && includedColumns[stream.getColumn()]) {
+        RecordReaderUtils.addEntireStreamToRanges(streamStart, streamLength, ranges, doMergeBuffers);
+      }
+      // skipped streams still occupy space, so the offset always moves on
+      streamStart += streamLength;
+    }
+    return ranges.extract();
+  }
+
+  /**
+   * Creates an {@link InStream} for every wanted stream description and
+   * registers it in the given map under its stream name.
+   * A stream is wanted when its column is included (or no mask is given) and
+   * its kind is either absent or belongs to the data area.
+   *
+   * @param streamDescriptions the streams of the stripe, in file order
+   * @param ranges             the disk ranges the stream buffers are taken from
+   * @param includeColumn      mask indexed by column id, or null to take all columns
+   * @param codec              codec handed to the created streams
+   * @param bufferSize         buffer size handed to the created streams
+   * @param streams            map that receives the created streams
+   * @throws IOException if a stream cannot be created
+   */
+  void createStreams(List<OrcProto.Stream> streamDescriptions,
+                     DiskRangeList ranges,
+                     boolean[] includeColumn,
+                     CompressionCodec codec,
+                     int bufferSize,
+                     Map<org.apache.orc.impl.StreamName, InStream> streams) throws IOException {
+    long position = 0;
+    for (OrcProto.Stream description : streamDescriptions) {
+      int columnId = description.getColumn();
+      long length = description.getLength();
+      boolean columnWanted = includeColumn == null || includeColumn[columnId];
+      boolean dataArea = !description.hasKind()
+          || StreamName.getArea(description.getKind()) == StreamName.Area.DATA;
+      if (columnWanted && dataArea) {
+        List<DiskRange> buffers = RecordReaderUtils.getStreamBuffers(ranges, position, length);
+        StreamName streamName = new StreamName(columnId, description.getKind());
+        streams.put(streamName,
+            InStream.create(streamName.toString(), buffers, length, codec, bufferSize));
+      }
+      position += length;
+    }
+  }
+
+  /**
+   * Reads the data streams of the included columns of the given stripe and
+   * creates the corresponding input streams.
+   *
+   * @param stripe the stripe whose offset is used as the base of the read
+   * @throws IOException if reading the file data or creating a stream fails
+   */
+  private void readPartialDataStreams(StripeInformation stripe) throws IOException {
+    List<OrcProto.Stream> stripeStreams = stripeFooter.getStreamsList();
+    DiskRangeList plannedRanges = planReadPartialDataStreams(stripeStreams, included, true);
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("chunks = " + RecordReaderUtils.stringifyDiskRanges(plannedRanges));
+    }
+
+    bufferChunks = dataReader.readFileData(plannedRanges, stripe.getOffset(), false);
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("merge = " + RecordReaderUtils.stringifyDiskRanges(bufferChunks));
+    }
+
+    createStreams(stripeStreams, bufferChunks, included, codec, bufferSize, streams);
+  }
+
+  /**
+   * Skip over rows that we aren't selecting, so that the next row is
+   * one that we will read.
+   *
+   * @param reader           the tree readers to reposition
+   * @param nextRow          the row we want to go to, counted from the first
+   *                         selected stripe
+   * @param canAdvanceStripe whether the next stripe may be loaded when the
+   *                         requested row lies beyond the current stripe
+   * @return {@code canAdvanceStripe} when the requested row lies beyond the
+   *         current stripe, otherwise {@code true}
+   * @throws IOException
+   */
+  private boolean advanceToNextRow(
+      TreeReaderFactory.TreeReader[] reader, long nextRow, boolean canAdvanceStripe)
+      throws IOException {
+    long nextRowInStripe = nextRow - rowBaseInStripe;
+
+    // the requested row is not in the current stripe
+    if (nextRowInStripe >= rowCountInStripe) {
+      if (canAdvanceStripe) {
+        advanceStripe();
+      }
+      return canAdvanceStripe;
+    }
+    if (nextRowInStripe != rowInStripe) {
+      if (rowIndexStride != 0) {
+        // seek to the row group holding the row, then skip the remainder
+        int rowGroup = (int) (nextRowInStripe / rowIndexStride);
+        seekToRowEntry(reader, rowGroup);
+        for (TreeReaderFactory.TreeReader eachReader : reader) {
+          eachReader.skipRows(nextRowInStripe - rowGroup * rowIndexStride);
+        }
+      } else {
+        // no row index stride: skip relative to the current position
+        for (TreeReaderFactory.TreeReader eachReader : reader) {
+          eachReader.skipRows(nextRowInStripe - rowInStripe);
+        }
+      }
+      rowInStripe = nextRowInStripe;
+    }
+    return true;
+  }
+
+  /**
+   * Tells whether the current stripe still holds a row that has not been read.
+   *
+   * @return true if the position inside the stripe is before the stripe's row count
+   * @throws IOException declared for callers; not thrown by this implementation
+   */
+  public boolean hasNext() throws IOException {
+    return rowCountInStripe > rowInStripe;
+  }
+
+  public Tuple next() throws IOException {
+    if (hasNext()) {
+      try {
+        for (int i = 0; i < reader.length; i++) {
+          result.put(i, reader[i].next());
+        }
+        // find the next row
+        rowInStripe += 1;
+        advanceToNextRow(reader, rowInStripe + rowBaseInStripe, true);
+        return result;
+      } catch (IOException e) {
+        // Rethrow exception with file name in log message
+        throw new IOException("Error reading file: " + path, e);
+      }
+    } else {
+      return null;
+    }
+  }
+
+  /**
+   * Read the next stripe until we find a row that we don't skip.
+   * Stripes without rows are passed over. When the last selected stripe has
+   * already been loaded nothing is read and the position stays at the end of
+   * the current stripe.
+   *
+   * @throws IOException
+   */
+  private void advanceStripe() throws IOException {
+    // mark the current stripe as exhausted so that the loop is entered
+    rowInStripe = rowCountInStripe;
+    while (rowInStripe >= rowCountInStripe &&
+        currentStripe < stripes.size() - 1) {
+      currentStripe += 1;
+      readStripe();
+    }
+  }
+
+  /**
+   * Read the current stripe into memory.
+   * Loads the stripe footer, reads the data streams and hands them to every
+   * tree reader.
+   *
+   * @throws IOException
+   */
+  private void readStripe() throws IOException {
+    StripeInformation stripe = beginReadStripe();
+
+    // if we haven't skipped the whole stripe, read the data
+    if (rowInStripe < rowCountInStripe) {
+      // if we aren't projecting columns or filtering rows, just read it all
+      // NOTE(review): included is assigned a non-null array in the constructor,
+      // so the first branch does not appear to be taken in this class.
+      if (included == null) {
+        readAllDataStreams(stripe);
+      } else {
+        readPartialDataStreams(stripe);
+      }
+
+      for (TreeReaderFactory.TreeReader eachReader : reader) {
+        eachReader.startStripe(streams, stripeFooter);
+      }
+      // if we skipped the first row group, move the pointers forward
+      // NOTE(review): beginReadStripe() resets rowInStripe to 0, so this seek
+      // does not appear to be reached; it would divide by rowIndexStride.
+      if (rowInStripe != 0) {
+        seekToRowEntry(reader, (int) (rowInStripe / rowIndexStride));
+      }
+    }
+  }
+
+  /**
+   * Closes and forgets the streams of the current stripe and, when the data
+   * reader tracks disk ranges, hands the buffers of the loaded chunks back to it.
+   *
+   * @throws IOException declared for callers of this cleanup step
+   */
+  private void clearStreams() throws IOException {
+    // explicit close of all streams to de-ref ByteBuffers
+    for (InStream stream : streams.values()) {
+      stream.close();
+    }
+    if (bufferChunks != null && dataReader.isTrackingDiskRanges()) {
+      DiskRangeList current = bufferChunks;
+      while (current != null) {
+        if (current instanceof BufferChunk) {
+          dataReader.releaseBuffer(((BufferChunk) current).getChunk());
+        }
+        current = current.next;
+      }
+    }
+    bufferChunks = null;
+    streams.clear();
+  }
+
+  /**
+   * Reads the footer of the given stripe through the metadata reader.
+   *
+   * @param stripe the stripe whose footer is wanted
+   * @return the stripe footer
+   * @throws IOException if the metadata reader fails
+   */
+  OrcProto.StripeFooter readStripeFooter(StripeInformation stripe) throws IOException {
+    OrcProto.StripeFooter footer = metadata.readStripeFooter(stripe);
+    return footer;
+  }
+
+  /**
+   * Prepares the reader state for the stripe selected by {@code currentStripe}:
+   * loads its footer, drops the previous streams, resets the row position and
+   * clears the cached row indexes.
+   *
+   * @return the stripe that is about to be read
+   * @throws IOException if the footer cannot be read or the streams cannot be closed
+   */
+  private StripeInformation beginReadStripe() throws IOException {
+    StripeInformation current = stripes.get(currentStripe);
+    stripeFooter = readStripeFooter(current);
+    clearStreams();
+
+    // position at the first row of the stripe
+    rowCountInStripe = current.getNumberOfRows();
+    rowInStripe = 0;
+
+    // rows of all selected stripes in front of this one
+    long precedingRows = 0;
+    for (StripeInformation earlier : stripes.subList(0, currentStripe)) {
+      precedingRows += earlier.getNumberOfRows();
+    }
+    rowBaseInStripe = precedingRows;
+
+    // the row indexes belong to the previous stripe, forget them
+    Arrays.fill(indexes, null);
+    return current;
+  }
+
+  /**
+   * Reads the range from the stripe's index length up to index length plus
+   * data length with a single request and creates the streams from it.
+   *
+   * @param stripe the stripe to read
+   * @throws IOException if reading the file data or creating a stream fails
+   */
+  private void readAllDataStreams(StripeInformation stripe) throws IOException {
+    long dataStart = stripe.getIndexLength();
+    long dataEnd = dataStart + stripe.getDataLength();
+    // one range covering everything, so that a single big read is issued
+    DiskRangeList wholeRange = new DiskRangeList(dataStart, dataEnd);
+    bufferChunks = dataReader.readFileData(wholeRange, stripe.getOffset(), false);
+    createStreams(stripeFooter.getStreamsList(), bufferChunks, included, codec, bufferSize, streams);
+  }
+
+  /**
+   * Returns the number of the current row, including the rows of the stripes
+   * that were skipped because they start before the fragment.
+   *
+   * @return skipped rows plus rows of preceding stripes plus position in the stripe
+   */
+  public long getRowNumber() {
+    return firstRow + rowBaseInStripe + rowInStripe;
+  }
+
+  /**
+   * Returns the fraction of the selected rows that has been passed so far.
+   *
+   * @return a value between 0 and 1; 1 when no stripe was selected for the
+   *         fragment, because then there is nothing left to read
+   */
+  public float getProgress() {
+    if (totalRowCount <= 0) {
+      // without this guard the float division below would be 0 / 0 = NaN
+      return 1.0f;
+    }
+    return ((float) rowBaseInStripe + rowInStripe) / totalRowCount;
+  }
+
+  /**
+   * Finds the selected stripe that contains the given row.
+   *
+   * @param rowNumber row number counted from the first selected stripe
+   * @return index of the stripe inside the selected stripes
+   * @throws IllegalArgumentException if the row lies behind the last selected stripe
+   */
+  private int findStripe(long rowNumber) {
+    long remaining = rowNumber;
+    int stripeIndex = 0;
+    for (StripeInformation stripe : stripes) {
+      long stripeRows = stripe.getNumberOfRows();
+      if (remaining < stripeRows) {
+        return stripeIndex;
+      }
+      remaining -= stripeRows;
+      stripeIndex++;
+    }
+    throw new IllegalArgumentException("Seek after the end of reader range");
+  }
+
+  OrcIndex readRowIndex(
+      int stripeIndex, boolean[] included) throws IOException {
+    return readRowIndex(stripeIndex, included, null, null);
+  }
+
+  /**
+   * Reads the row index of a stripe through the metadata reader. For the
+   * stripe that is currently loaded the cached footer is used, and the
+   * reader's own index arrays stand in for arguments passed as null.
+   *
+   * @param stripeIndex      index of the stripe inside the selected stripes
+   * @param included         mask of the columns whose index is wanted
+   * @param indexes          row index array to use, may be null
+   * @param bloomFilterIndex bloom filter index array to use, may be null
+   * @return the index produced by the metadata reader
+   * @throws IOException if the index cannot be read
+   */
+  OrcIndex readRowIndex(int stripeIndex, boolean[] included, OrcProto.RowIndex[] indexes,
+                        OrcProto.BloomFilterIndex[] bloomFilterIndex) throws IOException {
+    StripeInformation target = stripes.get(stripeIndex);
+    boolean isCurrentStripe = stripeIndex == currentStripe;
+
+    // cached objects are only valid for the stripe that is loaded right now
+    OrcProto.StripeFooter footer = isCurrentStripe ? this.stripeFooter : null;
+    OrcProto.RowIndex[] rowIndexes = indexes;
+    OrcProto.BloomFilterIndex[] bloomIndexes = bloomFilterIndex;
+    if (isCurrentStripe) {
+      if (rowIndexes == null) {
+        rowIndexes = this.indexes;
+      }
+      if (bloomIndexes == null) {
+        bloomIndexes = this.bloomFilterIndices;
+      }
+    }
+    return metadata.readRowIndex(target, footer, included, rowIndexes, null, bloomIndexes);
+  }
+
+  /**
+   * Moves every given tree reader to the start of a row index entry.
+   * Columns without a loaded row index get a null position provider.
+   *
+   * @param reader   the tree readers to reposition
+   * @param rowEntry number of the row index entry to seek to
+   * @throws IOException if a reader fails to seek
+   */
+  private void seekToRowEntry(TreeReaderFactory.TreeReader[] reader, int rowEntry)
+      throws IOException {
+    PositionProvider[] positions = new PositionProvider[indexes.length];
+    for (int column = 0; column < positions.length; column++) {
+      OrcProto.RowIndex rowIndex = indexes[column];
+      if (rowIndex != null) {
+        positions[column] = new PositionProviderImpl(rowIndex.getEntry(rowEntry));
+      }
+    }
+    for (TreeReaderFactory.TreeReader columnReader : reader) {
+      columnReader.seek(positions);
+    }
+  }
+
+  /**
+   * Positions the reader on the given row.
+   *
+   * @param rowNumber row number that includes the rows in front of the fragment,
+   *                  in the same numbering as {@code getRowNumber()}
+   * @throws IllegalArgumentException if the row is negative, lies before the
+   *                                  reader range or behind its end
+   * @throws IOException if a stripe or a row index cannot be read
+   */
+  public void seekToRow(long rowNumber) throws IOException {
+    if (rowNumber < 0) {
+      throw new IllegalArgumentException("Seek to a negative row number " +
+          rowNumber);
+    }
+    if (rowNumber < firstRow) {
+      throw new IllegalArgumentException("Seek before reader range " +
+          rowNumber);
+    }
+    // internal numbering starts at the first selected stripe
+    long rowInSlice = rowNumber - firstRow;
+
+    // load the stripe holding the row unless it is the current one
+    int targetStripe = findStripe(rowInSlice);
+    if (targetStripe != currentStripe) {
+      currentStripe = targetStripe;
+      readStripe();
+    }
+    readRowIndex(currentStripe, included);
+
+    // finally move inside the stripe
+    advanceToNextRow(reader, rowInSlice, true);
+  }
+
+  public long getNumBytes() {
+    return ((RecordReaderUtils.DefaultDataReader)dataReader).getReadBytes();
+  }
+
+  /**
+   * Releases the streams of the current stripe and closes the data reader.
+   * The data reader is closed even when releasing the streams fails, so that
+   * the underlying file is not left open.
+   *
+   * @throws IOException if releasing the streams or closing the data reader fails
+   */
+  @Override
+  public void close() throws IOException {
+    try {
+      clearStreams();
+    } finally {
+      dataReader.close();
+    }
+  }
+
+  public static final class PositionProviderImpl implements PositionProvider {
+    private final OrcProto.RowIndexEntry entry;
+    private int index;
+
+    public PositionProviderImpl(OrcProto.RowIndexEntry entry) {
+      this(entry, 0);
+    }
+
+    public PositionProviderImpl(OrcProto.RowIndexEntry entry, int startPos) {
+      this.entry = entry;
+      this.index = startPos;
+    }
+
+    @Override
+    public long getNext() {
+      return entry.getPositions(index++);
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/OrcUtils.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/OrcUtils.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/OrcUtils.java
index 3a474dd..b8d3f52 100644
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/OrcUtils.java
+++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/OrcUtils.java
@@ -17,185 +17,101 @@
  */
 package org.apache.tajo.storage.thirdparty.orc;
 
-import com.google.common.collect.Lists;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.*;
-
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
+import org.apache.orc.CompressionCodec;
+import org.apache.orc.TypeDescription;
+import org.apache.orc.impl.SnappyCodec;
+import org.apache.orc.impl.ZlibCodec;
+import org.apache.tajo.catalog.Column;
+import org.apache.tajo.catalog.Schema;
+import org.apache.tajo.catalog.TypeDesc;
+import org.apache.tajo.exception.TajoRuntimeException;
+import org.apache.tajo.exception.UnsupportedDataTypeException;
 
 public class OrcUtils {
   private static final Log LOG = LogFactory.getLog(OrcUtils.class);
 
-  /**
-   * Returns selected columns as a boolean array with true value set for specified column names.
-   * The result will contain number of elements equal to flattened number of columns.
-   * For example:
-   * selectedColumns - a,b,c
-   * allColumns - a,b,c,d
-   * If column c is a complex type, say list<string> and other types are primitives then result will
-   * be [false, true, true, true, true, true, false]
-   * Index 0 is the root element of the struct which is set to false by default, index 1,2
-   * corresponds to columns a and b. Index 3,4 correspond to column c which is list<string> and
-   * index 5 correspond to column d. After flattening list<string> gets 2 columns.
-   *
-   * @param selectedColumns - comma separated list of selected column names
-   * @param allColumns      - comma separated list of all column names
-   * @param inspector       - object inspector
-   * @return - boolean array with true value set for the specified column names
-   */
-  public static boolean[] includeColumns(String selectedColumns, String allColumns,
-      ObjectInspector inspector) {
-    int numFlattenedCols = getFlattenedColumnsCount(inspector);
-    boolean[] results = new boolean[numFlattenedCols];
-    if ("*".equals(selectedColumns)) {
-      Arrays.fill(results, true);
-      return results;
-    }
-    if (selectedColumns != null && !selectedColumns.isEmpty()) {
-      includeColumnsImpl(results, selectedColumns.toLowerCase(), allColumns, inspector);
-    }
-    return results;
-  }
-
-  private static void includeColumnsImpl(boolean[] includeColumns, String selectedColumns,
-      String allColumns,
-      ObjectInspector inspector) {
-      Map<String, List<Integer>> columnSpanMap = getColumnSpan(allColumns, inspector);
-      LOG.info("columnSpanMap: " + columnSpanMap);
-
-      String[] selCols = selectedColumns.split(",");
-      for (String sc : selCols) {
-        if (columnSpanMap.containsKey(sc)) {
-          List<Integer> colSpan = columnSpanMap.get(sc);
-          int start = colSpan.get(0);
-          int end = colSpan.get(1);
-          for (int i = start; i <= end; i++) {
-            includeColumns[i] = true;
+  public static org.apache.orc.CompressionCodec createCodec(org.apache.orc.CompressionKind kind) {
+    switch (kind) {
+      case NONE:
+        return null;
+      case ZLIB:
+        return new ZlibCodec();
+      case SNAPPY:
+        return new SnappyCodec();
+      case LZO:
+        try {
+          ClassLoader loader = Thread.currentThread().getContextClassLoader();
+          if (loader == null) {
+            throw new RuntimeException("error while getting a class loader");
           }
+          @SuppressWarnings("unchecked")
+          Class<? extends org.apache.orc.CompressionCodec> lzo =
+              (Class<? extends CompressionCodec>)
+                  loader.loadClass("org.apache.hadoop.hive.ql.io.orc.LzoCodec");
+          return lzo.newInstance();
+        } catch (ClassNotFoundException e) {
+          throw new IllegalArgumentException("LZO is not available.", e);
+        } catch (InstantiationException e) {
+          throw new IllegalArgumentException("Problem initializing LZO", e);
+        } catch (IllegalAccessException e) {
+          throw new IllegalArgumentException("Insufficient access to LZO", e);
         }
-      }
-
-      LOG.info("includeColumns: " + Arrays.toString(includeColumns));
+      default:
+        throw new IllegalArgumentException("Unknown compression codec: " +
+            kind);
     }
+  }
 
-  private static Map<String, List<Integer>> getColumnSpan(String allColumns,
-      ObjectInspector inspector) {
-    // map that contains the column span for each column. Column span is the number of columns
-    // required after flattening. For a given object inspector this map contains the start column
-    // id and end column id (both inclusive) after flattening.
-    // EXAMPLE:
-    // schema: struct<a:int, b:float, c:map<string,int>>
-    // column span map for the above struct will be
-    // a => [1,1], b => [2,2], c => [3,5]
-    Map<String, List<Integer>> columnSpanMap = new HashMap<>();
-    if (allColumns != null) {
-      String[] columns = allColumns.split(",");
-      int startIdx = 0;
-      int endIdx = 0;
-      if (inspector instanceof StructObjectInspector) {
-        StructObjectInspector soi = (StructObjectInspector) inspector;
-        List<? extends StructField> fields = soi.getAllStructFieldRefs();
-        for (int i = 0; i < fields.size(); i++) {
-          StructField sf = fields.get(i);
-
-          // we get the type (category) from object inspector but column name from the argument.
-          // The reason for this is hive (FileSinkOperator) does not pass the actual column names,
-          // instead it passes the internal column names (_col1,_col2).
-          ObjectInspector sfOI = sf.getFieldObjectInspector();
-          String colName = columns[i];
+  public static TypeDescription convertSchema(Schema schema) {
+    TypeDescription description = TypeDescription.createStruct();
 
-          startIdx = endIdx + 1;
-          switch (sfOI.getCategory()) {
-            case PRIMITIVE:
-              endIdx += 1;
-              break;
-            case STRUCT:
-              endIdx += 1;
-              StructObjectInspector structInsp = (StructObjectInspector) sfOI;
-              List<? extends StructField> structFields = structInsp.getAllStructFieldRefs();
-              for (StructField structField : structFields) {
-                endIdx += getFlattenedColumnsCount(structField.getFieldObjectInspector());
-              }
-              break;
-            case MAP:
-              endIdx += 1;
-              MapObjectInspector mapInsp = (MapObjectInspector) sfOI;
-              endIdx += getFlattenedColumnsCount(mapInsp.getMapKeyObjectInspector());
-              endIdx += getFlattenedColumnsCount(mapInsp.getMapValueObjectInspector());
-              break;
-            case LIST:
-              endIdx += 1;
-              ListObjectInspector listInsp = (ListObjectInspector) sfOI;
-              endIdx += getFlattenedColumnsCount(listInsp.getListElementObjectInspector());
-              break;
-            case UNION:
-              endIdx += 1;
-              UnionObjectInspector unionInsp = (UnionObjectInspector) sfOI;
-              List<ObjectInspector> choices = unionInsp.getObjectInspectors();
-              for (ObjectInspector choice : choices) {
-                endIdx += getFlattenedColumnsCount(choice);
-              }
-              break;
-            default:
-              throw new IllegalArgumentException("Bad category: " +
-                  inspector.getCategory());
-          }
-
-          columnSpanMap.put(colName, Lists.newArrayList(startIdx, endIdx));
-        }
-      }
+    for (Column eachColumn : schema.getRootColumns()) {
+      description.addField(eachColumn.getQualifiedName(),
+          convertTypeInfo(eachColumn.getTypeDesc()));
     }
-    return columnSpanMap;
+    return description;
   }
 
-  /**
-   * Returns the number of columns after flatting complex types.
-   *
-   * @param inspector - object inspector
-   * @return
-   */
-  public static int getFlattenedColumnsCount(ObjectInspector inspector) {
-    int numWriters = 0;
-    switch (inspector.getCategory()) {
-      case PRIMITIVE:
-        numWriters += 1;
-        break;
-      case STRUCT:
-        numWriters += 1;
-        StructObjectInspector structInsp = (StructObjectInspector) inspector;
-        List<? extends StructField> fields = structInsp.getAllStructFieldRefs();
-        for (StructField field : fields) {
-          numWriters += getFlattenedColumnsCount(field.getFieldObjectInspector());
-        }
-        break;
-      case MAP:
-        numWriters += 1;
-        MapObjectInspector mapInsp = (MapObjectInspector) inspector;
-        numWriters += getFlattenedColumnsCount(mapInsp.getMapKeyObjectInspector());
-        numWriters += getFlattenedColumnsCount(mapInsp.getMapValueObjectInspector());
-        break;
-      case LIST:
-        numWriters += 1;
-        ListObjectInspector listInsp = (ListObjectInspector) inspector;
-        numWriters += getFlattenedColumnsCount(listInsp.getListElementObjectInspector());
-        break;
-      case UNION:
-        numWriters += 1;
-        UnionObjectInspector unionInsp = (UnionObjectInspector) inspector;
-        List<ObjectInspector> choices = unionInsp.getObjectInspectors();
-        for (ObjectInspector choice : choices) {
-          numWriters += getFlattenedColumnsCount(choice);
+  public static TypeDescription convertTypeInfo(TypeDesc desc) {
+    switch (desc.getDataType().getType()) {
+      case BOOLEAN:
+        return TypeDescription.createBoolean();
+      case BIT:
+        return TypeDescription.createByte();
+      case INT2:
+        return TypeDescription.createShort();
+      case INT4:
+      case INET4:
+        return TypeDescription.createInt();
+      case INT8:
+        return TypeDescription.createLong();
+      case FLOAT4:
+        return TypeDescription.createFloat();
+      case FLOAT8:
+        return TypeDescription.createDouble();
+      case TEXT:
+        return TypeDescription.createString();
+      case DATE:
+        return TypeDescription.createDate();
+      case TIMESTAMP:
+        return TypeDescription.createTimestamp();
+      case BLOB:
+        return TypeDescription.createBinary();
+      case CHAR:
+        return TypeDescription.createChar()
+            .withMaxLength(desc.getDataType().getLength());
+      case RECORD: {
+        TypeDescription result = TypeDescription.createStruct();
+        for (Column eachColumn : desc.getNestedSchema().getRootColumns()) {
+          result.addField(eachColumn.getQualifiedName(),
+              convertTypeInfo(eachColumn.getTypeDesc()));
         }
-        break;
+        return result;
+      }
       default:
-        throw new IllegalArgumentException("Bad category: " +
-            inspector.getCategory());
+        throw new TajoRuntimeException(new UnsupportedDataTypeException(desc.getDataType().getType().name()));
     }
-    return numWriters;
   }
-
 }

[3/7] tajo git commit: TAJO-2102: Migrate to Apache Orc from Presto's one.

Posted by ji...@apache.org.

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/SerializationUtils.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/SerializationUtils.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/SerializationUtils.java
deleted file mode 100644
index 53687b7..0000000
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/SerializationUtils.java
+++ /dev/null
@@ -1,844 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.tajo.storage.thirdparty.orc;
-
-import java.io.EOFException;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.math.BigInteger;
-
-final class SerializationUtils {
-
-  private final static int BUFFER_SIZE = 64;
-  private final byte[] readBuffer;
-  private final byte[] writeBuffer;
-
-  public SerializationUtils() {
-    this.readBuffer = new byte[BUFFER_SIZE];
-    this.writeBuffer = new byte[BUFFER_SIZE];
-  }
-
-  void writeVulong(OutputStream output, long value) throws IOException {
-    while (true) {
-      if ((value & ~0x7f) == 0) {
-        output.write((byte) value);
-        return;
-      } else {
-        output.write((byte) (0x80 | (value & 0x7f)));
-        value >>>= 7;
-      }
-    }
-  }
-
-  void writeVslong(OutputStream output, long value) throws IOException {
-    writeVulong(output, (value << 1) ^ (value >> 63));
-  }
-
-
-  long readVulong(InputStream in) throws IOException {
-    long result = 0;
-    long b;
-    int offset = 0;
-    do {
-      b = in.read();
-      if (b == -1) {
-        throw new EOFException("Reading Vulong past EOF");
-      }
-      result |= (0x7f & b) << offset;
-      offset += 7;
-    } while (b >= 0x80);
-    return result;
-  }
-
-  long readVslong(InputStream in) throws IOException {
-    long result = readVulong(in);
-    return (result >>> 1) ^ -(result & 1);
-  }
-
-  float readFloat(InputStream in) throws IOException {
-    int ser = in.read() | (in.read() << 8) | (in.read() << 16) |
-      (in.read() << 24);
-    return Float.intBitsToFloat(ser);
-  }
-
-  void writeFloat(OutputStream output, float value) throws IOException {
-    int ser = Float.floatToIntBits(value);
-    output.write(ser & 0xff);
-    output.write((ser >> 8) & 0xff);
-    output.write((ser >> 16) & 0xff);
-    output.write((ser >> 24) & 0xff);
-  }
-
-  double readDouble(InputStream in) throws IOException {
-    return Double.longBitsToDouble(readLongLE(in));
-  }
-
-  long readLongLE(InputStream in) throws IOException {
-    in.read(readBuffer, 0, 8);
-    return (((readBuffer[0] & 0xff) << 0)
-        + ((readBuffer[1] & 0xff) << 8)
-        + ((readBuffer[2] & 0xff) << 16)
-        + ((long) (readBuffer[3] & 0xff) << 24)
-        + ((long) (readBuffer[4] & 0xff) << 32)
-        + ((long) (readBuffer[5] & 0xff) << 40)
-        + ((long) (readBuffer[6] & 0xff) << 48)
-        + ((long) (readBuffer[7] & 0xff) << 56));
-  }
-
-  void writeDouble(OutputStream output, double value) throws IOException {
-    writeLongLE(output, Double.doubleToLongBits(value));
-  }
-
-  private void writeLongLE(OutputStream output, long value) throws IOException {
-    writeBuffer[0] = (byte) ((value >> 0)  & 0xff);
-    writeBuffer[1] = (byte) ((value >> 8)  & 0xff);
-    writeBuffer[2] = (byte) ((value >> 16) & 0xff);
-    writeBuffer[3] = (byte) ((value >> 24) & 0xff);
-    writeBuffer[4] = (byte) ((value >> 32) & 0xff);
-    writeBuffer[5] = (byte) ((value >> 40) & 0xff);
-    writeBuffer[6] = (byte) ((value >> 48) & 0xff);
-    writeBuffer[7] = (byte) ((value >> 56) & 0xff);
-    output.write(writeBuffer, 0, 8);
-  }
-
-  /**
-   * Write the arbitrarily sized signed BigInteger in vint format.
-   *
-   * Signed integers are encoded using the low bit as the sign bit using zigzag
-   * encoding.
-   *
-   * Each byte uses the low 7 bits for data and the high bit for stop/continue.
-   *
-   * Bytes are stored LSB first.
-   * @param output the stream to write to
-   * @param value the value to output
-   * @throws IOException
-   */
-  static void writeBigInteger(OutputStream output,
-                              BigInteger value) throws IOException {
-    // encode the signed number as a positive integer
-    value = value.shiftLeft(1);
-    int sign = value.signum();
-    if (sign < 0) {
-      value = value.negate();
-      value = value.subtract(BigInteger.ONE);
-    }
-    int length = value.bitLength();
-    while (true) {
-      long lowBits = value.longValue() & 0x7fffffffffffffffL;
-      length -= 63;
-      // write out the next 63 bits worth of data
-      for(int i=0; i < 9; ++i) {
-        // if this is the last byte, leave the high bit off
-        if (length <= 0 && (lowBits & ~0x7f) == 0) {
-          output.write((byte) lowBits);
-          return;
-        } else {
-          output.write((byte) (0x80 | (lowBits & 0x7f)));
-          lowBits >>>= 7;
-        }
-      }
-      value = value.shiftRight(63);
-    }
-  }
-
-  /**
-   * Read the signed arbitrary sized BigInteger BigInteger in vint format
-   * @param input the stream to read from
-   * @return the read BigInteger
-   * @throws IOException
-   */
-  static BigInteger readBigInteger(InputStream input) throws IOException {
-    BigInteger result = BigInteger.ZERO;
-    long work = 0;
-    int offset = 0;
-    long b;
-    do {
-      b = input.read();
-      if (b == -1) {
-        throw new EOFException("Reading BigInteger past EOF from " + input);
-      }
-      work |= (0x7f & b) << (offset % 63);
-      offset += 7;
-      // if we've read 63 bits, roll them into the result
-      if (offset == 63) {
-        result = BigInteger.valueOf(work);
-        work = 0;
-      } else if (offset % 63 == 0) {
-        result = result.or(BigInteger.valueOf(work).shiftLeft(offset-63));
-        work = 0;
-      }
-    } while (b >= 0x80);
-    if (work != 0) {
-      result = result.or(BigInteger.valueOf(work).shiftLeft((offset/63)*63));
-    }
-    // convert back to a signed number
-    boolean isNegative = result.testBit(0);
-    if (isNegative) {
-      result = result.add(BigInteger.ONE);
-      result = result.negate();
-    }
-    result = result.shiftRight(1);
-    return result;
-  }
-
-  enum FixedBitSizes {
-    ONE, TWO, THREE, FOUR, FIVE, SIX, SEVEN, EIGHT, NINE, TEN, ELEVEN, TWELVE,
-    THIRTEEN, FOURTEEN, FIFTEEN, SIXTEEN, SEVENTEEN, EIGHTEEN, NINETEEN,
-    TWENTY, TWENTYONE, TWENTYTWO, TWENTYTHREE, TWENTYFOUR, TWENTYSIX,
-    TWENTYEIGHT, THIRTY, THIRTYTWO, FORTY, FORTYEIGHT, FIFTYSIX, SIXTYFOUR;
-  }
-
-  /**
-   * Count the number of bits required to encode the given value
-   * @param value
-   * @return bits required to store value
-   */
-  int findClosestNumBits(long value) {
-    int count = 0;
-    while (value != 0) {
-      count++;
-      value = value >>> 1;
-    }
-    return getClosestFixedBits(count);
-  }
-
-  /**
-   * zigzag encode the given value
-   * @param val
-   * @return zigzag encoded value
-   */
-  long zigzagEncode(long val) {
-    return (val << 1) ^ (val >> 63);
-  }
-
-  /**
-   * zigzag decode the given value
-   * @param val
-   * @return zizag decoded value
-   */
-  long zigzagDecode(long val) {
-    return (val >>> 1) ^ -(val & 1);
-  }
-
-  /**
-   * Compute the bits required to represent pth percentile value
-   * @param data - array
-   * @param p - percentile value (>=0.0 to <=1.0)
-   * @return pth percentile bits
-   */
-  int percentileBits(long[] data, int offset, int length, double p) {
-    if ((p > 1.0) || (p <= 0.0)) {
-      return -1;
-    }
-
-    // histogram that store the encoded bit requirement for each values.
-    // maximum number of bits that can encoded is 32 (refer FixedBitSizes)
-    int[] hist = new int[32];
-
-    // compute the histogram
-    for(int i = offset; i < (offset + length); i++) {
-      int idx = encodeBitWidth(findClosestNumBits(data[i]));
-      hist[idx] += 1;
-    }
-
-    int perLen = (int) (length * (1.0 - p));
-
-    // return the bits required by pth percentile length
-    for(int i = hist.length - 1; i >= 0; i--) {
-      perLen -= hist[i];
-      if (perLen < 0) {
-        return decodeBitWidth(i);
-      }
-    }
-
-    return 0;
-  }
-
-  /**
-   * Calculate the number of bytes required
-   * @param n - number of values
-   * @param numBits - bit width
-   * @return number of bytes required
-   */
-  int getTotalBytesRequired(int n, int numBits) {
-    return (n * numBits + 7) / 8;
-  }
-
-  /**
-   * For a given fixed bit this function will return the closest available fixed
-   * bit
-   * @param n
-   * @return closest valid fixed bit
-   */
-  int getClosestFixedBits(int n) {
-    if (n == 0) {
-      return 1;
-    }
-
-    if (n >= 1 && n <= 24) {
-      return n;
-    } else if (n > 24 && n <= 26) {
-      return 26;
-    } else if (n > 26 && n <= 28) {
-      return 28;
-    } else if (n > 28 && n <= 30) {
-      return 30;
-    } else if (n > 30 && n <= 32) {
-      return 32;
-    } else if (n > 32 && n <= 40) {
-      return 40;
-    } else if (n > 40 && n <= 48) {
-      return 48;
-    } else if (n > 48 && n <= 56) {
-      return 56;
-    } else {
-      return 64;
-    }
-  }
-
-  public int getClosestAlignedFixedBits(int n) {
-    if (n == 0 ||  n == 1) {
-      return 1;
-    } else if (n > 1 && n <= 2) {
-      return 2;
-    } else if (n > 2 && n <= 4) {
-      return 4;
-    } else if (n > 4 && n <= 8) {
-      return 8;
-    } else if (n > 8 && n <= 16) {
-      return 16;
-    } else if (n > 16 && n <= 24) {
-      return 24;
-    } else if (n > 24 && n <= 32) {
-      return 32;
-    } else if (n > 32 && n <= 40) {
-      return 40;
-    } else if (n > 40 && n <= 48) {
-      return 48;
-    } else if (n > 48 && n <= 56) {
-      return 56;
-    } else {
-      return 64;
-    }
-  }
-
-  /**
-   * Finds the closest available fixed bit width match and returns its encoded
-   * value (ordinal)
-   * @param n - fixed bit width to encode
-   * @return encoded fixed bit width
-   */
-  int encodeBitWidth(int n) {
-    n = getClosestFixedBits(n);
-
-    if (n >= 1 && n <= 24) {
-      return n - 1;
-    } else if (n > 24 && n <= 26) {
-      return FixedBitSizes.TWENTYSIX.ordinal();
-    } else if (n > 26 && n <= 28) {
-      return FixedBitSizes.TWENTYEIGHT.ordinal();
-    } else if (n > 28 && n <= 30) {
-      return FixedBitSizes.THIRTY.ordinal();
-    } else if (n > 30 && n <= 32) {
-      return FixedBitSizes.THIRTYTWO.ordinal();
-    } else if (n > 32 && n <= 40) {
-      return FixedBitSizes.FORTY.ordinal();
-    } else if (n > 40 && n <= 48) {
-      return FixedBitSizes.FORTYEIGHT.ordinal();
-    } else if (n > 48 && n <= 56) {
-      return FixedBitSizes.FIFTYSIX.ordinal();
-    } else {
-      return FixedBitSizes.SIXTYFOUR.ordinal();
-    }
-  }
-
-  /**
-   * Decodes the ordinal fixed bit value to actual fixed bit width value
-   * @param n - encoded fixed bit width
-   * @return decoded fixed bit width
-   */
-  int decodeBitWidth(int n) {
-    if (n >= FixedBitSizes.ONE.ordinal()
-        && n <= FixedBitSizes.TWENTYFOUR.ordinal()) {
-      return n + 1;
-    } else if (n == FixedBitSizes.TWENTYSIX.ordinal()) {
-      return 26;
-    } else if (n == FixedBitSizes.TWENTYEIGHT.ordinal()) {
-      return 28;
-    } else if (n == FixedBitSizes.THIRTY.ordinal()) {
-      return 30;
-    } else if (n == FixedBitSizes.THIRTYTWO.ordinal()) {
-      return 32;
-    } else if (n == FixedBitSizes.FORTY.ordinal()) {
-      return 40;
-    } else if (n == FixedBitSizes.FORTYEIGHT.ordinal()) {
-      return 48;
-    } else if (n == FixedBitSizes.FIFTYSIX.ordinal()) {
-      return 56;
-    } else {
-      return 64;
-    }
-  }
-
-  /**
-   * Bitpack and write the input values to underlying output stream
-   * @param input - values to write
-   * @param offset - offset
-   * @param len - length
-   * @param bitSize - bit width
-   * @param output - output stream
-   * @throws IOException
-   */
-  void writeInts(long[] input, int offset, int len, int bitSize,
-                        OutputStream output) throws IOException {
-    if (input == null || input.length < 1 || offset < 0 || len < 1
-        || bitSize < 1) {
-      return;
-    }
-
-    switch (bitSize) {
-    case 1:
-      unrolledBitPack1(input, offset, len, output);
-      return;
-    case 2:
-      unrolledBitPack2(input, offset, len, output);
-      return;
-    case 4:
-      unrolledBitPack4(input, offset, len, output);
-      return;
-    case 8:
-      unrolledBitPack8(input, offset, len, output);
-      return;
-    case 16:
-      unrolledBitPack16(input, offset, len, output);
-      return;
-    case 24:
-      unrolledBitPack24(input, offset, len, output);
-      return;
-    case 32:
-      unrolledBitPack32(input, offset, len, output);
-      return;
-    case 40:
-      unrolledBitPack40(input, offset, len, output);
-      return;
-    case 48:
-      unrolledBitPack48(input, offset, len, output);
-      return;
-    case 56:
-      unrolledBitPack56(input, offset, len, output);
-      return;
-    case 64:
-      unrolledBitPack64(input, offset, len, output);
-      return;
-    default:
-      break;
-    }
-
-    int bitsLeft = 8;
-    byte current = 0;
-    for(int i = offset; i < (offset + len); i++) {
-      long value = input[i];
-      int bitsToWrite = bitSize;
-      while (bitsToWrite > bitsLeft) {
-        // add the bits to the bottom of the current word
-        current |= value >>> (bitsToWrite - bitsLeft);
-        // subtract out the bits we just added
-        bitsToWrite -= bitsLeft;
-        // zero out the bits above bitsToWrite
-        value &= (1L << bitsToWrite) - 1;
-        output.write(current);
-        current = 0;
-        bitsLeft = 8;
-      }
-      bitsLeft -= bitsToWrite;
-      current |= value << bitsLeft;
-      if (bitsLeft == 0) {
-        output.write(current);
-        current = 0;
-        bitsLeft = 8;
-      }
-    }
-
-    // flush
-    if (bitsLeft != 8) {
-      output.write(current);
-      current = 0;
-      bitsLeft = 8;
-    }
-  }
-
-  private void unrolledBitPack1(long[] input, int offset, int len,
-      OutputStream output) throws IOException {
-    final int numHops = 8;
-    final int remainder = len % numHops;
-    final int endOffset = offset + len;
-    final int endUnroll = endOffset - remainder;
-    int val = 0;
-    for (int i = offset; i < endUnroll; i = i + numHops) {
-      val = (int) (val | ((input[i] & 1) << 7)
-          | ((input[i + 1] & 1) << 6)
-          | ((input[i + 2] & 1) << 5)
-          | ((input[i + 3] & 1) << 4)
-          | ((input[i + 4] & 1) << 3)
-          | ((input[i + 5] & 1) << 2)
-          | ((input[i + 6] & 1) << 1)
-          | (input[i + 7]) & 1);
-      output.write(val);
-      val = 0;
-    }
-
-    if (remainder > 0) {
-      int startShift = 7;
-      for (int i = endUnroll; i < endOffset; i++) {
-        val = (int) (val | (input[i] & 1) << startShift);
-        startShift -= 1;
-      }
-      output.write(val);
-    }
-  }
-
-  private void unrolledBitPack2(long[] input, int offset, int len,
-      OutputStream output) throws IOException {
-    final int numHops = 4;
-    final int remainder = len % numHops;
-    final int endOffset = offset + len;
-    final int endUnroll = endOffset - remainder;
-    int val = 0;
-    for (int i = offset; i < endUnroll; i = i + numHops) {
-      val = (int) (val | ((input[i] & 3) << 6)
-          | ((input[i + 1] & 3) << 4)
-          | ((input[i + 2] & 3) << 2)
-          | (input[i + 3]) & 3);
-      output.write(val);
-      val = 0;
-    }
-
-    if (remainder > 0) {
-      int startShift = 6;
-      for (int i = endUnroll; i < endOffset; i++) {
-        val = (int) (val | (input[i] & 3) << startShift);
-        startShift -= 2;
-      }
-      output.write(val);
-    }
-  }
-
-  private void unrolledBitPack4(long[] input, int offset, int len,
-      OutputStream output) throws IOException {
-    final int numHops = 2;
-    final int remainder = len % numHops;
-    final int endOffset = offset + len;
-    final int endUnroll = endOffset - remainder;
-    int val = 0;
-    for (int i = offset; i < endUnroll; i = i + numHops) {
-      val = (int) (val | ((input[i] & 15) << 4) | (input[i + 1]) & 15);
-      output.write(val);
-      val = 0;
-    }
-
-    if (remainder > 0) {
-      int startShift = 4;
-      for (int i = endUnroll; i < endOffset; i++) {
-        val = (int) (val | (input[i] & 15) << startShift);
-        startShift -= 4;
-      }
-      output.write(val);
-    }
-  }
-
-  private void unrolledBitPack8(long[] input, int offset, int len,
-      OutputStream output) throws IOException {
-    unrolledBitPackBytes(input, offset, len, output, 1);
-  }
-
-  private void unrolledBitPack16(long[] input, int offset, int len,
-      OutputStream output) throws IOException {
-    unrolledBitPackBytes(input, offset, len, output, 2);
-  }
-
-  private void unrolledBitPack24(long[] input, int offset, int len,
-      OutputStream output) throws IOException {
-    unrolledBitPackBytes(input, offset, len, output, 3);
-  }
-
-  private void unrolledBitPack32(long[] input, int offset, int len,
-      OutputStream output) throws IOException {
-    unrolledBitPackBytes(input, offset, len, output, 4);
-  }
-
-  private void unrolledBitPack40(long[] input, int offset, int len,
-      OutputStream output) throws IOException {
-    unrolledBitPackBytes(input, offset, len, output, 5);
-  }
-
-  private void unrolledBitPack48(long[] input, int offset, int len,
-      OutputStream output) throws IOException {
-    unrolledBitPackBytes(input, offset, len, output, 6);
-  }
-
-  private void unrolledBitPack56(long[] input, int offset, int len,
-      OutputStream output) throws IOException {
-    unrolledBitPackBytes(input, offset, len, output, 7);
-  }
-
-  private void unrolledBitPack64(long[] input, int offset, int len,
-      OutputStream output) throws IOException {
-    unrolledBitPackBytes(input, offset, len, output, 8);
-  }
-
-  private void unrolledBitPackBytes(long[] input, int offset, int len, OutputStream output, int numBytes) throws IOException {
-    final int numHops = 8;
-    final int remainder = len % numHops;
-    final int endOffset = offset + len;
-    final int endUnroll = endOffset - remainder;
-    int i = offset;
-    for (; i < endUnroll; i = i + numHops) {
-      writeLongBE(output, input, i, numHops, numBytes);
-    }
-
-    if (remainder > 0) {
-      writeRemainingLongs(output, i, input, remainder, numBytes);
-    }
-  }
-
-  private void writeRemainingLongs(OutputStream output, int offset, long[] input, int remainder,
-      int numBytes) throws IOException {
-    final int numHops = remainder;
-
-    int idx = 0;
-    switch (numBytes) {
-    case 1:
-      while (remainder > 0) {
-        writeBuffer[idx] = (byte) (input[offset + idx] & 255);
-        remainder--;
-        idx++;
-      }
-      break;
-    case 2:
-      while (remainder > 0) {
-        writeLongBE2(output, input[offset + idx], idx * 2);
-        remainder--;
-        idx++;
-      }
-      break;
-    case 3:
-      while (remainder > 0) {
-        writeLongBE3(output, input[offset + idx], idx * 3);
-        remainder--;
-        idx++;
-      }
-      break;
-    case 4:
-      while (remainder > 0) {
-        writeLongBE4(output, input[offset + idx], idx * 4);
-        remainder--;
-        idx++;
-      }
-      break;
-    case 5:
-      while (remainder > 0) {
-        writeLongBE5(output, input[offset + idx], idx * 5);
-        remainder--;
-        idx++;
-      }
-      break;
-    case 6:
-      while (remainder > 0) {
-        writeLongBE6(output, input[offset + idx], idx * 6);
-        remainder--;
-        idx++;
-      }
-      break;
-    case 7:
-      while (remainder > 0) {
-        writeLongBE7(output, input[offset + idx], idx * 7);
-        remainder--;
-        idx++;
-      }
-      break;
-    case 8:
-      while (remainder > 0) {
-        writeLongBE8(output, input[offset + idx], idx * 8);
-        remainder--;
-        idx++;
-      }
-      break;
-    default:
-      break;
-    }
-
-    final int toWrite = numHops * numBytes;
-    output.write(writeBuffer, 0, toWrite);
-  }
-
-  private void writeLongBE(OutputStream output, long[] input, int offset, int numHops, int numBytes) throws IOException {
-
-    switch (numBytes) {
-    case 1:
-      writeBuffer[0] = (byte) (input[offset + 0] & 255);
-      writeBuffer[1] = (byte) (input[offset + 1] & 255);
-      writeBuffer[2] = (byte) (input[offset + 2] & 255);
-      writeBuffer[3] = (byte) (input[offset + 3] & 255);
-      writeBuffer[4] = (byte) (input[offset + 4] & 255);
-      writeBuffer[5] = (byte) (input[offset + 5] & 255);
-      writeBuffer[6] = (byte) (input[offset + 6] & 255);
-      writeBuffer[7] = (byte) (input[offset + 7] & 255);
-      break;
-    case 2:
-      writeLongBE2(output, input[offset + 0], 0);
-      writeLongBE2(output, input[offset + 1], 2);
-      writeLongBE2(output, input[offset + 2], 4);
-      writeLongBE2(output, input[offset + 3], 6);
-      writeLongBE2(output, input[offset + 4], 8);
-      writeLongBE2(output, input[offset + 5], 10);
-      writeLongBE2(output, input[offset + 6], 12);
-      writeLongBE2(output, input[offset + 7], 14);
-      break;
-    case 3:
-      writeLongBE3(output, input[offset + 0], 0);
-      writeLongBE3(output, input[offset + 1], 3);
-      writeLongBE3(output, input[offset + 2], 6);
-      writeLongBE3(output, input[offset + 3], 9);
-      writeLongBE3(output, input[offset + 4], 12);
-      writeLongBE3(output, input[offset + 5], 15);
-      writeLongBE3(output, input[offset + 6], 18);
-      writeLongBE3(output, input[offset + 7], 21);
-      break;
-    case 4:
-      writeLongBE4(output, input[offset + 0], 0);
-      writeLongBE4(output, input[offset + 1], 4);
-      writeLongBE4(output, input[offset + 2], 8);
-      writeLongBE4(output, input[offset + 3], 12);
-      writeLongBE4(output, input[offset + 4], 16);
-      writeLongBE4(output, input[offset + 5], 20);
-      writeLongBE4(output, input[offset + 6], 24);
-      writeLongBE4(output, input[offset + 7], 28);
-      break;
-    case 5:
-      writeLongBE5(output, input[offset + 0], 0);
-      writeLongBE5(output, input[offset + 1], 5);
-      writeLongBE5(output, input[offset + 2], 10);
-      writeLongBE5(output, input[offset + 3], 15);
-      writeLongBE5(output, input[offset + 4], 20);
-      writeLongBE5(output, input[offset + 5], 25);
-      writeLongBE5(output, input[offset + 6], 30);
-      writeLongBE5(output, input[offset + 7], 35);
-      break;
-    case 6:
-      writeLongBE6(output, input[offset + 0], 0);
-      writeLongBE6(output, input[offset + 1], 6);
-      writeLongBE6(output, input[offset + 2], 12);
-      writeLongBE6(output, input[offset + 3], 18);
-      writeLongBE6(output, input[offset + 4], 24);
-      writeLongBE6(output, input[offset + 5], 30);
-      writeLongBE6(output, input[offset + 6], 36);
-      writeLongBE6(output, input[offset + 7], 42);
-      break;
-    case 7:
-      writeLongBE7(output, input[offset + 0], 0);
-      writeLongBE7(output, input[offset + 1], 7);
-      writeLongBE7(output, input[offset + 2], 14);
-      writeLongBE7(output, input[offset + 3], 21);
-      writeLongBE7(output, input[offset + 4], 28);
-      writeLongBE7(output, input[offset + 5], 35);
-      writeLongBE7(output, input[offset + 6], 42);
-      writeLongBE7(output, input[offset + 7], 49);
-      break;
-    case 8:
-      writeLongBE8(output, input[offset + 0], 0);
-      writeLongBE8(output, input[offset + 1], 8);
-      writeLongBE8(output, input[offset + 2], 16);
-      writeLongBE8(output, input[offset + 3], 24);
-      writeLongBE8(output, input[offset + 4], 32);
-      writeLongBE8(output, input[offset + 5], 40);
-      writeLongBE8(output, input[offset + 6], 48);
-      writeLongBE8(output, input[offset + 7], 56);
-      break;
-      default:
-        break;
-    }
-
-    final int toWrite = numHops * numBytes;
-    output.write(writeBuffer, 0, toWrite);
-  }
-
-  private void writeLongBE2(OutputStream output, long val, int wbOffset) {
-    writeBuffer[wbOffset + 0] =  (byte) (val >>> 8);
-    writeBuffer[wbOffset + 1] =  (byte) (val >>> 0);
-  }
-
-  private void writeLongBE3(OutputStream output, long val, int wbOffset) {
-    writeBuffer[wbOffset + 0] =  (byte) (val >>> 16);
-    writeBuffer[wbOffset + 1] =  (byte) (val >>> 8);
-    writeBuffer[wbOffset + 2] =  (byte) (val >>> 0);
-  }
-
-  private void writeLongBE4(OutputStream output, long val, int wbOffset) {
-    writeBuffer[wbOffset + 0] =  (byte) (val >>> 24);
-    writeBuffer[wbOffset + 1] =  (byte) (val >>> 16);
-    writeBuffer[wbOffset + 2] =  (byte) (val >>> 8);
-    writeBuffer[wbOffset + 3] =  (byte) (val >>> 0);
-  }
-
-  private void writeLongBE5(OutputStream output, long val, int wbOffset) {
-    writeBuffer[wbOffset + 0] =  (byte) (val >>> 32);
-    writeBuffer[wbOffset + 1] =  (byte) (val >>> 24);
-    writeBuffer[wbOffset + 2] =  (byte) (val >>> 16);
-    writeBuffer[wbOffset + 3] =  (byte) (val >>> 8);
-    writeBuffer[wbOffset + 4] =  (byte) (val >>> 0);
-  }
-
-  private void writeLongBE6(OutputStream output, long val, int wbOffset) {
-    writeBuffer[wbOffset + 0] =  (byte) (val >>> 40);
-    writeBuffer[wbOffset + 1] =  (byte) (val >>> 32);
-    writeBuffer[wbOffset + 2] =  (byte) (val >>> 24);
-    writeBuffer[wbOffset + 3] =  (byte) (val >>> 16);
-    writeBuffer[wbOffset + 4] =  (byte) (val >>> 8);
-    writeBuffer[wbOffset + 5] =  (byte) (val >>> 0);
-  }
-
-  private void writeLongBE7(OutputStream output, long val, int wbOffset) {
-    writeBuffer[wbOffset + 0] =  (byte) (val >>> 48);
-    writeBuffer[wbOffset + 1] =  (byte) (val >>> 40);
-    writeBuffer[wbOffset + 2] =  (byte) (val >>> 32);
-    writeBuffer[wbOffset + 3] =  (byte) (val >>> 24);
-    writeBuffer[wbOffset + 4] =  (byte) (val >>> 16);
-    writeBuffer[wbOffset + 5] =  (byte) (val >>> 8);
-    writeBuffer[wbOffset + 6] =  (byte) (val >>> 0);
-  }
-
-  private void writeLongBE8(OutputStream output, long val, int wbOffset) {
-    writeBuffer[wbOffset + 0] =  (byte) (val >>> 56);
-    writeBuffer[wbOffset + 1] =  (byte) (val >>> 48);
-    writeBuffer[wbOffset + 2] =  (byte) (val >>> 40);
-    writeBuffer[wbOffset + 3] =  (byte) (val >>> 32);
-    writeBuffer[wbOffset + 4] =  (byte) (val >>> 24);
-    writeBuffer[wbOffset + 5] =  (byte) (val >>> 16);
-    writeBuffer[wbOffset + 6] =  (byte) (val >>> 8);
-    writeBuffer[wbOffset + 7] =  (byte) (val >>> 0);
-  }
-
-  // Do not want to use Guava LongMath.checkedSubtract() here as it will throw
-  // ArithmeticException in case of overflow
-  public boolean isSafeSubtract(long left, long right) {
-    return (left ^ right) >= 0 | (left ^ (left - right)) >= 0;
-  }
-}

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/SnappyCodec.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/SnappyCodec.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/SnappyCodec.java
deleted file mode 100644
index 285a32a..0000000
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/SnappyCodec.java
+++ /dev/null
@@ -1,109 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.tajo.storage.thirdparty.orc;
-
-import org.apache.hadoop.hive.shims.HadoopShims.DirectCompressionType;
-import org.apache.hadoop.hive.shims.HadoopShims.DirectDecompressorShim;
-import org.apache.hadoop.hive.shims.ShimLoader;
-import org.iq80.snappy.Snappy;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.EnumSet;
-
-class SnappyCodec implements CompressionCodec, DirectDecompressionCodec {
-
-  Boolean direct = null;
-
-  @Override
-  public boolean compress(ByteBuffer in, ByteBuffer out,
-                          ByteBuffer overflow) throws IOException {
-    int inBytes = in.remaining();
-    // I should work on a patch for Snappy to support an overflow buffer
-    // to prevent the extra buffer copy.
-    byte[] compressed = new byte[Snappy.maxCompressedLength(inBytes)];
-    int outBytes =
-        Snappy.compress(in.array(), in.arrayOffset() + in.position(), inBytes,
-          compressed, 0);
-    if (outBytes < inBytes) {
-      int remaining = out.remaining();
-      if (remaining >= outBytes) {
-        System.arraycopy(compressed, 0, out.array(), out.arrayOffset() +
-            out.position(), outBytes);
-        out.position(out.position() + outBytes);
-      } else {
-        System.arraycopy(compressed, 0, out.array(), out.arrayOffset() +
-            out.position(), remaining);
-        out.position(out.limit());
-        System.arraycopy(compressed, remaining, overflow.array(),
-            overflow.arrayOffset(), outBytes - remaining);
-        overflow.position(outBytes - remaining);
-      }
-      return true;
-    } else {
-      return false;
-    }
-  }
-
-  @Override
-  public void decompress(ByteBuffer in, ByteBuffer out) throws IOException {
-    if(in.isDirect() && out.isDirect()) {
-      directDecompress(in, out);
-      return;
-    }
-    int inOffset = in.position();
-    int uncompressLen =
-        Snappy.uncompress(in.array(), in.arrayOffset() + inOffset,
-          in.limit() - inOffset, out.array(), out.arrayOffset() + out.position());
-    out.position(uncompressLen + out.position());
-    out.flip();
-  }
-
-  @Override
-  public boolean isAvailable() {
-    if (direct == null) {
-      try {
-        if (ShimLoader.getHadoopShims().getDirectDecompressor(
-            DirectCompressionType.SNAPPY) != null) {
-          direct = Boolean.valueOf(true);
-        } else {
-          direct = Boolean.valueOf(false);
-        }
-      } catch (UnsatisfiedLinkError ule) {
-        direct = Boolean.valueOf(false);
-      }
-    }
-    return direct.booleanValue();
-  }
-
-  @Override
-  public void directDecompress(ByteBuffer in, ByteBuffer out)
-      throws IOException {
-    DirectDecompressorShim decompressShim = ShimLoader.getHadoopShims()
-        .getDirectDecompressor(DirectCompressionType.SNAPPY);
-    decompressShim.decompress(in, out);
-    out.flip(); // flip for read
-  }
-
-  @Override
-  public CompressionCodec modify(EnumSet<Modifier> modifiers) {
-    // snappy allows no modifications
-    return this;
-  }
-}

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/StreamName.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/StreamName.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/StreamName.java
deleted file mode 100644
index 3821645..0000000
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/StreamName.java
+++ /dev/null
@@ -1,95 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.tajo.storage.thirdparty.orc;
-
-/**
- * The name of a stream within a stripe.
- */
-class StreamName implements Comparable<StreamName> {
-  private final int column;
-  private final OrcProto.Stream.Kind kind;
-
-  public enum Area {
-    DATA, INDEX
-  }
-
-  public StreamName(int column, OrcProto.Stream.Kind kind) {
-    this.column = column;
-    this.kind = kind;
-  }
-
-  public boolean equals(Object obj) {
-    if (obj != null && obj instanceof StreamName) {
-      StreamName other = (StreamName) obj;
-      return other.column == column && other.kind == kind;
-    } else {
-      return false;
-    }
-  }
-
-  @Override
-  public int compareTo(StreamName streamName) {
-    if (streamName == null) {
-      return -1;
-    }
-    Area area = getArea(kind);
-    Area otherArea = StreamName.getArea(streamName.kind);
-    if (area != otherArea) {
-      return -area.compareTo(otherArea);
-    }
-    if (column != streamName.column) {
-      return column < streamName.column ? -1 : 1;
-    }
-    return kind.compareTo(streamName.kind);
-  }
-
-  public int getColumn() {
-    return column;
-  }
-
-  public OrcProto.Stream.Kind getKind() {
-    return kind;
-  }
-
-  public Area getArea() {
-    return getArea(kind);
-  }
-
-  public static Area getArea(OrcProto.Stream.Kind kind) {
-    switch (kind) {
-      case ROW_INDEX:
-      case DICTIONARY_COUNT:
-      case BLOOM_FILTER:
-        return Area.INDEX;
-      default:
-        return Area.DATA;
-    }
-  }
-
-  @Override
-  public String toString() {
-    return "Stream for column " + column + " kind " + kind;
-  }
-
-  @Override
-  public int hashCode() {
-    return column * 101 + kind.getNumber();
-  }
-}
-

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/StringColumnStatistics.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/StringColumnStatistics.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/StringColumnStatistics.java
deleted file mode 100644
index 4248664..0000000
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/StringColumnStatistics.java
+++ /dev/null
@@ -1,41 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tajo.storage.thirdparty.orc;
-
-/**
- * Statistics for string columns.
- */
-public interface StringColumnStatistics extends ColumnStatistics {
-  /**
-   * Get the minimum string.
-   * @return the minimum
-   */
-  String getMinimum();
-
-  /**
-   * Get the maximum string.
-   * @return the maximum
-   */
-  String getMaximum();
-
-  /**
-   * Get the total length of all strings
-   * @return the sum (total length)
-   */
-  long getSum();
-}

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/StringRedBlackTree.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/StringRedBlackTree.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/StringRedBlackTree.java
deleted file mode 100644
index 8835cef..0000000
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/StringRedBlackTree.java
+++ /dev/null
@@ -1,202 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tajo.storage.thirdparty.orc;
-
-import org.apache.hadoop.io.Text;
-
-import java.io.IOException;
-import java.io.OutputStream;
-
-/**
- * A red-black tree that stores strings. The strings are stored as UTF-8 bytes
- * and an offset for each entry.
- */
-class StringRedBlackTree extends RedBlackTree {
-  private final DynamicByteArray byteArray = new DynamicByteArray();
-  private final DynamicIntArray keyOffsets;
-  private String newKey;
-
-  public StringRedBlackTree(int initialCapacity) {
-    super(initialCapacity);
-    keyOffsets = new DynamicIntArray(initialCapacity);
-  }
-
-  public int add(String value) {
-    newKey = value;
-    return addNewKey();
-  }
-
-  private int addNewKey() {
-    // if the newKey is actually new, add it to our byteArray and store the offset & length
-    if (add()) {
-      int len = newKey.length();
-      keyOffsets.add(byteArray.add(newKey.getBytes(), 0, len));
-    }
-    return lastAdd;
-  }
-
-  public int add(Text value) {
-    newKey = value.toString();
-    return addNewKey();
-  }
-
-  @Override
-  protected int compareValue(int position) {
-    int start = keyOffsets.get(position);
-    int end;
-    if (position + 1 == keyOffsets.size()) {
-      end = byteArray.size();
-    } else {
-      end = keyOffsets.get(position+1);
-    }
-    return byteArray.compare(newKey.getBytes(), 0, newKey.length(),
-                             start, end - start);
-  }
-
-  /**
-   * The information about each node.
-   */
-  public interface VisitorContext {
-    /**
-     * Get the position where the key was originally added.
-     * @return the number returned by add.
-     */
-    int getOriginalPosition();
-
-    /**
-     * Write the bytes for the string to the given output stream.
-     * @param out the stream to write to.
-     * @throws IOException
-     */
-    void writeBytes(OutputStream out) throws IOException;
-
-    /**
-     * Get the original string.
-     * @return the string
-     */
-    Text getText();
-
-    /**
-     * Get the number of bytes.
-     * @return the string's length in bytes
-     */
-    int getLength();
-  }
-
-  /**
-   * The interface for visitors.
-   */
-  public interface Visitor {
-    /**
-     * Called once for each node of the tree in sort order.
-     * @param context the information about each node
-     * @throws IOException
-     */
-    void visit(VisitorContext context) throws IOException;
-  }
-
-  private class VisitorContextImpl implements VisitorContext {
-    private int originalPosition;
-    private int start;
-    private int end;
-    private final Text text = new Text();
-
-    public int getOriginalPosition() {
-      return originalPosition;
-    }
-
-    public Text getText() {
-      byteArray.setText(text, start, end - start);
-      return text;
-    }
-
-    public void writeBytes(OutputStream out) throws IOException {
-      byteArray.write(out, start, end - start);
-    }
-
-    public int getLength() {
-      return end - start;
-    }
-
-    void setPosition(int position) {
-      originalPosition = position;
-      start = keyOffsets.get(originalPosition);
-      if (position + 1 == keyOffsets.size()) {
-        end = byteArray.size();
-      } else {
-        end = keyOffsets.get(originalPosition + 1);
-      }
-    }
-  }
-
-  private void recurse(int node, Visitor visitor, VisitorContextImpl context
-                      ) throws IOException {
-    if (node != NULL) {
-      recurse(getLeft(node), visitor, context);
-      context.setPosition(node);
-      visitor.visit(context);
-      recurse(getRight(node), visitor, context);
-    }
-  }
-
-  /**
-   * Visit all of the nodes in the tree in sorted order.
-   * @param visitor the action to be applied to each node
-   * @throws IOException
-   */
-  public void visit(Visitor visitor) throws IOException {
-    recurse(root, visitor, new VisitorContextImpl());
-  }
-
-  /**
-   * Reset the table to empty.
-   */
-  public void clear() {
-    super.clear();
-    byteArray.clear();
-    keyOffsets.clear();
-  }
-
-  public void getText(Text result, int originalPosition) {
-    int offset = keyOffsets.get(originalPosition);
-    int length;
-    if (originalPosition + 1 == keyOffsets.size()) {
-      length = byteArray.size() - offset;
-    } else {
-      length = keyOffsets.get(originalPosition + 1) - offset;
-    }
-    byteArray.setText(result, offset, length);
-  }
-
-  /**
-   * Get the size of the character data in the table.
-   * @return the bytes used by the table
-   */
-  public int getCharacterSize() {
-    return byteArray.size();
-  }
-
-  /**
-   * Calculate the approximate size in memory.
-   * @return the number of bytes used in storing the tree.
-   */
-  public long getSizeInBytes() {
-    return byteArray.getSizeInBytes() + keyOffsets.getSizeInBytes() +
-      super.getSizeInBytes();
-  }
-}

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/StripeInformation.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/StripeInformation.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/StripeInformation.java
deleted file mode 100644
index 62819c1..0000000
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/StripeInformation.java
+++ /dev/null
@@ -1,59 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tajo.storage.thirdparty.orc;
-
-/**
- * Information about the stripes in an ORC file that is provided by the Reader.
- */
-public interface StripeInformation {
-  /**
-   * Get the byte offset of the start of the stripe.
-   * @return the bytes from the start of the file
-   */
-  long getOffset();
-
-  /**
-   * Get the total length of the stripe in bytes.
-   * @return the number of bytes in the stripe
-   */
-  long getLength();
-
-  /**
-   * Get the length of the stripe's indexes.
-   * @return the number of bytes in the index
-   */
-  long getIndexLength();
-
-  /**
-   * Get the length of the stripe's data.
-   * @return the number of bytes in the stripe
-   */
-  long getDataLength();
-
-  /**
-   * Get the length of the stripe's tail section, which contains its index.
-   * @return the number of bytes in the tail
-   */
-  long getFooterLength();
-
-  /**
-   * Get the number of rows in the stripe.
-   * @return a count of the number of rows
-   */
-  long getNumberOfRows();
-}

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/StripeStatistics.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/StripeStatistics.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/StripeStatistics.java
deleted file mode 100644
index 013fc8e..0000000
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/StripeStatistics.java
+++ /dev/null
@@ -1,42 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.tajo.storage.thirdparty.orc;
-
-import java.util.List;
-
-public class StripeStatistics {
-  private final List<OrcProto.ColumnStatistics> cs;
-
-  StripeStatistics(List<OrcProto.ColumnStatistics> list) {
-    this.cs = list;
-  }
-
-  /**
-   * Return list of column statistics
-   *
-   * @return column stats
-   */
-  public ColumnStatistics[] getColumnStatistics() {
-    ColumnStatistics[] result = new ColumnStatistics[cs.size()];
-    for (int i = 0; i < result.length; ++i) {
-      result[i] = ColumnStatisticsImpl.deserialize(cs.get(i));
-    }
-    return result;
-  }
-}

http://git-wip-us.apache.org/repos/asf/tajo/blob/68263585/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/TimestampColumnStatistics.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/TimestampColumnStatistics.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/TimestampColumnStatistics.java
deleted file mode 100644
index 6fad0ac..0000000
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/thirdparty/orc/TimestampColumnStatistics.java
+++ /dev/null
@@ -1,38 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.tajo.storage.thirdparty.orc;
-
-import java.sql.Timestamp;
-
-/**
- * Statistics for Timestamp columns.
- */
-public interface TimestampColumnStatistics extends ColumnStatistics {
-  /**
-   * Get the minimum value for the column.
-   * @return minimum value
-   */
-  Timestamp getMinimum();
-
-  /**
-   * Get the maximum value for the column.
-   * @return maximum value
-   */
-  Timestamp getMaximum();
-}