Posted to commits@hive.apache.org by om...@apache.org on 2015/12/12 00:28:02 UTC
[05/16] hive git commit: HIVE-11890. Create ORC submodule. (omalley reviewed by prasanthj)
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUnion.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUnion.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUnion.java
index 7412e6b..e36a744 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUnion.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUnion.java
@@ -22,6 +22,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.UnionObject;
import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo;
+import org.apache.orc.OrcProto;
import java.util.ArrayList;
import java.util.List;
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUtils.java
deleted file mode 100644
index aabea0b..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUtils.java
+++ /dev/null
@@ -1,629 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
-import org.apache.hadoop.hive.ql.io.IOConstants;
-import org.apache.hadoop.hive.ql.io.orc.OrcProto.Type;
-import org.apache.hadoop.hive.serde.serdeConstants;
-import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
-import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo;
-
-import com.google.common.collect.Lists;
-
-public class OrcUtils {
- private static final Logger LOG = LoggerFactory.getLogger(OrcUtils.class);
-
- /**
- * Returns the selected columns as a boolean array, with true set for the specified column names.
- * The result contains a number of elements equal to the flattened number of columns.
- * For example:
- * selectedColumns - a,b,c
- * allColumns - a,b,c,d
- * If column c is a complex type, say list<string>, and the other columns are primitives, then the
- * result will be [false, true, true, true, true, false].
- * Index 0 is the root struct, which is false by default; indexes 1 and 2 correspond to
- * columns a and b; indexes 3 and 4 correspond to column c, because list<string> flattens to
- * 2 columns; and index 5 corresponds to column d.
- *
- * @param selectedColumns - comma separated list of selected column names
- * @param schema - object schema
- * @return - boolean array with true value set for the specified column names
- */
- public static boolean[] includeColumns(String selectedColumns,
- TypeDescription schema) {
- int numFlattenedCols = schema.getMaximumId();
- boolean[] results = new boolean[numFlattenedCols + 1];
- if ("*".equals(selectedColumns)) {
- Arrays.fill(results, true);
- return results;
- }
- if (selectedColumns != null &&
- schema.getCategory() == TypeDescription.Category.STRUCT) {
- List<String> fieldNames = schema.getFieldNames();
- List<TypeDescription> fields = schema.getChildren();
- for (String column: selectedColumns.split((","))) {
- TypeDescription col = findColumn(column, fieldNames, fields);
- if (col != null) {
- for(int i=col.getId(); i <= col.getMaximumId(); ++i) {
- results[i] = true;
- }
- }
- }
- }
- return results;
- }
-
- private static TypeDescription findColumn(String columnName,
- List<String> fieldNames,
- List<TypeDescription> fields) {
- int i = 0;
- for(String fieldName: fieldNames) {
- if (fieldName.equalsIgnoreCase(columnName)) {
- return fields.get(i);
- } else {
- i += 1;
- }
- }
- return null;
- }
-
- /**
- * Convert a Hive type property string that contains separated type names into a list of
- * TypeDescription objects.
- * @return the list of TypeDescription objects.
- */
- public static ArrayList<TypeDescription> typeDescriptionsFromHiveTypeProperty(
- String hiveTypeProperty) {
-
- // CONSIDER: We need a type name parser for TypeDescription.
-
- ArrayList<TypeInfo> typeInfoList = TypeInfoUtils.getTypeInfosFromTypeString(hiveTypeProperty);
- ArrayList<TypeDescription> typeDescrList = new ArrayList<TypeDescription>(typeInfoList.size());
- for (TypeInfo typeInfo : typeInfoList) {
- typeDescrList.add(convertTypeInfo(typeInfo));
- }
- return typeDescrList;
- }
-
- public static TypeDescription convertTypeInfo(TypeInfo info) {
- switch (info.getCategory()) {
- case PRIMITIVE: {
- PrimitiveTypeInfo pinfo = (PrimitiveTypeInfo) info;
- switch (pinfo.getPrimitiveCategory()) {
- case BOOLEAN:
- return TypeDescription.createBoolean();
- case BYTE:
- return TypeDescription.createByte();
- case SHORT:
- return TypeDescription.createShort();
- case INT:
- return TypeDescription.createInt();
- case LONG:
- return TypeDescription.createLong();
- case FLOAT:
- return TypeDescription.createFloat();
- case DOUBLE:
- return TypeDescription.createDouble();
- case STRING:
- return TypeDescription.createString();
- case DATE:
- return TypeDescription.createDate();
- case TIMESTAMP:
- return TypeDescription.createTimestamp();
- case BINARY:
- return TypeDescription.createBinary();
- case DECIMAL: {
- DecimalTypeInfo dinfo = (DecimalTypeInfo) pinfo;
- return TypeDescription.createDecimal()
- .withScale(dinfo.getScale())
- .withPrecision(dinfo.getPrecision());
- }
- case VARCHAR: {
- BaseCharTypeInfo cinfo = (BaseCharTypeInfo) pinfo;
- return TypeDescription.createVarchar()
- .withMaxLength(cinfo.getLength());
- }
- case CHAR: {
- BaseCharTypeInfo cinfo = (BaseCharTypeInfo) pinfo;
- return TypeDescription.createChar()
- .withMaxLength(cinfo.getLength());
- }
- default:
- throw new IllegalArgumentException("ORC doesn't handle primitive" +
- " category " + pinfo.getPrimitiveCategory());
- }
- }
- case LIST: {
- ListTypeInfo linfo = (ListTypeInfo) info;
- return TypeDescription.createList
- (convertTypeInfo(linfo.getListElementTypeInfo()));
- }
- case MAP: {
- MapTypeInfo minfo = (MapTypeInfo) info;
- return TypeDescription.createMap
- (convertTypeInfo(minfo.getMapKeyTypeInfo()),
- convertTypeInfo(minfo.getMapValueTypeInfo()));
- }
- case UNION: {
- UnionTypeInfo minfo = (UnionTypeInfo) info;
- TypeDescription result = TypeDescription.createUnion();
- for (TypeInfo child: minfo.getAllUnionObjectTypeInfos()) {
- result.addUnionChild(convertTypeInfo(child));
- }
- return result;
- }
- case STRUCT: {
- StructTypeInfo sinfo = (StructTypeInfo) info;
- TypeDescription result = TypeDescription.createStruct();
- for(String fieldName: sinfo.getAllStructFieldNames()) {
- result.addField(fieldName,
- convertTypeInfo(sinfo.getStructFieldTypeInfo(fieldName)));
- }
- return result;
- }
- default:
- throw new IllegalArgumentException("ORC doesn't handle " +
- info.getCategory());
- }
- }
-
- public static List<OrcProto.Type> getOrcTypes(TypeDescription typeDescr) {
- List<OrcProto.Type> result = Lists.newArrayList();
- appendOrcTypes(result, typeDescr);
- return result;
- }
-
- private static void appendOrcTypes(List<OrcProto.Type> result, TypeDescription typeDescr) {
- OrcProto.Type.Builder type = OrcProto.Type.newBuilder();
- List<TypeDescription> children = typeDescr.getChildren();
- switch (typeDescr.getCategory()) {
- case BOOLEAN:
- type.setKind(OrcProto.Type.Kind.BOOLEAN);
- break;
- case BYTE:
- type.setKind(OrcProto.Type.Kind.BYTE);
- break;
- case SHORT:
- type.setKind(OrcProto.Type.Kind.SHORT);
- break;
- case INT:
- type.setKind(OrcProto.Type.Kind.INT);
- break;
- case LONG:
- type.setKind(OrcProto.Type.Kind.LONG);
- break;
- case FLOAT:
- type.setKind(OrcProto.Type.Kind.FLOAT);
- break;
- case DOUBLE:
- type.setKind(OrcProto.Type.Kind.DOUBLE);
- break;
- case STRING:
- type.setKind(OrcProto.Type.Kind.STRING);
- break;
- case CHAR:
- type.setKind(OrcProto.Type.Kind.CHAR);
- type.setMaximumLength(typeDescr.getMaxLength());
- break;
- case VARCHAR:
- type.setKind(Type.Kind.VARCHAR);
- type.setMaximumLength(typeDescr.getMaxLength());
- break;
- case BINARY:
- type.setKind(OrcProto.Type.Kind.BINARY);
- break;
- case TIMESTAMP:
- type.setKind(OrcProto.Type.Kind.TIMESTAMP);
- break;
- case DATE:
- type.setKind(OrcProto.Type.Kind.DATE);
- break;
- case DECIMAL:
- type.setKind(OrcProto.Type.Kind.DECIMAL);
- type.setPrecision(typeDescr.getPrecision());
- type.setScale(typeDescr.getScale());
- break;
- case LIST:
- type.setKind(OrcProto.Type.Kind.LIST);
- type.addSubtypes(children.get(0).getId());
- break;
- case MAP:
- type.setKind(OrcProto.Type.Kind.MAP);
- for(TypeDescription t: children) {
- type.addSubtypes(t.getId());
- }
- break;
- case STRUCT:
- type.setKind(OrcProto.Type.Kind.STRUCT);
- for(TypeDescription t: children) {
- type.addSubtypes(t.getId());
- }
- for(String field: typeDescr.getFieldNames()) {
- type.addFieldNames(field);
- }
- break;
- case UNION:
- type.setKind(OrcProto.Type.Kind.UNION);
- for(TypeDescription t: children) {
- type.addSubtypes(t.getId());
- }
- break;
- default:
- throw new IllegalArgumentException("Unknown category: " +
- typeDescr.getCategory());
- }
- result.add(type.build());
- if (children != null) {
- for(TypeDescription child: children) {
- appendOrcTypes(result, child);
- }
- }
- }
-
- /**
- * NOTE: This method ignores the subtype numbers in the TypeDescription and rebuilds the
- * subtype numbers based on the current length of the result list being appended to.
- *
- * @param result the list of types being appended to
- * @param typeDescr the type description to convert
- */
- public static void appendOrcTypesRebuildSubtypes(List<OrcProto.Type> result,
- TypeDescription typeDescr) {
-
- int subtype = result.size();
- OrcProto.Type.Builder type = OrcProto.Type.newBuilder();
- boolean needsAdd = true;
- List<TypeDescription> children = typeDescr.getChildren();
- switch (typeDescr.getCategory()) {
- case BOOLEAN:
- type.setKind(OrcProto.Type.Kind.BOOLEAN);
- break;
- case BYTE:
- type.setKind(OrcProto.Type.Kind.BYTE);
- break;
- case SHORT:
- type.setKind(OrcProto.Type.Kind.SHORT);
- break;
- case INT:
- type.setKind(OrcProto.Type.Kind.INT);
- break;
- case LONG:
- type.setKind(OrcProto.Type.Kind.LONG);
- break;
- case FLOAT:
- type.setKind(OrcProto.Type.Kind.FLOAT);
- break;
- case DOUBLE:
- type.setKind(OrcProto.Type.Kind.DOUBLE);
- break;
- case STRING:
- type.setKind(OrcProto.Type.Kind.STRING);
- break;
- case CHAR:
- type.setKind(OrcProto.Type.Kind.CHAR);
- type.setMaximumLength(typeDescr.getMaxLength());
- break;
- case VARCHAR:
- type.setKind(Type.Kind.VARCHAR);
- type.setMaximumLength(typeDescr.getMaxLength());
- break;
- case BINARY:
- type.setKind(OrcProto.Type.Kind.BINARY);
- break;
- case TIMESTAMP:
- type.setKind(OrcProto.Type.Kind.TIMESTAMP);
- break;
- case DATE:
- type.setKind(OrcProto.Type.Kind.DATE);
- break;
- case DECIMAL:
- type.setKind(OrcProto.Type.Kind.DECIMAL);
- type.setPrecision(typeDescr.getPrecision());
- type.setScale(typeDescr.getScale());
- break;
- case LIST:
- type.setKind(OrcProto.Type.Kind.LIST);
- type.addSubtypes(++subtype);
- result.add(type.build());
- needsAdd = false;
- appendOrcTypesRebuildSubtypes(result, children.get(0));
- break;
- case MAP:
- {
- // Make room for MAP type.
- result.add(null);
-
- // Add MAP type pair in order to determine their subtype values.
- appendOrcTypesRebuildSubtypes(result, children.get(0));
- int subtype2 = result.size();
- appendOrcTypesRebuildSubtypes(result, children.get(1));
- type.setKind(OrcProto.Type.Kind.MAP);
- type.addSubtypes(subtype + 1);
- type.addSubtypes(subtype2);
- result.set(subtype, type.build());
- needsAdd = false;
- }
- break;
- case STRUCT:
- {
- List<String> fieldNames = typeDescr.getFieldNames();
-
- // Make room for STRUCT type.
- result.add(null);
-
- List<Integer> fieldSubtypes = new ArrayList<Integer>(fieldNames.size());
- for(TypeDescription child: children) {
- int fieldSubtype = result.size();
- fieldSubtypes.add(fieldSubtype);
- appendOrcTypesRebuildSubtypes(result, child);
- }
-
- type.setKind(OrcProto.Type.Kind.STRUCT);
-
- for (int i = 0 ; i < fieldNames.size(); i++) {
- type.addSubtypes(fieldSubtypes.get(i));
- type.addFieldNames(fieldNames.get(i));
- }
- result.set(subtype, type.build());
- needsAdd = false;
- }
- break;
- case UNION:
- {
- // Make room for UNION type.
- result.add(null);
-
- List<Integer> unionSubtypes = new ArrayList<Integer>(children.size());
- for(TypeDescription child: children) {
- int unionSubtype = result.size();
- unionSubtypes.add(unionSubtype);
- appendOrcTypesRebuildSubtypes(result, child);
- }
-
- type.setKind(OrcProto.Type.Kind.UNION);
- for (int i = 0 ; i < children.size(); i++) {
- type.addSubtypes(unionSubtypes.get(i));
- }
- result.set(subtype, type.build());
- needsAdd = false;
- }
- break;
- default:
- throw new IllegalArgumentException("Unknown category: " + typeDescr.getCategory());
- }
- if (needsAdd) {
- result.add(type.build());
- }
- }
-
- /**
- * NOTE: This method ignores the subtype numbers in the OrcProto.Type and rebuilds the
- * subtype numbers based on the current length of the result list being appended to.
- *
- * @param result the list of types being appended to
- * @param types the source list of types
- * @param columnId the index of the next type to convert
- */
- public static int appendOrcTypesRebuildSubtypes(List<OrcProto.Type> result,
- List<OrcProto.Type> types, int columnId) {
-
- OrcProto.Type oldType = types.get(columnId++);
-
- int subtype = result.size();
- OrcProto.Type.Builder builder = OrcProto.Type.newBuilder();
- boolean needsAdd = true;
- switch (oldType.getKind()) {
- case BOOLEAN:
- builder.setKind(OrcProto.Type.Kind.BOOLEAN);
- break;
- case BYTE:
- builder.setKind(OrcProto.Type.Kind.BYTE);
- break;
- case SHORT:
- builder.setKind(OrcProto.Type.Kind.SHORT);
- break;
- case INT:
- builder.setKind(OrcProto.Type.Kind.INT);
- break;
- case LONG:
- builder.setKind(OrcProto.Type.Kind.LONG);
- break;
- case FLOAT:
- builder.setKind(OrcProto.Type.Kind.FLOAT);
- break;
- case DOUBLE:
- builder.setKind(OrcProto.Type.Kind.DOUBLE);
- break;
- case STRING:
- builder.setKind(OrcProto.Type.Kind.STRING);
- break;
- case CHAR:
- builder.setKind(OrcProto.Type.Kind.CHAR);
- builder.setMaximumLength(oldType.getMaximumLength());
- break;
- case VARCHAR:
- builder.setKind(Type.Kind.VARCHAR);
- builder.setMaximumLength(oldType.getMaximumLength());
- break;
- case BINARY:
- builder.setKind(OrcProto.Type.Kind.BINARY);
- break;
- case TIMESTAMP:
- builder.setKind(OrcProto.Type.Kind.TIMESTAMP);
- break;
- case DATE:
- builder.setKind(OrcProto.Type.Kind.DATE);
- break;
- case DECIMAL:
- builder.setKind(OrcProto.Type.Kind.DECIMAL);
- builder.setPrecision(oldType.getPrecision());
- builder.setScale(oldType.getScale());
- break;
- case LIST:
- builder.setKind(OrcProto.Type.Kind.LIST);
- builder.addSubtypes(++subtype);
- result.add(builder.build());
- needsAdd = false;
- columnId = appendOrcTypesRebuildSubtypes(result, types, columnId);
- break;
- case MAP:
- {
- // Make room for MAP type.
- result.add(null);
-
- // Add MAP type pair in order to determine their subtype values.
- columnId = appendOrcTypesRebuildSubtypes(result, types, columnId);
- int subtype2 = result.size();
- columnId = appendOrcTypesRebuildSubtypes(result, types, columnId);
- builder.setKind(OrcProto.Type.Kind.MAP);
- builder.addSubtypes(subtype + 1);
- builder.addSubtypes(subtype2);
- result.set(subtype, builder.build());
- needsAdd = false;
- }
- break;
- case STRUCT:
- {
- List<String> fieldNames = oldType.getFieldNamesList();
-
- // Make room for STRUCT type.
- result.add(null);
-
- List<Integer> fieldSubtypes = new ArrayList<Integer>(fieldNames.size());
- for(int i = 0 ; i < fieldNames.size(); i++) {
- int fieldSubtype = result.size();
- fieldSubtypes.add(fieldSubtype);
- columnId = appendOrcTypesRebuildSubtypes(result, types, columnId);
- }
-
- builder.setKind(OrcProto.Type.Kind.STRUCT);
-
- for (int i = 0 ; i < fieldNames.size(); i++) {
- builder.addSubtypes(fieldSubtypes.get(i));
- builder.addFieldNames(fieldNames.get(i));
- }
- result.set(subtype, builder.build());
- needsAdd = false;
- }
- break;
- case UNION:
- {
- int subtypeCount = oldType.getSubtypesCount();
-
- // Make room for UNION type.
- result.add(null);
-
- List<Integer> unionSubtypes = new ArrayList<Integer>(subtypeCount);
- for(int i = 0 ; i < subtypeCount; i++) {
- int unionSubtype = result.size();
- unionSubtypes.add(unionSubtype);
- columnId = appendOrcTypesRebuildSubtypes(result, types, columnId);
- }
-
- builder.setKind(OrcProto.Type.Kind.UNION);
- for (int i = 0 ; i < subtypeCount; i++) {
- builder.addSubtypes(unionSubtypes.get(i));
- }
- result.set(subtype, builder.build());
- needsAdd = false;
- }
- break;
- default:
- throw new IllegalArgumentException("Unknown category: " + oldType.getKind());
- }
- if (needsAdd) {
- result.add(builder.build());
- }
- return columnId;
- }
-
- public static TypeDescription getDesiredRowTypeDescr(Configuration conf) {
-
- String columnNameProperty = null;
- String columnTypeProperty = null;
-
- ArrayList<String> schemaEvolutionColumnNames = null;
- ArrayList<TypeDescription> schemaEvolutionTypeDescrs = null;
-
- boolean haveSchemaEvolutionProperties = false;
- if (HiveConf.getBoolVar(conf, ConfVars.HIVE_SCHEMA_EVOLUTION)) {
-
- columnNameProperty = conf.get(IOConstants.SCHEMA_EVOLUTION_COLUMNS);
- columnTypeProperty = conf.get(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES);
-
- haveSchemaEvolutionProperties =
- (columnNameProperty != null && columnTypeProperty != null);
-
- if (haveSchemaEvolutionProperties) {
- schemaEvolutionColumnNames = Lists.newArrayList(columnNameProperty.split(","));
- if (schemaEvolutionColumnNames.size() == 0) {
- haveSchemaEvolutionProperties = false;
- } else {
- schemaEvolutionTypeDescrs =
- OrcUtils.typeDescriptionsFromHiveTypeProperty(columnTypeProperty);
- if (schemaEvolutionTypeDescrs.size() != schemaEvolutionColumnNames.size()) {
- haveSchemaEvolutionProperties = false;
- }
- }
- }
- }
-
- if (!haveSchemaEvolutionProperties) {
-
- // Try regular properties;
- columnNameProperty = conf.get(serdeConstants.LIST_COLUMNS);
- columnTypeProperty = conf.get(serdeConstants.LIST_COLUMN_TYPES);
- if (columnTypeProperty == null || columnNameProperty == null) {
- return null;
- }
-
- schemaEvolutionColumnNames = Lists.newArrayList(columnNameProperty.split(","));
- if (schemaEvolutionColumnNames.size() == 0) {
- return null;
- }
- schemaEvolutionTypeDescrs =
- OrcUtils.typeDescriptionsFromHiveTypeProperty(columnTypeProperty);
- if (schemaEvolutionTypeDescrs.size() != schemaEvolutionColumnNames.size()) {
- return null;
- }
- }
-
- // Desired schema does not include virtual columns or partition columns.
- TypeDescription result = TypeDescription.createStruct();
- for (int i = 0; i < schemaEvolutionColumnNames.size(); i++) {
- result.addField(schemaEvolutionColumnNames.get(i), schemaEvolutionTypeDescrs.get(i));
- }
-
- return result;
- }
-}
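For context: the OrcUtils class removed above moves into the new org.apache.orc module (the import changes later in this patch reference org.apache.orc.OrcUtils). A minimal usage sketch of the includeColumns helper described in its javadoc, assuming the relocated class keeps the static methods and TypeDescription builders shown in the deleted file:

// Sketch only: assumes org.apache.orc.OrcUtils and org.apache.orc.TypeDescription
// expose the same methods as the deleted Hive copy above.
import org.apache.orc.OrcUtils;
import org.apache.orc.TypeDescription;

public class IncludeColumnsExample {
  public static void main(String[] args) {
    // Schema: struct<a:int, b:string, c:array<string>, d:double>
    TypeDescription schema = TypeDescription.createStruct()
        .addField("a", TypeDescription.createInt())
        .addField("b", TypeDescription.createString())
        .addField("c", TypeDescription.createList(TypeDescription.createString()))
        .addField("d", TypeDescription.createDouble());

    // Flattened type ids: 0 = root struct, 1 = a, 2 = b, 3 = c (list),
    // 4 = c's element, 5 = d.
    boolean[] included = OrcUtils.includeColumns("a,b,c", schema);
    // included == [false, true, true, true, true, false]
  }
}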
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OutStream.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OutStream.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OutStream.java
deleted file mode 100644
index b228c89..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OutStream.java
+++ /dev/null
@@ -1,286 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-
-public class OutStream extends PositionedOutputStream {
-
- interface OutputReceiver {
- /**
- * Output the given buffer to the final destination
- * @param buffer the buffer to output
- * @throws IOException
- */
- void output(ByteBuffer buffer) throws IOException;
- }
-
- public static final int HEADER_SIZE = 3;
- private final String name;
- private final OutputReceiver receiver;
- // if enabled, the stream will be suppressed when writing the stripe
- private boolean suppress;
-
- /**
- * Stores the uncompressed bytes that have been serialized, but not
- * compressed yet. When this fills, we compress the entire buffer.
- */
- private ByteBuffer current = null;
-
- /**
- * Stores the compressed bytes until we have a full buffer and then outputs
- * them to the receiver. If no compression is being done, this (and overflow)
- * will always be null and the current buffer will be sent directly to the
- * receiver.
- */
- private ByteBuffer compressed = null;
-
- /**
- * Since the compressed buffer may start with contents from previous
- * compression blocks, we allocate an overflow buffer so that the
- * output of the codec can be split between the two buffers. After the
- * compressed buffer is sent to the receiver, the overflow buffer becomes
- * the new compressed buffer.
- */
- private ByteBuffer overflow = null;
- private final int bufferSize;
- private final CompressionCodec codec;
- private long compressedBytes = 0;
- private long uncompressedBytes = 0;
-
- OutStream(String name,
- int bufferSize,
- CompressionCodec codec,
- OutputReceiver receiver) throws IOException {
- this.name = name;
- this.bufferSize = bufferSize;
- this.codec = codec;
- this.receiver = receiver;
- this.suppress = false;
- }
-
- public void clear() throws IOException {
- flush();
- suppress = false;
- }
-
- /**
- * Write the length of the compressed bytes. Life is much easier if the
- * header is constant length, so just use 3 bytes. Considering most of the
- * codecs want between 32k (snappy) and 256k (lzo, zlib), 3 bytes should
- * be plenty. We also use the low bit for whether it is the original or
- * compressed bytes.
- * @param buffer the buffer to write the header to
- * @param position the position in the buffer to write at
- * @param val the size in the file
- * @param original is it uncompressed
- */
- private static void writeHeader(ByteBuffer buffer,
- int position,
- int val,
- boolean original) {
- buffer.put(position, (byte) ((val << 1) + (original ? 1 : 0)));
- buffer.put(position + 1, (byte) (val >> 7));
- buffer.put(position + 2, (byte) (val >> 15));
- }
-
- private void getNewInputBuffer() throws IOException {
- if (codec == null) {
- current = ByteBuffer.allocate(bufferSize);
- } else {
- current = ByteBuffer.allocate(bufferSize + HEADER_SIZE);
- writeHeader(current, 0, bufferSize, true);
- current.position(HEADER_SIZE);
- }
- }
-
- /**
- * Allocate a new output buffer if we are compressing.
- */
- private ByteBuffer getNewOutputBuffer() throws IOException {
- return ByteBuffer.allocate(bufferSize + HEADER_SIZE);
- }
-
- private void flip() throws IOException {
- current.limit(current.position());
- current.position(codec == null ? 0 : HEADER_SIZE);
- }
-
- @Override
- public void write(int i) throws IOException {
- if (current == null) {
- getNewInputBuffer();
- }
- if (current.remaining() < 1) {
- spill();
- }
- uncompressedBytes += 1;
- current.put((byte) i);
- }
-
- @Override
- public void write(byte[] bytes, int offset, int length) throws IOException {
- if (current == null) {
- getNewInputBuffer();
- }
- int remaining = Math.min(current.remaining(), length);
- current.put(bytes, offset, remaining);
- uncompressedBytes += remaining;
- length -= remaining;
- while (length != 0) {
- spill();
- offset += remaining;
- remaining = Math.min(current.remaining(), length);
- current.put(bytes, offset, remaining);
- uncompressedBytes += remaining;
- length -= remaining;
- }
- }
-
- private void spill() throws java.io.IOException {
- // if there isn't anything in the current buffer, don't spill
- if (current == null ||
- current.position() == (codec == null ? 0 : HEADER_SIZE)) {
- return;
- }
- flip();
- if (codec == null) {
- receiver.output(current);
- getNewInputBuffer();
- } else {
- if (compressed == null) {
- compressed = getNewOutputBuffer();
- } else if (overflow == null) {
- overflow = getNewOutputBuffer();
- }
- int sizePosn = compressed.position();
- compressed.position(compressed.position() + HEADER_SIZE);
- if (codec.compress(current, compressed, overflow)) {
- uncompressedBytes = 0;
- // move position back to after the header
- current.position(HEADER_SIZE);
- current.limit(current.capacity());
- // find the total bytes in the chunk
- int totalBytes = compressed.position() - sizePosn - HEADER_SIZE;
- if (overflow != null) {
- totalBytes += overflow.position();
- }
- compressedBytes += totalBytes + HEADER_SIZE;
- writeHeader(compressed, sizePosn, totalBytes, false);
- // if we have less than the next header left, spill it.
- if (compressed.remaining() < HEADER_SIZE) {
- compressed.flip();
- receiver.output(compressed);
- compressed = overflow;
- overflow = null;
- }
- } else {
- compressedBytes += uncompressedBytes + HEADER_SIZE;
- uncompressedBytes = 0;
- // we are using the original, but need to spill the current
- // compressed buffer first. So back up to where we started,
- // flip it and add it to done.
- if (sizePosn != 0) {
- compressed.position(sizePosn);
- compressed.flip();
- receiver.output(compressed);
- compressed = null;
- // if we have an overflow, clear it and make it the new compress
- // buffer
- if (overflow != null) {
- overflow.clear();
- compressed = overflow;
- overflow = null;
- }
- } else {
- compressed.clear();
- if (overflow != null) {
- overflow.clear();
- }
- }
-
- // now add the current buffer into the done list and get a new one.
- current.position(0);
- // update the header with the current length
- writeHeader(current, 0, current.limit() - HEADER_SIZE, true);
- receiver.output(current);
- getNewInputBuffer();
- }
- }
- }
-
- void getPosition(PositionRecorder recorder) throws IOException {
- if (codec == null) {
- recorder.addPosition(uncompressedBytes);
- } else {
- recorder.addPosition(compressedBytes);
- recorder.addPosition(uncompressedBytes);
- }
- }
-
- @Override
- public void flush() throws IOException {
- spill();
- if (compressed != null && compressed.position() != 0) {
- compressed.flip();
- receiver.output(compressed);
- compressed = null;
- }
- uncompressedBytes = 0;
- compressedBytes = 0;
- overflow = null;
- current = null;
- }
-
- @Override
- public String toString() {
- return name;
- }
-
- @Override
- public long getBufferSize() {
- long result = 0;
- if (current != null) {
- result += current.capacity();
- }
- if (compressed != null) {
- result += compressed.capacity();
- }
- if (overflow != null) {
- result += overflow.capacity();
- }
- return result;
- }
-
- /**
- * Set suppress flag
- */
- public void suppress() {
- suppress = true;
- }
-
- /**
- * Returns the state of suppress flag
- * @return value of suppress flag
- */
- public boolean isSuppressed() {
- return suppress;
- }
-}
-
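The 3-byte chunk header written by writeHeader above packs the chunk length shifted left by one, with the low bit marking an uncompressed ("original") chunk. A small decoding sketch, assuming the layout stays exactly as written there; the helper class is illustrative only:

// Sketch: decode the 3-byte chunk header produced by OutStream.writeHeader above.
// Assumes the same layout (low bit = "original" flag, remaining 23 bits = length).
import java.nio.ByteBuffer;

class ChunkHeader {
  static int chunkLength(ByteBuffer buf, int position) {
    int b0 = buf.get(position) & 0xff;
    int b1 = buf.get(position + 1) & 0xff;
    int b2 = buf.get(position + 2) & 0xff;
    return (b2 << 15) | (b1 << 7) | (b0 >> 1);   // length of the chunk in bytes
  }

  static boolean isOriginal(ByteBuffer buf, int position) {
    return (buf.get(position) & 0x01) == 1;      // low bit set => uncompressed chunk
  }
}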
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/PositionProvider.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/PositionProvider.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/PositionProvider.java
deleted file mode 100644
index 04d81cc..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/PositionProvider.java
+++ /dev/null
@@ -1,26 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.io.orc;
-
-/**
- * An interface used for seeking to a row index.
- */
-public interface PositionProvider {
- long getNext();
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/PositionRecorder.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/PositionRecorder.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/PositionRecorder.java
deleted file mode 100644
index 9dc6011..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/PositionRecorder.java
+++ /dev/null
@@ -1,25 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-/**
- * An interface for recording positions in a stream.
- */
-interface PositionRecorder {
- void addPosition(long offset);
-}
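PositionRecorder (above) and PositionProvider (two files up) are the write-side and read-side halves of ORC's row-index positions: the writer records stream offsets per index entry, and the reader replays them when seeking. A toy sketch of that pairing, grounded only in the two interfaces as deleted here; the list-backed classes are illustrative, not the module's actual implementations:

// Toy sketch of the PositionRecorder / PositionProvider pairing described above.
import java.util.ArrayList;
import java.util.List;

class ListPositionRecorder /* implements PositionRecorder */ {
  final List<Long> positions = new ArrayList<>();
  public void addPosition(long offset) {   // writer side: record a stream offset
    positions.add(offset);
  }
}

class ListPositionProvider /* implements PositionProvider */ {
  private final List<Long> positions;
  private int next = 0;
  ListPositionProvider(List<Long> positions) { this.positions = positions; }
  public long getNext() {                  // reader side: replay offsets while seeking
    return positions.get(next++);
  }
}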
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/PositionedOutputStream.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/PositionedOutputStream.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/PositionedOutputStream.java
deleted file mode 100644
index 3bee34d..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/PositionedOutputStream.java
+++ /dev/null
@@ -1,38 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-import java.io.IOException;
-import java.io.OutputStream;
-
-abstract class PositionedOutputStream extends OutputStream {
-
- /**
- * Record the current position to the recorder.
- * @param recorder the object that receives the position
- * @throws IOException
- */
- abstract void getPosition(PositionRecorder recorder) throws IOException;
-
- /**
- * Get the memory size currently allocated as buffer associated with this
- * stream.
- * @return the number of bytes used by buffers.
- */
- abstract long getBufferSize();
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java
index 750cf8d..8823e21 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java
@@ -19,10 +19,7 @@
package org.apache.hadoop.hive.ql.io.orc;
import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.List;
-import org.apache.hadoop.hive.ql.io.orc.OrcProto.Type;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -31,78 +28,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
*
 * One Reader can support multiple concurrent RecordReaders.
*/
-public interface Reader {
-
- /**
- * Get the number of rows in the file.
- * @return the number of rows
- */
- long getNumberOfRows();
-
- /**
- * Get the deserialized data size of the file
- * @return raw data size
- */
- long getRawDataSize();
-
- /**
- * Get the deserialized data size of the specified columns
- * @param colNames
- * @return raw data size of columns
- */
- long getRawDataSizeOfColumns(List<String> colNames);
-
- /**
- * Get the deserialized data size of the specified columns ids
- * @param colIds - internal column id (check orcfiledump for column ids)
- * @return raw data size of columns
- */
- long getRawDataSizeFromColIndices(List<Integer> colIds);
-
- /**
- * Get the user metadata keys.
- * @return the set of metadata keys
- */
- List<String> getMetadataKeys();
-
- /**
- * Get a user metadata value.
- * @param key a key given by the user
- * @return the bytes associated with the given key
- */
- ByteBuffer getMetadataValue(String key);
-
- /**
- * Did the user set the given metadata value.
- * @param key the key to check
- * @return true if the metadata value was set
- */
- boolean hasMetadataValue(String key);
-
- /**
- * Get the compression kind.
- * @return the kind of compression in the file
- */
- CompressionKind getCompression();
-
- /**
- * Get the buffer size for the compression.
- * @return number of bytes to buffer for the compression codec.
- */
- int getCompressionSize();
-
- /**
- * Get the number of rows per entry in the row index.
- * @return the number of rows per entry in the row index, or 0 if there
- * is no row index.
- */
- int getRowIndexStride();
-
- /**
- * Get the list of stripes.
- * @return the information about the stripes in order
- */
- List<StripeInformation> getStripes();
+public interface Reader extends org.apache.orc.Reader {
/**
* Get the object inspector for looking at the objects.
@@ -111,200 +37,9 @@ public interface Reader {
ObjectInspector getObjectInspector();
/**
- * Get the length of the file.
- * @return the number of bytes in the file
- */
- long getContentLength();
-
- /**
- * Get the statistics about the columns in the file.
- * @return the information about the column
- */
- ColumnStatistics[] getStatistics();
-
- /**
- * Get the list of types contained in the file. The root type is the first
- * type in the list.
- * @return the list of flattened types
- */
- List<OrcProto.Type> getTypes();
-
- /**
- * Get the file format version.
- */
- OrcFile.Version getFileVersion();
-
- /**
- * Get the version of the writer of this file.
+ * Get the compression kind in compatibility mode.
*/
- OrcFile.WriterVersion getWriterVersion();
-
- /**
- * Options for creating a RecordReader.
- */
- public static class Options {
- private boolean[] include;
- private long offset = 0;
- private long length = Long.MAX_VALUE;
- private TypeDescription schema;
- private SearchArgument sarg = null;
- private String[] columnNames = null;
- private Boolean useZeroCopy = null;
- private Boolean skipCorruptRecords = null;
-
- /**
- * Set the list of columns to read.
- * @param include a list of columns to read
- * @return this
- */
- public Options include(boolean[] include) {
- this.include = include;
- return this;
- }
-
- /**
- * Set the range of bytes to read
- * @param offset the starting byte offset
- * @param length the number of bytes to read
- * @return this
- */
- public Options range(long offset, long length) {
- this.offset = offset;
- this.length = length;
- return this;
- }
-
- /**
- * Set the schema on read type description.
- */
- public Options schema(TypeDescription schema) {
- this.schema = schema;
- return this;
- }
-
- /**
- * Set search argument for predicate push down.
- * @param sarg the search argument
- * @param columnNames the column names used by the search argument
- * @return this
- */
- public Options searchArgument(SearchArgument sarg, String[] columnNames) {
- this.sarg = sarg;
- this.columnNames = columnNames;
- return this;
- }
-
- /**
- * Set whether to use zero copy from HDFS.
- * @param value the new zero copy flag
- * @return this
- */
- public Options useZeroCopy(boolean value) {
- this.useZeroCopy = value;
- return this;
- }
-
- /**
- * Set whether to skip corrupt records.
- * @param value the new skip corrupt records flag
- * @return this
- */
- public Options skipCorruptRecords(boolean value) {
- this.skipCorruptRecords = value;
- return this;
- }
-
- public boolean[] getInclude() {
- return include;
- }
-
- public long getOffset() {
- return offset;
- }
-
- public long getLength() {
- return length;
- }
-
- public TypeDescription getSchema() {
- return schema;
- }
-
- public SearchArgument getSearchArgument() {
- return sarg;
- }
-
- public String[] getColumnNames() {
- return columnNames;
- }
-
- public long getMaxOffset() {
- long result = offset + length;
- if (result < 0) {
- result = Long.MAX_VALUE;
- }
- return result;
- }
-
- public Boolean getUseZeroCopy() {
- return useZeroCopy;
- }
-
- public Boolean getSkipCorruptRecords() {
- return skipCorruptRecords;
- }
-
- public Options clone() {
- Options result = new Options();
- result.include = include;
- result.offset = offset;
- result.length = length;
- result.schema = schema;
- result.sarg = sarg;
- result.columnNames = columnNames;
- result.useZeroCopy = useZeroCopy;
- result.skipCorruptRecords = skipCorruptRecords;
- return result;
- }
-
- @Override
- public String toString() {
- StringBuilder buffer = new StringBuilder();
- buffer.append("{include: ");
- if (include == null) {
- buffer.append("null");
- } else {
- buffer.append("[");
- for(int i=0; i < include.length; ++i) {
- if (i != 0) {
- buffer.append(", ");
- }
- buffer.append(include[i]);
- }
- buffer.append("]");
- }
- buffer.append(", offset: ");
- buffer.append(offset);
- buffer.append(", length: ");
- buffer.append(length);
- if (sarg != null) {
- buffer.append(", sarg: ");
- buffer.append(sarg.toString());
- buffer.append(", columns: [");
- for(int i=0; i < columnNames.length; ++i) {
- if (i != 0) {
- buffer.append(", ");
- }
- buffer.append("'");
- buffer.append(columnNames[i]);
- buffer.append("'");
- }
- buffer.append("]");
- }
- buffer.append("}");
- return buffer.toString();
- }
- }
+ CompressionKind getCompression();
/**
* Create a RecordReader that reads everything with the default options.
@@ -314,15 +49,13 @@ public interface Reader {
RecordReader rows() throws IOException;
/**
- * Create a RecordReader that uses the options given.
- * This method can't be named rows, because many callers used rows(null)
- * before the rows() method was introduced.
- * @param options the options to read with
+ * Create a RecordReader that reads everything with the given options.
+ * @param options the options to use
* @return a new RecordReader
* @throws IOException
*/
RecordReader rowsOptions(Options options) throws IOException;
-
+
/**
* Create a RecordReader that will scan the entire file.
* This is a legacy method and rowsOptions is preferred.
@@ -365,44 +98,4 @@ public interface Reader {
boolean[] include, SearchArgument sarg,
String[] neededColumns) throws IOException;
- /**
- * @return Metadata reader used to read file metadata.
- */
- MetadataReader metadata() throws IOException;
-
- /**
- * @return List of integers representing version of the file, in order from major to minor.
- */
- List<Integer> getVersionList();
-
- /**
- * @return Gets the size of metadata, in bytes.
- */
- int getMetadataSize();
-
- /**
- * @return Stripe statistics, in original protobuf form.
- */
- List<OrcProto.StripeStatistics> getOrcProtoStripeStatistics();
-
- /**
- * @return Stripe statistics.
- */
- List<StripeStatistics> getStripeStatistics();
-
- /**
- * @return File statistics, in original protobuf form.
- */
- List<OrcProto.ColumnStatistics> getOrcProtoFileStatistics();
-
- /**
- * @param useZeroCopy Whether zero-copy read should be used.
- * @return The default data reader that ORC is using to read bytes from disk.
- */
- DataReader createDefaultDataReader(boolean useZeroCopy);
-
- /**
- * @return Serialized file metadata read from disk for the purposes of caching, etc.
- */
- ByteBuffer getSerializedFileFooter();
}
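With this change the Hive-side Reader becomes a thin shim over org.apache.orc.Reader, keeping only the ObjectInspector accessor, the Hive CompressionKind accessor, and the row-reading methods. A hedged sketch of reading rows through the retained API; it assumes OrcFile.createReader and OrcFile.readerOptions keep their existing Hive signatures, and the file path is a placeholder:

// Sketch only: reading an ORC file through the compatibility Reader retained above.
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.orc.OrcFile;
import org.apache.hadoop.hive.ql.io.orc.Reader;
import org.apache.hadoop.hive.ql.io.orc.RecordReader;

public class OrcRowScan {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    Reader reader = OrcFile.createReader(new Path("/tmp/example.orc"),
        OrcFile.readerOptions(conf));
    RecordReader rows = reader.rows();   // default options; rowsOptions(...) for more control
    Object row = null;
    while (rows.hasNext()) {
      row = rows.next(row);              // the row object may be reused between calls
      // inspect fields via reader.getObjectInspector()
    }
    rows.close();
  }
}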
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
index f2f5f49..e31fd0b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
@@ -26,6 +26,18 @@ import java.util.Collections;
import java.util.List;
import java.util.Set;
+import org.apache.orc.impl.BufferChunk;
+import org.apache.orc.ColumnStatistics;
+import org.apache.orc.impl.ColumnStatisticsImpl;
+import org.apache.orc.CompressionCodec;
+import org.apache.orc.DataReader;
+import org.apache.orc.FileMetaInfo;
+import org.apache.orc.FileMetadata;
+import org.apache.orc.impl.InStream;
+import org.apache.orc.impl.MetadataReader;
+import org.apache.orc.impl.MetadataReaderImpl;
+import org.apache.orc.StripeInformation;
+import org.apache.orc.StripeStatistics;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
@@ -33,15 +45,12 @@ import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.io.DiskRange;
-import org.apache.hadoop.hive.ql.io.orc.OrcFile.WriterVersion;
import org.apache.hadoop.hive.ql.io.FileFormatException;
-import org.apache.hadoop.hive.ql.io.orc.OrcProto.Type;
-import org.apache.hadoop.hive.ql.io.orc.OrcProto.UserMetadataItem;
-import org.apache.hadoop.hive.ql.io.orc.RecordReaderImpl.BufferChunk;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.util.JavaDataModel;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.io.Text;
+import org.apache.orc.OrcProto;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
@@ -55,7 +64,7 @@ public class ReaderImpl implements Reader {
protected final FileSystem fileSystem;
protected final Path path;
- protected final CompressionKind compressionKind;
+ protected final org.apache.orc.CompressionKind compressionKind;
protected final CompressionCodec codec;
protected final int bufferSize;
private final List<OrcProto.StripeStatistics> stripeStats;
@@ -162,7 +171,18 @@ public class ReaderImpl implements Reader {
}
@Override
- public CompressionKind getCompression() {
+ public org.apache.hadoop.hive.ql.io.orc.CompressionKind getCompression() {
+ for (CompressionKind value: org.apache.hadoop.hive.ql.io.orc.CompressionKind.values()) {
+ if (value.getUnderlying() == compressionKind) {
+ return value;
+ }
+ }
+ throw new IllegalArgumentException("Unknown compression kind " +
+ compressionKind);
+ }
+
+ @Override
+ public org.apache.orc.CompressionKind getCompressionKind() {
return compressionKind;
}
@@ -318,7 +338,7 @@ public class ReaderImpl implements Reader {
this.metadataSize = fileMetadata.getMetadataSize();
this.stripeStats = fileMetadata.getStripeStats();
this.versionList = fileMetadata.getVersionList();
- this.writerVersion = WriterVersion.from(fileMetadata.getWriterVersionNum());
+ this.writerVersion = OrcFile.WriterVersion.from(fileMetadata.getWriterVersionNum());
this.types = fileMetadata.getTypes();
this.rowIndexStride = fileMetadata.getRowIndexStride();
this.contentLength = fileMetadata.getContentLength();
@@ -393,7 +413,9 @@ public class ReaderImpl implements Reader {
int footerSize = (int)ps.getFooterLength(), metadataSize = (int)ps.getMetadataLength(),
footerAbsPos = psAbsPos - footerSize, metadataAbsPos = footerAbsPos - metadataSize;
String compressionType = ps.getCompression().toString();
- CompressionCodec codec = WriterImpl.createCodec(CompressionKind.valueOf(compressionType));
+ CompressionCodec codec =
+ WriterImpl.createCodec(org.apache.orc.CompressionKind.valueOf
+ (compressionType));
int bufferSize = (int)ps.getCompressionBlockSize();
bb.position(metadataAbsPos);
bb.mark();
@@ -549,7 +571,7 @@ public class ReaderImpl implements Reader {
*
*/
private static class MetaInfoObjExtractor{
- final CompressionKind compressionKind;
+ final org.apache.orc.CompressionKind compressionKind;
final CompressionCodec codec;
final int bufferSize;
final int metadataSize;
@@ -560,7 +582,7 @@ public class ReaderImpl implements Reader {
MetaInfoObjExtractor(String codecStr, int bufferSize, int metadataSize,
ByteBuffer footerBuffer) throws IOException {
- this.compressionKind = CompressionKind.valueOf(codecStr);
+ this.compressionKind = org.apache.orc.CompressionKind.valueOf(codecStr);
this.bufferSize = bufferSize;
this.codec = WriterImpl.createCodec(compressionKind);
this.metadataSize = metadataSize;
@@ -693,7 +715,7 @@ public class ReaderImpl implements Reader {
List<OrcProto.ColumnStatistics> stats) {
OrcProto.ColumnStatistics colStat = stats.get(colIdx);
long numVals = colStat.getNumberOfValues();
- Type type = types.get(colIdx);
+ OrcProto.Type type = types.get(colIdx);
switch (type.getKind()) {
case BINARY:
@@ -742,7 +764,7 @@ public class ReaderImpl implements Reader {
private List<Integer> getColumnIndicesFromNames(List<String> colNames) {
// top level struct
- Type type = types.get(0);
+ OrcProto.Type type = types.get(0);
List<Integer> colIndices = Lists.newArrayList();
List<String> fieldNames = type.getFieldNamesList();
int fieldIdx = 0;
@@ -789,7 +811,7 @@ public class ReaderImpl implements Reader {
private int getLastIdx() {
Set<Integer> indices = Sets.newHashSet();
- for (Type type : types) {
+ for (OrcProto.Type type : types) {
indices.addAll(type.getSubtypesList());
}
return Collections.max(indices);
@@ -814,7 +836,7 @@ public class ReaderImpl implements Reader {
return result;
}
- public List<UserMetadataItem> getOrcProtoUserMetadata() {
+ public List<OrcProto.UserMetadataItem> getOrcProtoUserMetadata() {
return userMetadata;
}
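The new getCompression() implementation above bridges the two CompressionKind enums by scanning the Hive enum for the value whose underlying ORC kind matches. The same pattern in isolation, as a sketch; it assumes the Hive CompressionKind exposes getUnderlying(), as this patch shows:

// Sketch of the enum-bridging pattern used by ReaderImpl.getCompression() above.
import org.apache.hadoop.hive.ql.io.orc.CompressionKind;

class CompressionKindBridge {
  static CompressionKind toHiveKind(org.apache.orc.CompressionKind orcKind) {
    for (CompressionKind value : CompressionKind.values()) {
      if (value.getUnderlying() == orcKind) {
        return value;
      }
    }
    throw new IllegalArgumentException("Unknown compression kind " + orcKind);
  }
}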
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReader.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReader.java
index dba9071..ff5612d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReader.java
@@ -24,7 +24,7 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
/**
* A row-by-row iterator for ORC files.
*/
-public interface RecordReader {
+public interface RecordReader extends org.apache.orc.RecordReader {
/**
* Does the reader have more rows available.
* @return true if there are more rows
@@ -39,40 +39,4 @@ public interface RecordReader {
* @throws java.io.IOException
*/
Object next(Object previous) throws IOException;
-
- /**
- * Read the next row batch. The size of the batch to read cannot be controlled
- * by the callers. Callers need to look at VectorizedRowBatch.size of the returned
- * object to know the batch size read.
- * @param previousBatch a row batch object that can be reused by the reader
- * @return the row batch that was read
- * @throws java.io.IOException
- */
- VectorizedRowBatch nextBatch(VectorizedRowBatch previousBatch) throws IOException;
-
- /**
- * Get the row number of the row that will be returned by the following
- * call to next().
- * @return the row number from 0 to the number of rows in the file
- * @throws java.io.IOException
- */
- long getRowNumber() throws IOException;
-
- /**
- * Get the progress of the reader through the rows.
- * @return a fraction between 0.0 and 1.0 of rows read
- * @throws java.io.IOException
- */
- float getProgress() throws IOException;
-
- /**
- * Release the resources associated with the given reader.
- * @throws java.io.IOException
- */
- void close() throws IOException;
-
- /**
- * Seek to a particular row number.
- */
- void seekToRow(long rowCount) throws IOException;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
index f36bceb..607003f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
@@ -19,7 +19,6 @@ package org.apache.hadoop.hive.ql.io.orc;
import java.io.IOException;
import java.math.BigDecimal;
-import java.nio.ByteBuffer;
import java.sql.Date;
import java.sql.Timestamp;
import java.util.ArrayList;
@@ -29,6 +28,27 @@ import java.util.List;
import java.util.Map;
import org.apache.commons.lang3.exception.ExceptionUtils;
+import org.apache.orc.BooleanColumnStatistics;
+import org.apache.orc.OrcUtils;
+import org.apache.orc.impl.BufferChunk;
+import org.apache.orc.ColumnStatistics;
+import org.apache.orc.impl.ColumnStatisticsImpl;
+import org.apache.orc.CompressionCodec;
+import org.apache.orc.DataReader;
+import org.apache.orc.DateColumnStatistics;
+import org.apache.orc.DecimalColumnStatistics;
+import org.apache.orc.DoubleColumnStatistics;
+import org.apache.orc.impl.InStream;
+import org.apache.orc.IntegerColumnStatistics;
+import org.apache.orc.impl.MetadataReader;
+import org.apache.orc.impl.MetadataReaderImpl;
+import org.apache.orc.OrcConf;
+import org.apache.orc.impl.OrcIndex;
+import org.apache.orc.impl.PositionProvider;
+import org.apache.orc.impl.StreamName;
+import org.apache.orc.StringColumnStatistics;
+import org.apache.orc.StripeInformation;
+import org.apache.orc.TimestampColumnStatistics;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
@@ -41,9 +61,6 @@ import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.io.filters.BloomFilterIO;
-import org.apache.hadoop.hive.ql.io.orc.OrcProto.RowIndexEntry;
-import org.apache.hadoop.hive.ql.io.orc.OrcProto.Type;
-import org.apache.hadoop.hive.ql.io.orc.TreeReaderFactory.TreeReaderSchema;
import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
@@ -51,6 +68,7 @@ import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import org.apache.hadoop.hive.serde2.io.TimestampWritable;
import org.apache.hadoop.io.Text;
+import org.apache.orc.OrcProto;
public class RecordReaderImpl implements RecordReader {
static final Logger LOG = LoggerFactory.getLogger(RecordReaderImpl.class);
@@ -84,28 +102,6 @@ public class RecordReaderImpl implements RecordReader {
private final MetadataReader metadata;
private final DataReader dataReader;
- public final static class Index {
- OrcProto.RowIndex[] rowGroupIndex;
- OrcProto.BloomFilterIndex[] bloomFilterIndex;
-
- public Index(OrcProto.RowIndex[] rgIndex, OrcProto.BloomFilterIndex[] bfIndex) {
- this.rowGroupIndex = rgIndex;
- this.bloomFilterIndex = bfIndex;
- }
-
- public OrcProto.RowIndex[] getRowGroupIndex() {
- return rowGroupIndex;
- }
-
- public OrcProto.BloomFilterIndex[] getBloomFilterIndex() {
- return bloomFilterIndex;
- }
-
- public void setRowGroupIndex(OrcProto.RowIndex[] rowGroupIndex) {
- this.rowGroupIndex = rowGroupIndex;
- }
- }
-
/**
* Given a list of column names, find the given column and return the index.
*
@@ -156,15 +152,15 @@ public class RecordReaderImpl implements RecordReader {
Configuration conf
) throws IOException {
- TreeReaderSchema treeReaderSchema;
+ TreeReaderFactory.TreeReaderSchema treeReaderSchema;
if (options.getSchema() == null) {
- treeReaderSchema = new TreeReaderSchema().fileTypes(types).schemaTypes(types);
+ treeReaderSchema = new TreeReaderFactory.TreeReaderSchema().fileTypes(types).schemaTypes(types);
} else {
// Now that we are creating a record reader for a file, validate that the schema to read
// is compatible with the file schema.
//
- List<Type> schemaTypes = OrcUtils.getOrcTypes(options.getSchema());
+ List<OrcProto.Type> schemaTypes = OrcUtils.getOrcTypes(options.getSchema());
treeReaderSchema = SchemaEvolution.validateAndCreate(types, schemaTypes);
}
this.path = path;
@@ -741,7 +737,7 @@ public class RecordReaderImpl implements RecordReader {
if (indexes[columnIx] == null) {
throw new AssertionError("Index is not populated for " + columnIx);
}
- RowIndexEntry entry = indexes[columnIx].getEntry(rowGroup);
+ OrcProto.RowIndexEntry entry = indexes[columnIx].getEntry(rowGroup);
if (entry == null) {
throw new AssertionError("RG is not populated for " + columnIx + " rg " + rowGroup);
}
@@ -801,7 +797,7 @@ public class RecordReaderImpl implements RecordReader {
if (!(range instanceof BufferChunk)) {
continue;
}
- dataReader.releaseBuffer(((BufferChunk) range).chunk);
+ dataReader.releaseBuffer(((BufferChunk) range).getChunk());
}
}
}
@@ -871,60 +867,6 @@ public class RecordReaderImpl implements RecordReader {
}
/**
- * The sections of stripe that we have read.
- * This might not match diskRange: one disk range can map to multiple buffer chunks, depending on DFS block boundaries.
- */
- public static class BufferChunk extends DiskRangeList {
- final ByteBuffer chunk;
-
- public BufferChunk(ByteBuffer chunk, long offset) {
- super(offset, offset + chunk.remaining());
- this.chunk = chunk;
- }
-
- public ByteBuffer getChunk() {
- return chunk;
- }
-
- @Override
- public boolean hasData() {
- return chunk != null;
- }
-
- @Override
- public final String toString() {
- boolean makesSense = chunk.remaining() == (end - offset);
- return "data range [" + offset + ", " + end + "), size: " + chunk.remaining()
- + (makesSense ? "" : "(!)") + " type: " + (chunk.isDirect() ? "direct" : "array-backed");
- }
-
- @Override
- public DiskRange sliceAndShift(long offset, long end, long shiftBy) {
- assert offset <= end && offset >= this.offset && end <= this.end;
- assert offset + shiftBy >= 0;
- ByteBuffer sliceBuf = chunk.slice();
- int newPos = (int) (offset - this.offset);
- int newLimit = newPos + (int) (end - offset);
- try {
- sliceBuf.position(newPos);
- sliceBuf.limit(newLimit);
- } catch (Throwable t) {
- LOG.error("Failed to slice buffer chunk with range" + " [" + this.offset + ", " + this.end
- + "), position: " + chunk.position() + " limit: " + chunk.limit() + ", "
- + (chunk.isDirect() ? "direct" : "array") + "; to [" + offset + ", " + end + ") "
- + t.getClass());
- throw new RuntimeException(t);
- }
- return new BufferChunk(sliceBuf, offset + shiftBy);
- }
-
- @Override
- public ByteBuffer getData() {
- return chunk;
- }
- }
-
- /**
* Plan the ranges of the file that we need to read given the list of
* columns and row groups.
*
@@ -1199,12 +1141,12 @@ public class RecordReaderImpl implements RecordReader {
throw new IllegalArgumentException("Seek after the end of reader range");
}
- Index readRowIndex(
+ OrcIndex readRowIndex(
int stripeIndex, boolean[] included, boolean[] sargColumns) throws IOException {
return readRowIndex(stripeIndex, included, null, null, sargColumns);
}
- Index readRowIndex(int stripeIndex, boolean[] included, OrcProto.RowIndex[] indexes,
+ OrcIndex readRowIndex(int stripeIndex, boolean[] included, OrcProto.RowIndex[] indexes,
OrcProto.BloomFilterIndex[] bloomFilterIndex, boolean[] sargColumns) throws IOException {
StripeInformation stripe = stripes.get(stripeIndex);
OrcProto.StripeFooter stripeFooter = null;
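
A note on the BufferChunk class removed from RecordReaderImpl above (it now lives in org.apache.orc.impl.BufferChunk): each chunk wraps one ByteBuffer at a file offset, and, as its javadoc says, a single requested disk range may come back as several chunks when the read is split at DFS block boundaries. The standalone sketch below only illustrates that splitting; the Chunk class, the splitAtBlockBoundary helper, and the 256 MB block size are illustrative assumptions, not code from this patch.

    import java.nio.ByteBuffer;
    import java.util.ArrayList;
    import java.util.List;

    // Conceptual sketch only: one requested disk range may come back as several
    // buffer chunks when the read is split at (simulated) DFS block boundaries.
    public class BufferChunkSketch {
      static final long BLOCK_SIZE = 256L * 1024 * 1024;   // illustrative block size

      // Minimal stand-in for a BufferChunk: a byte buffer plus its file offset.
      static final class Chunk {
        final long offset;
        final ByteBuffer data;
        Chunk(long offset, ByteBuffer data) { this.offset = offset; this.data = data; }
        long end() { return offset + data.remaining(); }
      }

      // Split [offset, offset + length) at block boundaries, one chunk per piece.
      static List<Chunk> splitAtBlockBoundary(long offset, int length) {
        List<Chunk> chunks = new ArrayList<>();
        long pos = offset;
        long end = offset + length;
        while (pos < end) {
          long blockEnd = ((pos / BLOCK_SIZE) + 1) * BLOCK_SIZE;
          int pieceLen = (int) (Math.min(end, blockEnd) - pos);
          chunks.add(new Chunk(pos, ByteBuffer.allocate(pieceLen)));
          pos += pieceLen;
        }
        return chunks;
      }

      public static void main(String[] args) {
        // A 10 MB range straddling the first block boundary comes back as two chunks.
        for (Chunk c : splitAtBlockBoundary(BLOCK_SIZE - 5 * 1024 * 1024, 10 * 1024 * 1024)) {
          System.out.println("chunk [" + c.offset + ", " + c.end() + ")");
        }
      }
    }
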
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderUtils.java
index 0caeb1b..8a73948 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderUtils.java
@@ -17,7 +17,6 @@
*/
package org.apache.hadoop.hive.ql.io.orc;
-import java.io.EOFException;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
@@ -33,13 +32,18 @@ import org.apache.hadoop.hive.common.io.DiskRange;
import org.apache.hadoop.hive.common.io.DiskRangeList;
import org.apache.hadoop.hive.common.io.DiskRangeList.CreateHelper;
import org.apache.hadoop.hive.common.io.DiskRangeList.MutateHelper;
-import org.apache.hadoop.hive.ql.io.orc.RecordReaderImpl.BufferChunk;
import org.apache.hadoop.hive.shims.HadoopShims;
import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.hive.shims.HadoopShims.ByteBufferPoolShim;
import org.apache.hadoop.hive.shims.HadoopShims.ZeroCopyReaderShim;
+import org.apache.orc.impl.BufferChunk;
+import org.apache.orc.CompressionCodec;
+import org.apache.orc.DataReader;
+import org.apache.orc.impl.DirectDecompressionCodec;
+import org.apache.orc.OrcProto;
import com.google.common.collect.ComparisonChain;
+import org.apache.orc.impl.OutStream;
/**
* Stateless methods shared between RecordReaderImpl and EncodedReaderImpl.
@@ -262,7 +266,7 @@ public class RecordReaderUtils {
/**
* Build a string representation of a list of disk ranges.
- * @param ranges ranges to stringify
+ * @param range ranges to stringify
* @return the resulting string
*/
public static String stringifyDiskRanges(DiskRangeList range) {
@@ -288,7 +292,7 @@ public class RecordReaderUtils {
* Read the list of ranges from the file.
* @param file the file to read
* @param base the base of the stripe
- * @param ranges the disk ranges within the stripe to read
+ * @param range the disk ranges within the stripe to read
* @return the bytes read for each disk range, which is the same length as
* ranges
* @throws IOException
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RedBlackTree.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RedBlackTree.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RedBlackTree.java
deleted file mode 100644
index d98713c..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RedBlackTree.java
+++ /dev/null
@@ -1,309 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.io.orc;
-
-/**
- * A memory-efficient red-black tree that does not allocate any objects per
- * element. This class is abstract and assumes that the child class
- * handles the key and comparisons with the key.
- */
-abstract class RedBlackTree {
- public static final int NULL = -1;
-
- // Various values controlling the offset of the data within the array.
- private static final int LEFT_OFFSET = 0;
- private static final int RIGHT_OFFSET = 1;
- private static final int ELEMENT_SIZE = 2;
-
- protected int size = 0;
- private final DynamicIntArray data;
- protected int root = NULL;
- protected int lastAdd = 0;
- private boolean wasAdd = false;
-
- /**
- * Create a set with the given initial capacity.
- */
- public RedBlackTree(int initialCapacity) {
- data = new DynamicIntArray(initialCapacity * ELEMENT_SIZE);
- }
-
- /**
- * Insert a new node into the data array, growing the array as necessary.
- *
- * @return Returns the position of the new node.
- */
- private int insert(int left, int right, boolean isRed) {
- int position = size;
- size += 1;
- setLeft(position, left, isRed);
- setRight(position, right);
- return position;
- }
-
- /**
- * Compare the value at the given position to the new value.
- * @return 0 if the values are the same, -1 if the new value is smaller and
- * 1 if the new value is larger.
- */
- protected abstract int compareValue(int position);
-
- /**
- * Is the given node red as opposed to black? To prevent having an extra word
- * in the data array, we just use the low bit of the left child index.
- */
- protected boolean isRed(int position) {
- return position != NULL &&
- (data.get(position * ELEMENT_SIZE + LEFT_OFFSET) & 1) == 1;
- }
-
- /**
- * Set the red bit true or false.
- */
- private void setRed(int position, boolean isRed) {
- int offset = position * ELEMENT_SIZE + LEFT_OFFSET;
- if (isRed) {
- data.set(offset, data.get(offset) | 1);
- } else {
- data.set(offset, data.get(offset) & ~1);
- }
- }
-
- /**
- * Get the left field of the given position.
- */
- protected int getLeft(int position) {
- return data.get(position * ELEMENT_SIZE + LEFT_OFFSET) >> 1;
- }
-
- /**
- * Get the right field of the given position.
- */
- protected int getRight(int position) {
- return data.get(position * ELEMENT_SIZE + RIGHT_OFFSET);
- }
-
- /**
- * Set the left field of the given position.
- * Note that we are storing the node color in the low bit of the left pointer.
- */
- private void setLeft(int position, int left) {
- int offset = position * ELEMENT_SIZE + LEFT_OFFSET;
- data.set(offset, (left << 1) | (data.get(offset) & 1));
- }
-
- /**
- * Set the left field of the given position.
- * Note that we are storing the node color in the low bit of the left pointer.
- */
- private void setLeft(int position, int left, boolean isRed) {
- int offset = position * ELEMENT_SIZE + LEFT_OFFSET;
- data.set(offset, (left << 1) | (isRed ? 1 : 0));
- }
-
- /**
- * Set the right field of the given position.
- */
- private void setRight(int position, int right) {
- data.set(position * ELEMENT_SIZE + RIGHT_OFFSET, right);
- }
-
- /**
- * Insert or find a given key in the tree and rebalance the tree correctly.
- * Rebalancing restores the red-black aspect of the tree to maintain the
- * invariants:
- * 1. If a node is red, both of its children are black.
- * 2. Each child of a node has the same black height (the number of black
- * nodes between it and the leaves of the tree).
- *
- * Inserted nodes are at the leaves and are red, therefore there is at most a
- * violation of rule 1 at the node we just put in. Instead of always keeping
- * the parents, this routine passes the context down.
- *
- * The fix is broken down into 6 cases (1.{1,2,3} and 2.{1,2,3} that are
- * left-right mirror images of each other). See Algorithms by Cormen,
- * Leiserson, and Rivest for the explanation of the subcases.
- *
- * @param node The node that we are fixing right now.
- * @param fromLeft Did we come down from the left?
- * @param parent The node's parent
- * @param grandparent Parent's parent
- * @param greatGrandparent Grandparent's parent
- * @return Does parent also need to be checked and/or fixed?
- */
- private boolean add(int node, boolean fromLeft, int parent,
- int grandparent, int greatGrandparent) {
- if (node == NULL) {
- if (root == NULL) {
- lastAdd = insert(NULL, NULL, false);
- root = lastAdd;
- wasAdd = true;
- return false;
- } else {
- lastAdd = insert(NULL, NULL, true);
- node = lastAdd;
- wasAdd = true;
- // connect the new node into the tree
- if (fromLeft) {
- setLeft(parent, node);
- } else {
- setRight(parent, node);
- }
- }
- } else {
- int compare = compareValue(node);
- boolean keepGoing;
-
- // Recurse down to find where the node needs to be added
- if (compare < 0) {
- keepGoing = add(getLeft(node), true, node, parent, grandparent);
- } else if (compare > 0) {
- keepGoing = add(getRight(node), false, node, parent, grandparent);
- } else {
- lastAdd = node;
- wasAdd = false;
- return false;
- }
-
- // we don't need to fix the root (because it is always set to black)
- if (node == root || !keepGoing) {
- return false;
- }
- }
-
-
- // Do we need to fix this node? Only if there are two reds right under each
- // other.
- if (isRed(node) && isRed(parent)) {
- if (parent == getLeft(grandparent)) {
- int uncle = getRight(grandparent);
- if (isRed(uncle)) {
- // case 1.1
- setRed(parent, false);
- setRed(uncle, false);
- setRed(grandparent, true);
- return true;
- } else {
- if (node == getRight(parent)) {
- // case 1.2
- // swap node and parent
- int tmp = node;
- node = parent;
- parent = tmp;
- // left-rotate on node
- setLeft(grandparent, parent);
- setRight(node, getLeft(parent));
- setLeft(parent, node);
- }
-
- // case 1.2 and 1.3
- setRed(parent, false);
- setRed(grandparent, true);
-
- // right-rotate on grandparent
- if (greatGrandparent == NULL) {
- root = parent;
- } else if (getLeft(greatGrandparent) == grandparent) {
- setLeft(greatGrandparent, parent);
- } else {
- setRight(greatGrandparent, parent);
- }
- setLeft(grandparent, getRight(parent));
- setRight(parent, grandparent);
- return false;
- }
- } else {
- int uncle = getLeft(grandparent);
- if (isRed(uncle)) {
- // case 2.1
- setRed(parent, false);
- setRed(uncle, false);
- setRed(grandparent, true);
- return true;
- } else {
- if (node == getLeft(parent)) {
- // case 2.2
- // swap node and parent
- int tmp = node;
- node = parent;
- parent = tmp;
- // right-rotate on node
- setRight(grandparent, parent);
- setLeft(node, getRight(parent));
- setRight(parent, node);
- }
- // case 2.2 and 2.3
- setRed(parent, false);
- setRed(grandparent, true);
- // left-rotate on grandparent
- if (greatGrandparent == NULL) {
- root = parent;
- } else if (getRight(greatGrandparent) == grandparent) {
- setRight(greatGrandparent, parent);
- } else {
- setLeft(greatGrandparent, parent);
- }
- setRight(grandparent, getLeft(parent));
- setLeft(parent, grandparent);
- return false;
- }
- }
- } else {
- return true;
- }
- }
-
- /**
- * Add the new key to the tree.
- * @return true if the element is a new one.
- */
- protected boolean add() {
- add(root, false, NULL, NULL, NULL);
- if (wasAdd) {
- setRed(root, false);
- return true;
- } else {
- return false;
- }
- }
-
- /**
- * Get the number of elements in the set.
- */
- public int size() {
- return size;
- }
-
- /**
- * Reset the table to empty.
- */
- public void clear() {
- root = NULL;
- size = 0;
- data.clear();
- }
-
- /**
- * Get the buffer size in bytes.
- */
- public long getSizeInBytes() {
- return data.getSizeInBytes();
- }
-}
-
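
The deleted RedBlackTree stores every node as two ints in a DynamicIntArray and packs the node's color into the low bit of the left-child field (see isRed/setLeft above). A minimal standalone sketch of just that packing trick follows; the class and method names are illustrative, not from the patch.

    // Sketch of the color-in-the-low-bit trick used by the removed RedBlackTree:
    // the left child index is stored shifted left by one, and the low bit
    // records whether the node is red.
    public class PackedNodeSketch {
      static int pack(int leftChild, boolean isRed) {
        return (leftChild << 1) | (isRed ? 1 : 0);
      }
      static int leftChild(int packed) {
        return packed >> 1;           // arithmetic shift keeps NULL (-1) intact
      }
      static boolean isRed(int packed) {
        return (packed & 1) == 1;
      }

      public static void main(String[] args) {
        final int NULL = -1;                       // same sentinel as the deleted class
        int packed = pack(NULL, true);             // a new red leaf with no children
        System.out.println(leftChild(packed));     // -1
        System.out.println(isRed(packed));         // true
      }
    }
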
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthByteReader.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthByteReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthByteReader.java
deleted file mode 100644
index f3e6184..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthByteReader.java
+++ /dev/null
@@ -1,150 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-import java.io.EOFException;
-import java.io.IOException;
-
-import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
-
-/**
- * A reader that reads a sequence of bytes. A control byte is read before
- * each run, with values 0 to 127 meaning 3 to 130 repetitions. If the
- * byte is -1 to -128, 1 to 128 literal byte values follow.
- */
-public class RunLengthByteReader {
- private InStream input;
- private final byte[] literals =
- new byte[RunLengthByteWriter.MAX_LITERAL_SIZE];
- private int numLiterals = 0;
- private int used = 0;
- private boolean repeat = false;
-
- RunLengthByteReader(InStream input) throws IOException {
- this.input = input;
- }
-
- public void setInStream(InStream input) {
- this.input = input;
- }
-
- private void readValues(boolean ignoreEof) throws IOException {
- int control = input.read();
- used = 0;
- if (control == -1) {
- if (!ignoreEof) {
- throw new EOFException("Read past end of buffer RLE byte from " + input);
- }
- used = numLiterals = 0;
- return;
- } else if (control < 0x80) {
- repeat = true;
- numLiterals = control + RunLengthByteWriter.MIN_REPEAT_SIZE;
- int val = input.read();
- if (val == -1) {
- throw new EOFException("Reading RLE byte got EOF");
- }
- literals[0] = (byte) val;
- } else {
- repeat = false;
- numLiterals = 0x100 - control;
- int bytes = 0;
- while (bytes < numLiterals) {
- int result = input.read(literals, bytes, numLiterals - bytes);
- if (result == -1) {
- throw new EOFException("Reading RLE byte literal got EOF in " + this);
- }
- bytes += result;
- }
- }
- }
-
- boolean hasNext() throws IOException {
- return used != numLiterals || input.available() > 0;
- }
-
- byte next() throws IOException {
- byte result;
- if (used == numLiterals) {
- readValues(false);
- }
- if (repeat) {
- result = literals[0];
- } else {
- result = literals[used];
- }
- ++used;
- return result;
- }
-
- void nextVector(LongColumnVector previous, long previousLen)
- throws IOException {
- previous.isRepeating = true;
- for (int i = 0; i < previousLen; i++) {
- if (!previous.isNull[i]) {
- previous.vector[i] = next();
- } else {
- // The default value of null for int types in vectorized
- // processing is 1, so set that if the value is null
- previous.vector[i] = 1;
- }
-
- // The default value for nulls in Vectorization for int types is 1
- // and given that a non-null value can also be 1, we need to check isNull as well
- // when determining the isRepeating flag.
- if (previous.isRepeating
- && i > 0
- && ((previous.vector[i - 1] != previous.vector[i]) || (previous.isNull[i - 1] != previous.isNull[i]))) {
- previous.isRepeating = false;
- }
- }
- }
-
- public void seek(PositionProvider index) throws IOException {
- input.seek(index);
- int consumed = (int) index.getNext();
- if (consumed != 0) {
- // a loop is required for cases where we break the run into two parts
- while (consumed > 0) {
- readValues(false);
- used = consumed;
- consumed -= numLiterals;
- }
- } else {
- used = 0;
- numLiterals = 0;
- }
- }
-
- void skip(long items) throws IOException {
- while (items > 0) {
- if (used == numLiterals) {
- readValues(false);
- }
- long consume = Math.min(items, numLiterals - used);
- used += consume;
- items -= consume;
- }
- }
-
- @Override
- public String toString() {
- return "byte rle " + (repeat ? "repeat" : "literal") + " used: " +
- used + "/" + numLiterals + " from " + input;
- }
-}
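
The deleted RunLengthByteReader decodes the byte-level RLE described in its javadoc: a control byte of 0 to 127 means the next byte is repeated control + 3 times, while -1 to -128 means -control literal bytes follow. The standalone decoder below is only a sketch of that framing, not the Hive/ORC class itself; the names are illustrative.

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.EOFException;
    import java.io.IOException;
    import java.io.InputStream;

    // Standalone illustration of ORC's byte-level RLE framing:
    //   control 0..127                    -> next byte is repeated (control + 3) times
    //   control -1..-128 (0x80..0xff)     -> (-control) literal bytes follow
    public class ByteRleDecodeSketch {
      static byte[] decode(byte[] encoded) throws IOException {
        InputStream in = new ByteArrayInputStream(encoded);
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        int control;
        while ((control = in.read()) != -1) {
          if (control < 0x80) {                       // run
            int repetitions = control + 3;            // MIN_REPEAT_SIZE == 3
            int value = in.read();
            if (value == -1) throw new EOFException("missing run value");
            for (int i = 0; i < repetitions; i++) out.write(value);
          } else {                                    // literals
            int count = 0x100 - control;
            for (int i = 0; i < count; i++) {
              int value = in.read();
              if (value == -1) throw new EOFException("missing literal");
              out.write(value);
            }
          }
        }
        return out.toByteArray();
      }

      public static void main(String[] args) throws IOException {
        // control 2 -> five 7s; control -3 (0xfd) -> literals 1, 2, 3
        byte[] encoded = {2, 7, (byte) 0xfd, 1, 2, 3};
        for (byte b : decode(encoded)) System.out.print(b + " ");   // 7 7 7 7 7 1 2 3
      }
    }
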
http://git-wip-us.apache.org/repos/asf/hive/blob/9c7a78ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthByteWriter.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthByteWriter.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthByteWriter.java
deleted file mode 100644
index ab4bbcc..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthByteWriter.java
+++ /dev/null
@@ -1,106 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.io.orc;
-
-import java.io.IOException;
-
-/**
- * A writer that writes a sequence of bytes. A control byte is written before
- * each run, with values 0 to 127 meaning 3 to 130 repetitions. If the
- * byte is -1 to -128, 1 to 128 literal byte values follow.
- */
-class RunLengthByteWriter {
- static final int MIN_REPEAT_SIZE = 3;
- static final int MAX_LITERAL_SIZE = 128;
- static final int MAX_REPEAT_SIZE= 127 + MIN_REPEAT_SIZE;
- private final PositionedOutputStream output;
- private final byte[] literals = new byte[MAX_LITERAL_SIZE];
- private int numLiterals = 0;
- private boolean repeat = false;
- private int tailRunLength = 0;
-
- RunLengthByteWriter(PositionedOutputStream output) {
- this.output = output;
- }
-
- private void writeValues() throws IOException {
- if (numLiterals != 0) {
- if (repeat) {
- output.write(numLiterals - MIN_REPEAT_SIZE);
- output.write(literals, 0, 1);
- } else {
- output.write(-numLiterals);
- output.write(literals, 0, numLiterals);
- }
- repeat = false;
- tailRunLength = 0;
- numLiterals = 0;
- }
- }
-
- void flush() throws IOException {
- writeValues();
- output.flush();
- }
-
- void write(byte value) throws IOException {
- if (numLiterals == 0) {
- literals[numLiterals++] = value;
- tailRunLength = 1;
- } else if (repeat) {
- if (value == literals[0]) {
- numLiterals += 1;
- if (numLiterals == MAX_REPEAT_SIZE) {
- writeValues();
- }
- } else {
- writeValues();
- literals[numLiterals++] = value;
- tailRunLength = 1;
- }
- } else {
- if (value == literals[numLiterals - 1]) {
- tailRunLength += 1;
- } else {
- tailRunLength = 1;
- }
- if (tailRunLength == MIN_REPEAT_SIZE) {
- if (numLiterals + 1 == MIN_REPEAT_SIZE) {
- repeat = true;
- numLiterals += 1;
- } else {
- numLiterals -= MIN_REPEAT_SIZE - 1;
- writeValues();
- literals[0] = value;
- repeat = true;
- numLiterals = MIN_REPEAT_SIZE;
- }
- } else {
- literals[numLiterals++] = value;
- if (numLiterals == MAX_LITERAL_SIZE) {
- writeValues();
- }
- }
- }
- }
-
- void getPosition(PositionRecorder recorder) throws IOException {
- output.getPosition(recorder);
- recorder.addPosition(numLiterals);
- }
-}
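
From the write side, RunLengthByteWriter above buffers values until it can choose between a run and a literal group, starting a run only once MIN_REPEAT_SIZE (3) equal bytes have been seen. Tracing its code on the input 7 7 7 7 7 1 2 3 followed by flush() gives the bytes 2 7 -3 1 2 3 (a run of five 7s, then three literals). The greedy encoder below is a rough sketch of the same framing; it is not the ORC writer, and its look-ahead strategy is a simplification of the stream-based buffering above.

    import java.io.ByteArrayOutputStream;

    // Greedy sketch of the byte-RLE framing from the encoding side. It produces
    // bytes the decoder sketch above understands, but is not RunLengthByteWriter.
    public class ByteRleEncodeSketch {
      static byte[] encode(byte[] values) {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        int i = 0;
        while (i < values.length) {
          // measure the run starting at i, capped at 130 (127 + MIN_REPEAT_SIZE)
          int run = 1;
          while (run < 130 && i + run < values.length && values[i + run] == values[i]) run++;
          if (run >= 3) {                       // long enough to encode as a run
            out.write(run - 3);                 // control byte 0..127
            out.write(values[i]);
            i += run;
          } else {                              // gather literals until the next run of 3
            int start = i;
            while (i < values.length && i - start < 128) {
              if (i + 2 < values.length && values[i] == values[i + 1] && values[i] == values[i + 2]) break;
              i++;
            }
            int count = i - start;
            out.write(-count);                  // control byte -1..-128
            out.write(values, start, count);
          }
        }
        return out.toByteArray();
      }

      public static void main(String[] args) {
        byte[] encoded = encode(new byte[]{7, 7, 7, 7, 7, 1, 2, 3});
        for (byte b : encoded) System.out.print(b + " ");   // 2 7 -3 1 2 3
      }
    }
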