You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by om...@apache.org on 2014/04/09 21:50:10 UTC

svn commit: r1586108 - in /hive/branches/branch-0.13: ./ ql/src/java/org/apache/hadoop/hive/ql/io/orc/ ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ ql/src/java/org/apache/hadoop/hive/ql/plan/ ql/src/test/org/apache/hadoop/hive/ql/io/orc/

Author: omalley
Date: Wed Apr  9 19:50:09 2014
New Revision: 1586108

URL: http://svn.apache.org/r1586108
Log:
HIVE-6818. Fix array out of bounds when ORC is used with ACID and predicate pushdown. (omalley)

Modified:
    hive/branches/branch-0.13/   (props changed)
    hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
    hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java
    hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
    hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java
    hive/branches/branch-0.13/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java

Propchange: hive/branches/branch-0.13/
------------------------------------------------------------------------------
  Merged /hive/trunk:r1586104

Modified: hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java?rev=1586108&r1=1586107&r2=1586108&view=diff
==============================================================================
--- hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java (original)
+++ hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java Wed Apr  9 19:50:09 2014
@@ -106,6 +106,7 @@ public class OrcInputFormat  implements 
       SHIMS.getHadoopConfNames().get("MAPREDMINSPLITSIZE");
   static final String MAX_SPLIT_SIZE =
       SHIMS.getHadoopConfNames().get("MAPREDMAXSPLITSIZE");
+  static final String SARG_PUSHDOWN = "sarg.pushdown";
 
   private static final long DEFAULT_MIN_SPLIT_SIZE = 16 * 1024 * 1024;
   private static final long DEFAULT_MAX_SPLIT_SIZE = 256 * 1024 * 1024;
@@ -268,21 +269,28 @@ public class OrcInputFormat  implements 
                                 boolean isOriginal) {
     int rootColumn = getRootColumn(isOriginal);
     String serializedPushdown = conf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
+    String sargPushdown = conf.get(SARG_PUSHDOWN);
     String columnNamesString =
         conf.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR);
-    if (serializedPushdown == null || columnNamesString == null) {
+    if ((sargPushdown == null && serializedPushdown == null)
+        || columnNamesString == null) {
       LOG.debug("No ORC pushdown predicate");
       options.searchArgument(null, null);
     } else {
-      SearchArgument sarg = SearchArgument.FACTORY.create
-          (Utilities.deserializeExpression(serializedPushdown));
+      SearchArgument sarg;
+      if (serializedPushdown != null) {
+        sarg = SearchArgument.FACTORY.create
+            (Utilities.deserializeExpression(serializedPushdown));
+      } else {
+        sarg = SearchArgument.FACTORY.create(sargPushdown);
+      }
       LOG.info("ORC pushdown predicate: " + sarg);
       String[] neededColumnNames = columnNamesString.split(",");
       String[] columnNames = new String[types.size() - rootColumn];
       boolean[] includedColumns = options.getInclude();
       int i = 0;
       for(int columnId: types.get(rootColumn).getSubtypesList()) {
-        if (includedColumns == null || includedColumns[columnId]) {
+        if (includedColumns == null || includedColumns[columnId - rootColumn]) {
           // this is guaranteed to be positive because types only have children
           // ids greater than their own id.
           columnNames[columnId - rootColumn] = neededColumnNames[i++];

Modified: hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java?rev=1586108&r1=1586107&r2=1586108&view=diff
==============================================================================
--- hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java (original)
+++ hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java Wed Apr  9 19:50:09 2014
@@ -166,6 +166,12 @@ public interface SearchArgument {
   public TruthValue evaluate(TruthValue[] leaves);
 
   /**
+   * Serialize the SARG as a Kryo object and return the base64 string.
+   * @return the serialized SARG
+   */
+  public String toKryo();
+
+  /**
    * A factory for creating SearchArguments. Java doesn't allow static methods
    * in interfaces. *DOH*
    */
@@ -177,6 +183,10 @@ public interface SearchArgument {
     public Builder newBuilder() {
       return SearchArgumentImpl.newBuilder();
     }
+
+    public SearchArgument create(String kryo) {
+      return SearchArgumentImpl.fromKryo(kryo);
+    }
   }
 
   /**

Modified: hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java?rev=1586108&r1=1586107&r2=1586108&view=diff
==============================================================================
--- hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java (original)
+++ hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java Wed Apr  9 19:50:09 2014
@@ -26,6 +26,10 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
+import com.esotericsoftware.kryo.Kryo;
+import com.esotericsoftware.kryo.io.Input;
+import com.esotericsoftware.kryo.io.Output;
+import org.apache.commons.codec.binary.Base64;
 import org.apache.commons.lang.StringUtils;
 import org.apache.hadoop.hive.common.type.HiveChar;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
@@ -65,6 +69,14 @@ final class SearchArgumentImpl implement
     private final Object literal;
     private final List<Object> literalList;
 
+    PredicateLeafImpl() {
+      operator = null;
+      type = null;
+      columnName = null;
+      literal = null;
+      literalList = null;
+    }
+
     PredicateLeafImpl(Operator operator,
                       Type type,
                       String columnName,
@@ -166,6 +178,13 @@ final class SearchArgumentImpl implement
     private final int leaf;
     private final TruthValue constant;
 
+    ExpressionTree() {
+      operator = null;
+      children = null;
+      leaf = 0;
+      constant = null;
+    }
+
     ExpressionTree(Operator op, ExpressionTree... kids) {
       operator = op;
       children = new ArrayList<ExpressionTree>();
@@ -818,6 +837,11 @@ final class SearchArgumentImpl implement
     }
   }
 
+  SearchArgumentImpl() {
+    leaves = null;
+    expression = null;
+  }
+
   SearchArgumentImpl(ExpressionTree expression, List<PredicateLeaf> leaves) {
     this.expression = expression;
     this.leaves = leaves;
@@ -852,6 +876,18 @@ final class SearchArgumentImpl implement
     return buffer.toString();
   }
 
+  public String toKryo() {
+    Output out = new Output(4 * 1024, 10 * 1024 * 1024);
+    new Kryo().writeObject(out, this);
+    out.close();
+    return Base64.encodeBase64String(out.toBytes());
+  }
+
+  static SearchArgument fromKryo(String value) {
+    Input input = new Input(Base64.decodeBase64(value));
+    return new Kryo().readObject(input, SearchArgumentImpl.class);
+  }
+
   private static class BuilderImpl implements Builder {
     private final Deque<ExpressionTree> currentTree =
         new ArrayDeque<ExpressionTree>();

Modified: hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java?rev=1586108&r1=1586107&r2=1586108&view=diff
==============================================================================
--- hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java (original)
+++ hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java Wed Apr  9 19:50:09 2014
@@ -71,7 +71,7 @@ public class ExprNodeGenericFuncDesc ext
   //Is this an expression that should perform a comparison for sorted searches
   private boolean isSortedExpr;
 
-  public ExprNodeGenericFuncDesc() {
+  public ExprNodeGenericFuncDesc() {;
   }
 
   /* If the function has an explicit name like func(args) then call a

Modified: hive/branches/branch-0.13/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.13/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java?rev=1586108&r1=1586107&r2=1586108&view=diff
==============================================================================
--- hive/branches/branch-0.13/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java (original)
+++ hive/branches/branch-0.13/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java Wed Apr  9 19:50:09 2014
@@ -59,6 +59,8 @@ import org.apache.hadoop.hive.ql.io.FSRe
 import org.apache.hadoop.hive.ql.io.HiveInputFormat;
 import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
 import org.apache.hadoop.hive.ql.io.InputFormatChecker;
+import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
+import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
 import org.apache.hadoop.hive.ql.plan.MapWork;
 import org.apache.hadoop.hive.ql.plan.PartitionDesc;
 import org.apache.hadoop.hive.ql.plan.TableDesc;
@@ -1287,4 +1289,49 @@ public class TestInputOutputFormat {
           ioe.getMessage());
     }
   }
+
+  @Test
+  public void testSetSearchArgument() throws Exception {
+    Reader.Options options = new Reader.Options();
+    List<OrcProto.Type> types = new ArrayList<OrcProto.Type>();
+    OrcProto.Type.Builder builder = OrcProto.Type.newBuilder();
+    builder.setKind(OrcProto.Type.Kind.STRUCT)
+        .addAllFieldNames(Arrays.asList("op", "otid", "bucket", "rowid", "ctid",
+            "row"))
+        .addAllSubtypes(Arrays.asList(1,2,3,4,5,6));
+    types.add(builder.build());
+    builder.clear().setKind(OrcProto.Type.Kind.INT);
+    types.add(builder.build());
+    types.add(builder.build());
+    types.add(builder.build());
+    types.add(builder.build());
+    types.add(builder.build());
+    builder.clear().setKind(OrcProto.Type.Kind.STRUCT)
+        .addAllFieldNames(Arrays.asList("url", "purchase", "cost", "store"))
+        .addAllSubtypes(Arrays.asList(7, 8, 9, 10));
+    types.add(builder.build());
+    builder.clear().setKind(OrcProto.Type.Kind.STRING);
+    types.add(builder.build());
+    builder.clear().setKind(OrcProto.Type.Kind.INT);
+    types.add(builder.build());
+    types.add(builder.build());
+    types.add(builder.build());
+    SearchArgument isNull = SearchArgument.FACTORY.newBuilder()
+        .startAnd().isNull("cost").end().build();
+    conf.set(OrcInputFormat.SARG_PUSHDOWN, isNull.toKryo());
+    conf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR,
+        "url,cost");
+    options.include(new boolean[]{true, true, false, true, false});
+    OrcInputFormat.setSearchArgument(options, types, conf, false);
+    String[] colNames = options.getColumnNames();
+    assertEquals(null, colNames[0]);
+    assertEquals("url", colNames[1]);
+    assertEquals(null, colNames[2]);
+    assertEquals("cost", colNames[3]);
+    assertEquals(null, colNames[4]);
+    SearchArgument arg = options.getSearchArgument();
+    List<PredicateLeaf> leaves = arg.getLeaves();
+    assertEquals("cost", leaves.get(0).getColumnName());
+    assertEquals(PredicateLeaf.Operator.IS_NULL, leaves.get(0).getOperator());
+  }
 }