You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by om...@apache.org on 2014/04/09 21:50:10 UTC
svn commit: r1586108 - in /hive/branches/branch-0.13: ./
ql/src/java/org/apache/hadoop/hive/ql/io/orc/
ql/src/java/org/apache/hadoop/hive/ql/io/sarg/
ql/src/java/org/apache/hadoop/hive/ql/plan/
ql/src/test/org/apache/hadoop/hive/ql/io/orc/
Author: omalley
Date: Wed Apr 9 19:50:09 2014
New Revision: 1586108
URL: http://svn.apache.org/r1586108
Log:
HIVE-6818. Fix array out of bounds when ORC is used with ACID and predicate pushdown. (omalley)
Modified:
hive/branches/branch-0.13/ (props changed)
hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java
hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java
hive/branches/branch-0.13/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
Propchange: hive/branches/branch-0.13/
------------------------------------------------------------------------------
Merged /hive/trunk:r1586104
Modified: hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java?rev=1586108&r1=1586107&r2=1586108&view=diff
==============================================================================
--- hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java (original)
+++ hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java Wed Apr 9 19:50:09 2014
@@ -106,6 +106,7 @@ public class OrcInputFormat implements
SHIMS.getHadoopConfNames().get("MAPREDMINSPLITSIZE");
static final String MAX_SPLIT_SIZE =
SHIMS.getHadoopConfNames().get("MAPREDMAXSPLITSIZE");
+ static final String SARG_PUSHDOWN = "sarg.pushdown";
private static final long DEFAULT_MIN_SPLIT_SIZE = 16 * 1024 * 1024;
private static final long DEFAULT_MAX_SPLIT_SIZE = 256 * 1024 * 1024;
@@ -268,21 +269,28 @@ public class OrcInputFormat implements
boolean isOriginal) {
int rootColumn = getRootColumn(isOriginal);
String serializedPushdown = conf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
+ String sargPushdown = conf.get(SARG_PUSHDOWN);
String columnNamesString =
conf.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR);
- if (serializedPushdown == null || columnNamesString == null) {
+ if ((sargPushdown == null && serializedPushdown == null)
+ || columnNamesString == null) {
LOG.debug("No ORC pushdown predicate");
options.searchArgument(null, null);
} else {
- SearchArgument sarg = SearchArgument.FACTORY.create
- (Utilities.deserializeExpression(serializedPushdown));
+ SearchArgument sarg;
+ if (serializedPushdown != null) {
+ sarg = SearchArgument.FACTORY.create
+ (Utilities.deserializeExpression(serializedPushdown));
+ } else {
+ sarg = SearchArgument.FACTORY.create(sargPushdown);
+ }
LOG.info("ORC pushdown predicate: " + sarg);
String[] neededColumnNames = columnNamesString.split(",");
String[] columnNames = new String[types.size() - rootColumn];
boolean[] includedColumns = options.getInclude();
int i = 0;
for(int columnId: types.get(rootColumn).getSubtypesList()) {
- if (includedColumns == null || includedColumns[columnId]) {
+ if (includedColumns == null || includedColumns[columnId - rootColumn]) {
// this is guaranteed to be positive because types only have children
// ids greater than their own id.
columnNames[columnId - rootColumn] = neededColumnNames[i++];
Modified: hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java?rev=1586108&r1=1586107&r2=1586108&view=diff
==============================================================================
--- hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java (original)
+++ hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java Wed Apr 9 19:50:09 2014
@@ -166,6 +166,12 @@ public interface SearchArgument {
public TruthValue evaluate(TruthValue[] leaves);
/**
+ * Serialize the SARG as a Kryo object and return the base64 string.
+ * @return the serialized SARG
+ */
+ public String toKryo();
+
+ /**
* A factory for creating SearchArguments. Java doesn't allow static methods
* in interfaces. *DOH*
*/
@@ -177,6 +183,10 @@ public interface SearchArgument {
public Builder newBuilder() {
return SearchArgumentImpl.newBuilder();
}
+
+ public SearchArgument create(String kryo) {
+ return SearchArgumentImpl.fromKryo(kryo);
+ }
}
/**
Modified: hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java?rev=1586108&r1=1586107&r2=1586108&view=diff
==============================================================================
--- hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java (original)
+++ hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java Wed Apr 9 19:50:09 2014
@@ -26,6 +26,10 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import com.esotericsoftware.kryo.Kryo;
+import com.esotericsoftware.kryo.io.Input;
+import com.esotericsoftware.kryo.io.Output;
+import org.apache.commons.codec.binary.Base64;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.hive.common.type.HiveChar;
import org.apache.hadoop.hive.common.type.HiveDecimal;
@@ -65,6 +69,14 @@ final class SearchArgumentImpl implement
private final Object literal;
private final List<Object> literalList;
+ PredicateLeafImpl() {
+ operator = null;
+ type = null;
+ columnName = null;
+ literal = null;
+ literalList = null;
+ }
+
PredicateLeafImpl(Operator operator,
Type type,
String columnName,
@@ -166,6 +178,13 @@ final class SearchArgumentImpl implement
private final int leaf;
private final TruthValue constant;
+ ExpressionTree() {
+ operator = null;
+ children = null;
+ leaf = 0;
+ constant = null;
+ }
+
ExpressionTree(Operator op, ExpressionTree... kids) {
operator = op;
children = new ArrayList<ExpressionTree>();
@@ -818,6 +837,11 @@ final class SearchArgumentImpl implement
}
}
+ SearchArgumentImpl() {
+ leaves = null;
+ expression = null;
+ }
+
SearchArgumentImpl(ExpressionTree expression, List<PredicateLeaf> leaves) {
this.expression = expression;
this.leaves = leaves;
@@ -852,6 +876,18 @@ final class SearchArgumentImpl implement
return buffer.toString();
}
+ public String toKryo() {
+ Output out = new Output(4 * 1024, 10 * 1024 * 1024);
+ new Kryo().writeObject(out, this);
+ out.close();
+ return Base64.encodeBase64String(out.toBytes());
+ }
+
+ static SearchArgument fromKryo(String value) {
+ Input input = new Input(Base64.decodeBase64(value));
+ return new Kryo().readObject(input, SearchArgumentImpl.class);
+ }
+
private static class BuilderImpl implements Builder {
private final Deque<ExpressionTree> currentTree =
new ArrayDeque<ExpressionTree>();
Modified: hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java?rev=1586108&r1=1586107&r2=1586108&view=diff
==============================================================================
--- hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java (original)
+++ hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java Wed Apr 9 19:50:09 2014
@@ -71,7 +71,7 @@ public class ExprNodeGenericFuncDesc ext
//Is this an expression that should perform a comparison for sorted searches
private boolean isSortedExpr;
- public ExprNodeGenericFuncDesc() {
+ public ExprNodeGenericFuncDesc() {;
}
/* If the function has an explicit name like func(args) then call a
Modified: hive/branches/branch-0.13/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.13/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java?rev=1586108&r1=1586107&r2=1586108&view=diff
==============================================================================
--- hive/branches/branch-0.13/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java (original)
+++ hive/branches/branch-0.13/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java Wed Apr 9 19:50:09 2014
@@ -59,6 +59,8 @@ import org.apache.hadoop.hive.ql.io.FSRe
import org.apache.hadoop.hive.ql.io.HiveInputFormat;
import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
import org.apache.hadoop.hive.ql.io.InputFormatChecker;
+import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
+import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.plan.MapWork;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
@@ -1287,4 +1289,49 @@ public class TestInputOutputFormat {
ioe.getMessage());
}
}
+
+ @Test
+ public void testSetSearchArgument() throws Exception {
+ Reader.Options options = new Reader.Options();
+ List<OrcProto.Type> types = new ArrayList<OrcProto.Type>();
+ OrcProto.Type.Builder builder = OrcProto.Type.newBuilder();
+ builder.setKind(OrcProto.Type.Kind.STRUCT)
+ .addAllFieldNames(Arrays.asList("op", "otid", "bucket", "rowid", "ctid",
+ "row"))
+ .addAllSubtypes(Arrays.asList(1,2,3,4,5,6));
+ types.add(builder.build());
+ builder.clear().setKind(OrcProto.Type.Kind.INT);
+ types.add(builder.build());
+ types.add(builder.build());
+ types.add(builder.build());
+ types.add(builder.build());
+ types.add(builder.build());
+ builder.clear().setKind(OrcProto.Type.Kind.STRUCT)
+ .addAllFieldNames(Arrays.asList("url", "purchase", "cost", "store"))
+ .addAllSubtypes(Arrays.asList(7, 8, 9, 10));
+ types.add(builder.build());
+ builder.clear().setKind(OrcProto.Type.Kind.STRING);
+ types.add(builder.build());
+ builder.clear().setKind(OrcProto.Type.Kind.INT);
+ types.add(builder.build());
+ types.add(builder.build());
+ types.add(builder.build());
+ SearchArgument isNull = SearchArgument.FACTORY.newBuilder()
+ .startAnd().isNull("cost").end().build();
+ conf.set(OrcInputFormat.SARG_PUSHDOWN, isNull.toKryo());
+ conf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR,
+ "url,cost");
+ options.include(new boolean[]{true, true, false, true, false});
+ OrcInputFormat.setSearchArgument(options, types, conf, false);
+ String[] colNames = options.getColumnNames();
+ assertEquals(null, colNames[0]);
+ assertEquals("url", colNames[1]);
+ assertEquals(null, colNames[2]);
+ assertEquals("cost", colNames[3]);
+ assertEquals(null, colNames[4]);
+ SearchArgument arg = options.getSearchArgument();
+ List<PredicateLeaf> leaves = arg.getLeaves();
+ assertEquals("cost", leaves.get(0).getColumnName());
+ assertEquals(PredicateLeaf.Operator.IS_NULL, leaves.get(0).getOperator());
+ }
}