Posted to commits@hive.apache.org by xu...@apache.org on 2015/09/16 19:07:03 UTC
[1/5] hive git commit: HIVE-11401: Predicate push down does not work with Parquet when partitions are in the expression (Sergio Pena, reviewed by Szehon Ho)
Repository: hive
Updated Branches:
refs/heads/spark a8c49ef41 -> f78f66359
HIVE-11401: Predicate push down does not work with Parquet when partitions are in the expression (Sergio Pena, reviewed by Szehon Ho)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/724b3193
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/724b3193
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/724b3193
Branch: refs/heads/spark
Commit: 724b31930718eea606dfe6d95eda7385209caa5f
Parents: 7df9d7a
Author: Sergio Pena <se...@cloudera.com>
Authored: Fri Jul 31 09:48:28 2015 -0500
Committer: Sergio Pena <se...@cloudera.com>
Committed: Fri Jul 31 09:48:28 2015 -0500
----------------------------------------------------------------------
.../read/ParquetFilterPredicateConverter.java | 148 +++++++++++++++++++
.../read/ParquetRecordReaderWrapper.java | 122 ++-------------
.../parquet/TestParquetRecordReaderWrapper.java | 14 +-
.../read/TestParquetFilterPredicate.java | 51 +++++++
.../ql/io/sarg/TestConvertAstToSearchArg.java | 25 ++--
.../clientpositive/parquet_predicate_pushdown.q | 9 ++
.../parquet_predicate_pushdown.q.out | 47 ++++++
7 files changed, 283 insertions(+), 133 deletions(-)
----------------------------------------------------------------------
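In short: partition columns exist only in table metadata, never in the Parquet file schema, so building FilterPredicate leaves for them broke predicate push down on partitioned tables. The converter added below takes the file's MessageType and drops any leaf whose column is absent from it. A minimal sketch of the new entry point (class and method names are from this patch; the schema string and the sarg are illustrative):

    MessageType schema = MessageTypeParser.parseMessageType(
        "message part1 { required int32 id; required binary content; }");
    // 'p' is a partition column and is intentionally absent from the file schema,
    // so any sarg leaf referencing 'p' is skipped during translation.
    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);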
http://git-wip-us.apache.org/repos/asf/hive/blob/724b3193/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetFilterPredicateConverter.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetFilterPredicateConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetFilterPredicateConverter.java
new file mode 100644
index 0000000..f170026
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetFilterPredicateConverter.java
@@ -0,0 +1,148 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.parquet.read;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.io.parquet.FilterPredicateLeafBuilder;
+import org.apache.hadoop.hive.ql.io.parquet.LeafFilterFactory;
+import org.apache.hadoop.hive.ql.io.sarg.ExpressionTree;
+import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
+import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
+import org.apache.parquet.filter2.predicate.FilterApi;
+import org.apache.parquet.filter2.predicate.FilterPredicate;
+import org.apache.parquet.schema.MessageType;
+import org.apache.parquet.schema.Type;
+
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+public class ParquetFilterPredicateConverter {
+ private static final Log LOG = LogFactory.getLog(ParquetFilterPredicateConverter.class);
+
+ /**
+ * Translates the search argument to the filter predicate Parquet uses.
+ * @return the Parquet FilterPredicate translated from the sarg
+ */
+ public static FilterPredicate toFilterPredicate(SearchArgument sarg) {
+ return toFilterPredicate(sarg, null);
+ }
+
+ /**
+ * Translates the search argument to the filter predicate Parquet uses. Only leaves
+ * whose columns appear in the passed schema are included.
+ * @return the Parquet FilterPredicate translated from the sarg
+ */
+ public static FilterPredicate toFilterPredicate(SearchArgument sarg, MessageType schema) {
+ Set<String> columns = null;
+ if (schema != null) {
+ columns = new HashSet<String>();
+ for (Type field : schema.getFields()) {
+ columns.add(field.getName());
+ }
+ }
+
+ return translate(sarg.getExpression(), sarg.getLeaves(), columns);
+ }
+
+ private static FilterPredicate translate(ExpressionTree root, List<PredicateLeaf> leaves, Set<String> columns) {
+ FilterPredicate p = null;
+ switch (root.getOperator()) {
+ case OR:
+ for(ExpressionTree child: root.getChildren()) {
+ if (p == null) {
+ p = translate(child, leaves, columns);
+ } else {
+ FilterPredicate right = translate(child, leaves, columns);
+ // A constant child translates to null (no filter), so skip it.
+ if(right != null){
+ p = FilterApi.or(p, right);
+ }
+ }
+ }
+ return p;
+ case AND:
+ for(ExpressionTree child: root.getChildren()) {
+ if (p == null) {
+ p = translate(child, leaves, columns);
+ } else {
+ FilterPredicate right = translate(child, leaves, columns);
+ // A constant child translates to null (no filter), so skip it.
+ if(right != null){
+ p = FilterApi.and(p, right);
+ }
+ }
+ }
+ return p;
+ case NOT:
+ FilterPredicate op = translate(root.getChildren().get(0), leaves, columns);
+ if (op != null) {
+ return FilterApi.not(op);
+ } else {
+ return null;
+ }
+ case LEAF:
+ PredicateLeaf leaf = leaves.get(root.getLeaf());
+
+ // If columns is null, no schema was supplied, so build the leaf unconditionally.
+ if (columns == null || columns.contains(leaf.getColumnName())) {
+ return buildFilterPredicateFromPredicateLeaf(leaf);
+ } else {
+ // Do not create predicate if the leaf is not on the passed schema.
+ return null;
+ }
+ case CONSTANT:
+ return null; // no filter is generated for a constant
+ default:
+ throw new IllegalStateException("Unknown operator: " +
+ root.getOperator());
+ }
+ }
+
+ private static FilterPredicate buildFilterPredicateFromPredicateLeaf
+ (PredicateLeaf leaf) {
+ LeafFilterFactory leafFilterFactory = new LeafFilterFactory();
+ FilterPredicateLeafBuilder builder;
+ try {
+ builder = leafFilterFactory
+ .getLeafFilterBuilderByType(leaf.getType());
+ if (builder == null) {
+ return null;
+ }
+ if (isMultiLiteralsOperator(leaf.getOperator())) {
+ return builder.buildPredicate(leaf.getOperator(),
+ leaf.getLiteralList(),
+ leaf.getColumnName());
+ } else {
+ return builder
+ .buildPredict(leaf.getOperator(),
+ leaf.getLiteral(),
+ leaf.getColumnName());
+ }
+ } catch (Exception e) {
+ LOG.error("fail to build predicate filter leaf with errors" + e, e);
+ return null;
+ }
+ }
+
+ private static boolean isMultiLiteralsOperator(PredicateLeaf.Operator op) {
+ return (op == PredicateLeaf.Operator.IN) ||
+ (op == PredicateLeaf.Operator.BETWEEN);
+ }
+}
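For reference, a hedged usage sketch of the converter; the builder style follows the tests further below, while the specific calls, column names, and literals are illustrative:

    SearchArgument sarg = SearchArgumentFactory.newBuilder()
        .startAnd()
          .lessThan("id", PredicateLeaf.Type.INTEGER, 10)   // file column: kept
          .equals("p", PredicateLeaf.Type.STRING, "p1")     // partition column: dropped
        .end()
        .build();
    MessageType schema = MessageTypeParser.parseMessageType(
        "message part1 { required int32 id; required binary content; }");
    // Only the 'id' leaf survives; the resulting predicate is roughly lt(id, 10).
    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);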
http://git-wip-us.apache.org/repos/asf/hive/blob/724b3193/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java
index 49e52da..f689b90 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java
@@ -22,17 +22,10 @@ import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.io.IOConstants;
-import org.apache.hadoop.hive.ql.io.parquet.FilterPredicateLeafBuilder;
-import org.apache.hadoop.hive.ql.io.parquet.LeafFilterFactory;
import org.apache.hadoop.hive.ql.io.parquet.ProjectionPusher;
import org.apache.hadoop.hive.ql.io.sarg.ConvertAstToSearchArg;
-import org.apache.hadoop.hive.ql.io.sarg.ExpressionTree;
-import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
-import org.apache.hadoop.hive.ql.plan.TableScanDesc;
-import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Writable;
@@ -46,7 +39,6 @@ import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.parquet.filter2.compat.FilterCompat;
import org.apache.parquet.filter2.compat.RowGroupFilter;
-import org.apache.parquet.filter2.predicate.FilterApi;
import org.apache.parquet.filter2.predicate.FilterPredicate;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.hadoop.ParquetInputFormat;
@@ -57,6 +49,7 @@ import org.apache.parquet.hadoop.metadata.BlockMetaData;
import org.apache.parquet.hadoop.metadata.FileMetaData;
import org.apache.parquet.hadoop.metadata.ParquetMetadata;
import org.apache.parquet.hadoop.util.ContextUtil;
+import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;
import com.google.common.base.Strings;
@@ -139,26 +132,23 @@ public class ParquetRecordReaderWrapper implements RecordReader<NullWritable, A
}
}
- public FilterCompat.Filter setFilter(final JobConf conf) {
- String serializedPushdown = conf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
- String columnNamesString =
- conf.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR);
- if (serializedPushdown == null || columnNamesString == null || serializedPushdown.isEmpty() ||
- columnNamesString.isEmpty()) {
+ public FilterCompat.Filter setFilter(final JobConf conf, MessageType schema) {
+ SearchArgument sarg = ConvertAstToSearchArg.createFromConf(conf);
+ if (sarg == null) {
return null;
}
- SearchArgument sarg =
- ConvertAstToSearchArg.create(Utilities.deserializeExpression
- (serializedPushdown));
- FilterPredicate p = toFilterPredicate(sarg);
+ // Create the Parquet FilterPredicate without including columns that do not exist
+ // on the schema (such as partition columns).
+ FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
if (p != null) {
- LOG.debug("Predicate filter for parquet is " + p.toString());
+ // The filter may contain sensitive information; do not include it in debug logs.
+ LOG.debug("PARQUET predicate push down generated.");
ParquetInputFormat.setFilterPredicate(conf, p);
return FilterCompat.get(p);
} else {
- LOG.debug("No predicate filter can be generated for " + TableScanDesc.FILTER_EXPR_CONF_STR +
- " with the value of " + serializedPushdown);
+ // The filter may contain sensitive information; do not include it in debug logs.
+ LOG.debug("No PARQUET predicate push down is generated.");
return null;
}
}
@@ -250,7 +240,6 @@ public class ParquetRecordReaderWrapper implements RecordReader<NullWritable, A
if (oldSplit instanceof FileSplit) {
final Path finalPath = ((FileSplit) oldSplit).getPath();
jobConf = projectionPusher.pushProjectionsAndFilters(conf, finalPath.getParent());
- FilterCompat.Filter filter = setFilter(jobConf);
final ParquetMetadata parquetMetadata = ParquetFileReader.readFooter(jobConf, finalPath);
final List<BlockMetaData> blocks = parquetMetadata.getBlocks();
@@ -274,6 +263,7 @@ public class ParquetRecordReaderWrapper implements RecordReader<NullWritable, A
return null;
}
+ FilterCompat.Filter filter = setFilter(jobConf, fileMetaData.getSchema());
if (filter != null) {
filtedBlocks = RowGroupFilter.filterRowGroups(filter, splitGroup, fileMetaData.getSchema());
if (filtedBlocks.isEmpty()) {
@@ -310,92 +300,4 @@ public class ParquetRecordReaderWrapper implements RecordReader<NullWritable, A
public List<BlockMetaData> getFiltedBlocks() {
return filtedBlocks;
}
-
- /**
- * Translate the search argument to the filter predicate parquet used
- * @return translate the sarg into a filter predicate
- */
- public static FilterPredicate toFilterPredicate(SearchArgument sarg) {
- return translate(sarg.getExpression(), sarg.getLeaves());
- }
-
- private static boolean isMultiLiteralsOperator(PredicateLeaf.Operator op) {
- return (op == PredicateLeaf.Operator.IN) ||
- (op == PredicateLeaf.Operator.BETWEEN);
- }
-
- private static FilterPredicate translate(ExpressionTree root,
- List<PredicateLeaf> leafs){
- FilterPredicate p = null;
- switch (root.getOperator()) {
- case OR:
- for(ExpressionTree child: root.getChildren()) {
- if (p == null) {
- p = translate(child, leafs);
- } else {
- FilterPredicate right = translate(child, leafs);
- // constant means no filter, ignore it when it is null
- if(right != null){
- p = FilterApi.or(p, right);
- }
- }
- }
- return p;
- case AND:
- for(ExpressionTree child: root.getChildren()) {
- if (p == null) {
- p = translate(child, leafs);
- } else {
- FilterPredicate right = translate(child, leafs);
- // constant means no filter, ignore it when it is null
- if(right != null){
- p = FilterApi.and(p, right);
- }
- }
- }
- return p;
- case NOT:
- FilterPredicate op = translate(root.getChildren().get(0), leafs);
- if (op != null) {
- return FilterApi.not(op);
- } else {
- return null;
- }
- case LEAF:
- return buildFilterPredicateFromPredicateLeaf(leafs.get(root.getLeaf()));
- case CONSTANT:
- return null;// no filter will be executed for constant
- default:
- throw new IllegalStateException("Unknown operator: " +
- root.getOperator());
- }
- }
-
- private static FilterPredicate buildFilterPredicateFromPredicateLeaf
- (PredicateLeaf leaf) {
- LeafFilterFactory leafFilterFactory = new LeafFilterFactory();
- FilterPredicateLeafBuilder builder;
- try {
- builder = leafFilterFactory
- .getLeafFilterBuilderByType(leaf.getType());
- if (builder == null) {
- return null;
- }
- if (isMultiLiteralsOperator(leaf.getOperator())) {
- return builder.buildPredicate(leaf.getOperator(),
- leaf.getLiteralList(),
- leaf.getColumnName());
- } else {
- return builder
- .buildPredict(leaf.getOperator(),
- leaf.getLiteral(),
- leaf.getColumnName());
- }
- } catch (Exception e) {
- LOG.error("fail to build predicate filter leaf with errors" + e, e);
- return null;
- }
- }
-
-
}
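Context for the setFilter() change: the planner serializes the pushed-down expression into the job conf, and ConvertAstToSearchArg.createFromConf() now replaces the hand-rolled null/empty checks deleted above. A hedged sketch of how the sarg reaches the reader (the conf key comes from the removed code; serializedFilterExpr is a hypothetical value):

    JobConf conf = new JobConf();
    // Set by Hive's planner when a predicate can be pushed down.
    conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, serializedFilterExpr);
    SearchArgument sarg = ConvertAstToSearchArg.createFromConf(conf); // null when nothing was pushed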
http://git-wip-us.apache.org/repos/asf/hive/blob/724b3193/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetRecordReaderWrapper.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetRecordReaderWrapper.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetRecordReaderWrapper.java
index 87dd344..f9ca528 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetRecordReaderWrapper.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetRecordReaderWrapper.java
@@ -22,7 +22,7 @@ import static junit.framework.Assert.assertEquals;
import org.apache.hadoop.hive.common.type.HiveChar;
import org.apache.hadoop.hive.common.type.HiveVarchar;
-import org.apache.hadoop.hive.ql.io.parquet.read.ParquetRecordReaderWrapper;
+import org.apache.hadoop.hive.ql.io.parquet.read.ParquetFilterPredicateConverter;
import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
@@ -56,7 +56,7 @@ public class TestParquetRecordReaderWrapper {
.end()
.build();
- FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg);
+ FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
String expected =
"and(and(and(not(eq(x, null)), not(and(lt(y, 20), not(lteq(y, 10))))), not(or(or(eq(z, 1), " +
"eq(z, 2)), eq(z, 3)))), not(eq(a, Binary{\"stinger\"})))";
@@ -76,7 +76,7 @@ public class TestParquetRecordReaderWrapper {
.end()
.build();
assertEquals("lteq(y, Binary{\"hi \"})",
- ParquetRecordReaderWrapper.toFilterPredicate(sarg).toString());
+ ParquetFilterPredicateConverter.toFilterPredicate(sarg).toString());
sarg = SearchArgumentFactory.newBuilder()
.startNot()
@@ -91,7 +91,7 @@ public class TestParquetRecordReaderWrapper {
.end()
.build();
- FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg);
+ FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
String expected =
"and(and(not(eq(x, null)), not(or(or(eq(z, 1), eq(z, 2)), eq(z, 3)))), " +
"not(eq(a, Binary{\"stinger\"})))";
@@ -111,7 +111,7 @@ public class TestParquetRecordReaderWrapper {
.end()
.build();
assertEquals("lteq(y, Binary{\"hi \"})",
- ParquetRecordReaderWrapper.toFilterPredicate(sarg).toString());
+ ParquetFilterPredicateConverter.toFilterPredicate(sarg).toString());
sarg = SearchArgumentFactory.newBuilder()
.startNot()
@@ -126,7 +126,7 @@ public class TestParquetRecordReaderWrapper {
.end()
.build();
- FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg);
+ FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
String expected = "and(and(not(eq(x, null)), not(or(or(eq(z, 1), eq(z, 2)), eq(z, 3)))), " +
"not(eq(a, Binary{\"stinger\"})))";
assertEquals(expected, p.toString());
@@ -146,7 +146,7 @@ public class TestParquetRecordReaderWrapper {
.end()
.build();
- FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg);
+ FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
String expected = "and(and(and(and(lt(x, 22), lt(x1, 22))," +
" lteq(y, Binary{\"hi \"})), eq(z, " +
"0.22)), eq(z1, 0.22))";
http://git-wip-us.apache.org/repos/asf/hive/blob/724b3193/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/read/TestParquetFilterPredicate.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/read/TestParquetFilterPredicate.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/read/TestParquetFilterPredicate.java
new file mode 100644
index 0000000..847a02b
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/read/TestParquetFilterPredicate.java
@@ -0,0 +1,51 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io.parquet.read;
+
+import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
+import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
+import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
+import org.apache.parquet.filter2.predicate.FilterPredicate;
+import org.apache.parquet.schema.MessageType;
+import org.apache.parquet.schema.MessageTypeParser;
+import org.junit.Test;
+
+import static junit.framework.Assert.assertEquals;
+
+public class TestParquetFilterPredicate {
+ @Test
+ public void testFilterColumnsThatDoNoExistOnSchema() {
+ MessageType schema = MessageTypeParser.parseMessageType("message test { required int32 a; required binary stinger; }");
+ SearchArgument sarg = SearchArgumentFactory.newBuilder()
+ .startNot()
+ .startOr()
+ .isNull("a", PredicateLeaf.Type.INTEGER)
+ .between("y", PredicateLeaf.Type.INTEGER, 10, 20) // Column will be removed from filter
+ .in("z", PredicateLeaf.Type.INTEGER, 1, 2, 3) // Column will be removed from filter
+ .nullSafeEquals("a", PredicateLeaf.Type.STRING, "stinger")
+ .end()
+ .end()
+ .build();
+
+ FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
+
+ String expected = "and(not(eq(a, null)), not(eq(a, Binary{\"stinger\"})))";
+ assertEquals(expected, p.toString());
+ }
+}
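For contrast, a sketch (not part of this patch's tests) of the schema-less overload, which keeps every leaf because no column set is supplied:

    FilterPredicate full = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
    // 'full' would also contain the between/in leaves for 'y' and 'z'
    // that the schema-aware call dropped.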
http://git-wip-us.apache.org/repos/asf/hive/blob/724b3193/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestConvertAstToSearchArg.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestConvertAstToSearchArg.java b/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestConvertAstToSearchArg.java
index 85e952f..9e8425a 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestConvertAstToSearchArg.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestConvertAstToSearchArg.java
@@ -24,22 +24,15 @@ import static junit.framework.Assert.assertTrue;
import com.google.common.collect.Sets;
-import org.apache.hadoop.hive.common.type.HiveChar;
-import org.apache.hadoop.hive.common.type.HiveVarchar;
import org.apache.hadoop.hive.ql.exec.Utilities;
-import org.apache.hadoop.hive.ql.io.parquet.read.ParquetRecordReaderWrapper;
+import org.apache.hadoop.hive.ql.io.parquet.read.ParquetFilterPredicateConverter;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
-import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentImpl.PredicateLeafImpl;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
-import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import org.junit.Test;
import java.beans.XMLDecoder;
import java.io.ByteArrayInputStream;
import java.io.UnsupportedEncodingException;
-import java.lang.reflect.Field;
-import java.sql.Date;
-import java.sql.Timestamp;
import java.util.List;
import java.util.Set;
@@ -557,7 +550,7 @@ public class TestConvertAstToSearchArg {
List<PredicateLeaf> leaves = sarg.getLeaves();
assertEquals(9, leaves.size());
- FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg);
+ FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
String[] conditions = new String[]{
"eq(first_name, Binary{\"john\"})", /* first_name = 'john' */
"not(lteq(first_name, Binary{\"greg\"}))", /* 'greg' < first_name */
@@ -849,7 +842,7 @@ public class TestConvertAstToSearchArg {
"lteq(id, 4)" /* id <= 4 */
};
- FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg);
+ FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
String expected = String.format("or(or(or(%1$s, %2$s), %3$s), %4$s)", conditions);
assertEquals(expected, p.toString());
@@ -1279,7 +1272,7 @@ public class TestConvertAstToSearchArg {
"eq(last_name, Binary{\"smith\"})" /* 'smith' = last_name */
};
- FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg);
+ FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
String expected = String.format("and(and(and(%1$s, %2$s), %3$s), %4$s)", conditions);
assertEquals(expected, p.toString());
@@ -1500,7 +1493,7 @@ public class TestConvertAstToSearchArg {
"or(eq(id, 34), eq(id, 50))" /* id in (34,50) */
};
- FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg);
+ FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
String expected = String.format("and(and(%1$s, %2$s), %3$s)", conditions);
assertEquals(expected, p.toString());
@@ -1759,7 +1752,7 @@ public class TestConvertAstToSearchArg {
List<PredicateLeaf> leaves = sarg.getLeaves();
assertEquals(1, leaves.size());
- FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg);
+ FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
String expected =
"and(lt(first_name, Binary{\"greg\"}), not(lteq(first_name, Binary{\"david\"})))";
assertEquals(p.toString(), expected);
@@ -2239,7 +2232,7 @@ public class TestConvertAstToSearchArg {
List<PredicateLeaf> leaves = sarg.getLeaves();
assertEquals(9, leaves.size());
- FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg);
+ FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
String expected = "and(and(and(and(and(and(and(and(and(and(and(and(and(and(and(and(and(" +
"or(or(or(lt(id, 18), lt(id, 10)), lt(id, 13)), lt(id, 16)), " +
"or(or(or(lt(id, 18), lt(id, 11)), lt(id, 13)), lt(id, 16))), " +
@@ -2395,7 +2388,7 @@ public class TestConvertAstToSearchArg {
List<PredicateLeaf> leaves = sarg.getLeaves();
assertEquals(0, leaves.size());
- FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg);
+ FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
assertNull(p);
assertEquals("YES_NO_NULL",
@@ -2650,7 +2643,7 @@ public class TestConvertAstToSearchArg {
List<PredicateLeaf> leaves = sarg.getLeaves();
assertEquals(1, leaves.size());
- FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg);
+ FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
String expected = "and(not(lt(id, 10)), not(lt(id, 10)))";
assertEquals(expected, p.toString());
http://git-wip-us.apache.org/repos/asf/hive/blob/724b3193/ql/src/test/queries/clientpositive/parquet_predicate_pushdown.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/parquet_predicate_pushdown.q b/ql/src/test/queries/clientpositive/parquet_predicate_pushdown.q
new file mode 100644
index 0000000..08af84f
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/parquet_predicate_pushdown.q
@@ -0,0 +1,9 @@
+SET hive.optimize.index.filter=true;
+SET hive.optimize.ppd=true;
+
+-- Test predicate with partitioned columns
+CREATE TABLE part1 (id int, content string) PARTITIONED BY (p string) STORED AS PARQUET;
+ALTER TABLE part1 ADD PARTITION (p='p1');
+INSERT INTO TABLE part1 PARTITION (p='p1') VALUES (1, 'a'), (2, 'b');
+SELECT * FROM part1 WHERE p='p1';
+DROP TABLE part1 PURGE;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/724b3193/ql/src/test/results/clientpositive/parquet_predicate_pushdown.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/parquet_predicate_pushdown.q.out b/ql/src/test/results/clientpositive/parquet_predicate_pushdown.q.out
new file mode 100644
index 0000000..4186618
--- /dev/null
+++ b/ql/src/test/results/clientpositive/parquet_predicate_pushdown.q.out
@@ -0,0 +1,47 @@
+PREHOOK: query: -- Test predicate with partitioned columns
+CREATE TABLE part1 (id int, content string) PARTITIONED BY (p string) STORED AS PARQUET
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@part1
+POSTHOOK: query: -- Test predicate with partitioned columns
+CREATE TABLE part1 (id int, content string) PARTITIONED BY (p string) STORED AS PARQUET
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@part1
+PREHOOK: query: ALTER TABLE part1 ADD PARTITION (p='p1')
+PREHOOK: type: ALTERTABLE_ADDPARTS
+PREHOOK: Output: default@part1
+POSTHOOK: query: ALTER TABLE part1 ADD PARTITION (p='p1')
+POSTHOOK: type: ALTERTABLE_ADDPARTS
+POSTHOOK: Output: default@part1
+POSTHOOK: Output: default@part1@p=p1
+PREHOOK: query: INSERT INTO TABLE part1 PARTITION (p='p1') VALUES (1, 'a'), (2, 'b')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@part1@p=p1
+POSTHOOK: query: INSERT INTO TABLE part1 PARTITION (p='p1') VALUES (1, 'a'), (2, 'b')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@part1@p=p1
+POSTHOOK: Lineage: part1 PARTITION(p=p1).content SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: part1 PARTITION(p=p1).id EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: SELECT * FROM part1 WHERE p='p1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part1
+PREHOOK: Input: default@part1@p=p1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM part1 WHERE p='p1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part1
+POSTHOOK: Input: default@part1@p=p1
+#### A masked pattern was here ####
+1 a p1
+2 b p1
+PREHOOK: query: DROP TABLE part1 PURGE
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@part1
+PREHOOK: Output: default@part1
+POSTHOOK: query: DROP TABLE part1 PURGE
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@part1
+POSTHOOK: Output: default@part1
[5/5] hive git commit: Merge branch 'master' into spark
Posted by xu...@apache.org.
Merge branch 'master' into spark
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f78f6635
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f78f6635
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f78f6635
Branch: refs/heads/spark
Commit: f78f66359cdbd7963c3bdfbc65663010f3531719
Parents: a8c49ef 2519915
Author: xzhang <xz...@xzdt>
Authored: Wed Sep 16 10:00:19 2015 -0700
Committer: xzhang <xz...@xzdt>
Committed: Wed Sep 16 10:00:19 2015 -0700
----------------------------------------------------------------------
ql/pom.xml | 1 +
.../read/ParquetFilterPredicateConverter.java | 148 ++++++++++
.../read/ParquetRecordReaderWrapper.java | 122 +-------
.../parquet/TestParquetRecordReaderWrapper.java | 14 +-
.../read/TestParquetFilterPredicate.java | 51 ++++
.../ql/io/sarg/TestConvertAstToSearchArg.java | 25 +-
.../clientpositive/parquet_predicate_pushdown.q | 9 +
.../clientpositive/unionall_unbalancedppd.q | 72 +++++
.../parquet_predicate_pushdown.q.out | 47 ++++
.../clientpositive/unionall_unbalancedppd.q.out | 280 +++++++++++++++++++
10 files changed, 636 insertions(+), 133 deletions(-)
----------------------------------------------------------------------
[4/5] hive git commit: HIVE-10166: Merge Spark branch to master 7/30/2015 (reviewed by Chao)
Posted by xu...@apache.org.
HIVE-10166: Merge Spark branch to master 7/30/2015 (reviewed by Chao)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/25199156
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/25199156
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/25199156
Branch: refs/heads/spark
Commit: 251991568c5e9e38b3480e9ef5dc972b9da112db
Parents: bc528ba 714b3db
Author: xzhang <xz...@xzdt>
Authored: Fri Jul 31 15:55:04 2015 -0700
Committer: xzhang <xz...@xzdt>
Committed: Fri Jul 31 15:55:04 2015 -0700
----------------------------------------------------------------------
.../org/apache/hadoop/hive/conf/HiveConf.java | 12 +
itests/qtest-spark/pom.xml | 24 +
itests/qtest/pom.xml | 2 +-
.../test/resources/testconfiguration.properties | 48 +-
.../gen/thrift/gen-cpp/ThriftHiveMetastore.cpp | 4708 ++++-
.../gen/thrift/gen-cpp/ThriftHiveMetastore.h | 7807 +++----
.../thrift/gen-cpp/hive_metastore_constants.cpp | 2 +-
.../thrift/gen-cpp/hive_metastore_constants.h | 2 +-
.../gen/thrift/gen-cpp/hive_metastore_types.cpp | 6044 +++++-
.../gen/thrift/gen-cpp/hive_metastore_types.h | 2619 ++-
.../hive/metastore/api/AbortTxnRequest.java | 24 +-
.../metastore/api/AddDynamicPartitions.java | 69 +-
.../metastore/api/AddPartitionsRequest.java | 80 +-
.../hive/metastore/api/AddPartitionsResult.java | 48 +-
.../hadoop/hive/metastore/api/AggrStats.java | 54 +-
.../metastore/api/AlreadyExistsException.java | 24 +-
.../metastore/api/BinaryColumnStatsData.java | 40 +-
.../metastore/api/BooleanColumnStatsData.java | 40 +-
.../hive/metastore/api/CheckLockRequest.java | 24 +-
.../hive/metastore/api/ColumnStatistics.java | 54 +-
.../metastore/api/ColumnStatisticsData.java | 20 +-
.../metastore/api/ColumnStatisticsDesc.java | 58 +-
.../hive/metastore/api/ColumnStatisticsObj.java | 40 +-
.../hive/metastore/api/CommitTxnRequest.java | 24 +-
.../hive/metastore/api/CompactionRequest.java | 62 +-
.../hive/metastore/api/CompactionType.java | 2 +-
.../api/ConfigValSecurityException.java | 24 +-
.../api/CurrentNotificationEventId.java | 24 +-
.../hadoop/hive/metastore/api/Database.java | 115 +-
.../apache/hadoop/hive/metastore/api/Date.java | 24 +-
.../hive/metastore/api/DateColumnStatsData.java | 50 +-
.../hadoop/hive/metastore/api/Decimal.java | 41 +-
.../metastore/api/DecimalColumnStatsData.java | 50 +-
.../metastore/api/DoubleColumnStatsData.java | 50 +-
.../hive/metastore/api/DropPartitionsExpr.java | 43 +-
.../metastore/api/DropPartitionsRequest.java | 82 +-
.../metastore/api/DropPartitionsResult.java | 48 +-
.../hive/metastore/api/EnvironmentContext.java | 61 +-
.../hive/metastore/api/EventRequestType.java | 2 +-
.../hadoop/hive/metastore/api/FieldSchema.java | 58 +-
.../hive/metastore/api/FireEventRequest.java | 79 +-
.../metastore/api/FireEventRequestData.java | 20 +-
.../hive/metastore/api/FireEventResponse.java | 16 +-
.../hadoop/hive/metastore/api/Function.java | 110 +-
.../hadoop/hive/metastore/api/FunctionType.java | 2 +-
.../metastore/api/GetOpenTxnsInfoResponse.java | 54 +-
.../hive/metastore/api/GetOpenTxnsResponse.java | 53 +-
.../api/GetPrincipalsInRoleRequest.java | 24 +-
.../api/GetPrincipalsInRoleResponse.java | 46 +-
.../api/GetRoleGrantsForPrincipalRequest.java | 36 +-
.../api/GetRoleGrantsForPrincipalResponse.java | 46 +-
.../api/GrantRevokePrivilegeRequest.java | 46 +-
.../api/GrantRevokePrivilegeResponse.java | 26 +-
.../metastore/api/GrantRevokeRoleRequest.java | 86 +-
.../metastore/api/GrantRevokeRoleResponse.java | 26 +-
.../hive/metastore/api/GrantRevokeType.java | 2 +-
.../hive/metastore/api/HeartbeatRequest.java | 34 +-
.../metastore/api/HeartbeatTxnRangeRequest.java | 32 +-
.../api/HeartbeatTxnRangeResponse.java | 74 +-
.../hive/metastore/api/HiveObjectPrivilege.java | 52 +-
.../hive/metastore/api/HiveObjectRef.java | 81 +-
.../hive/metastore/api/HiveObjectType.java | 2 +-
.../apache/hadoop/hive/metastore/api/Index.java | 133 +-
.../api/IndexAlreadyExistsException.java | 24 +-
.../metastore/api/InsertEventRequestData.java | 45 +-
.../metastore/api/InvalidInputException.java | 24 +-
.../metastore/api/InvalidObjectException.java | 24 +-
.../api/InvalidOperationException.java | 24 +-
.../api/InvalidPartitionException.java | 24 +-
.../hive/metastore/api/LockComponent.java | 66 +-
.../hadoop/hive/metastore/api/LockLevel.java | 2 +-
.../hadoop/hive/metastore/api/LockRequest.java | 72 +-
.../hadoop/hive/metastore/api/LockResponse.java | 36 +-
.../hadoop/hive/metastore/api/LockState.java | 2 +-
.../hadoop/hive/metastore/api/LockType.java | 2 +-
.../hive/metastore/api/LongColumnStatsData.java | 50 +-
.../hive/metastore/api/MetaException.java | 24 +-
.../hive/metastore/api/NoSuchLockException.java | 24 +-
.../metastore/api/NoSuchObjectException.java | 24 +-
.../hive/metastore/api/NoSuchTxnException.java | 24 +-
.../hive/metastore/api/NotificationEvent.java | 66 +-
.../metastore/api/NotificationEventRequest.java | 34 +-
.../api/NotificationEventResponse.java | 46 +-
.../hive/metastore/api/OpenTxnRequest.java | 40 +-
.../hive/metastore/api/OpenTxnsResponse.java | 45 +-
.../apache/hadoop/hive/metastore/api/Order.java | 32 +-
.../hadoop/hive/metastore/api/Partition.java | 156 +-
.../hive/metastore/api/PartitionEventType.java | 2 +-
.../api/PartitionListComposingSpec.java | 46 +-
.../hive/metastore/api/PartitionSpec.java | 58 +-
.../api/PartitionSpecWithSharedSD.java | 54 +-
.../hive/metastore/api/PartitionWithoutSD.java | 124 +-
.../metastore/api/PartitionsByExprRequest.java | 67 +-
.../metastore/api/PartitionsByExprResult.java | 54 +-
.../metastore/api/PartitionsStatsRequest.java | 90 +-
.../metastore/api/PartitionsStatsResult.java | 72 +-
.../metastore/api/PrincipalPrivilegeSet.java | 184 +-
.../hive/metastore/api/PrincipalType.java | 2 +-
.../hadoop/hive/metastore/api/PrivilegeBag.java | 46 +-
.../hive/metastore/api/PrivilegeGrantInfo.java | 60 +-
.../hive/metastore/api/RequestPartsSpec.java | 56 +-
.../hadoop/hive/metastore/api/ResourceType.java | 2 +-
.../hadoop/hive/metastore/api/ResourceUri.java | 36 +-
.../apache/hadoop/hive/metastore/api/Role.java | 40 +-
.../hive/metastore/api/RolePrincipalGrant.java | 80 +-
.../hadoop/hive/metastore/api/Schema.java | 91 +-
.../hadoop/hive/metastore/api/SerDeInfo.java | 93 +-
.../api/SetPartitionsStatsRequest.java | 46 +-
.../hive/metastore/api/ShowCompactRequest.java | 16 +-
.../hive/metastore/api/ShowCompactResponse.java | 46 +-
.../api/ShowCompactResponseElement.java | 86 +-
.../hive/metastore/api/ShowLocksRequest.java | 16 +-
.../hive/metastore/api/ShowLocksResponse.java | 46 +-
.../metastore/api/ShowLocksResponseElement.java | 114 +-
.../hadoop/hive/metastore/api/SkewedInfo.java | 147 +-
.../hive/metastore/api/StorageDescriptor.java | 242 +-
.../metastore/api/StringColumnStatsData.java | 48 +-
.../apache/hadoop/hive/metastore/api/Table.java | 189 +-
.../hive/metastore/api/TableStatsRequest.java | 61 +-
.../hive/metastore/api/TableStatsResult.java | 46 +-
.../hive/metastore/api/ThriftHiveMetastore.java | 18497 ++++++++++++-----
.../hive/metastore/api/TxnAbortedException.java | 24 +-
.../hadoop/hive/metastore/api/TxnInfo.java | 52 +-
.../hive/metastore/api/TxnOpenException.java | 24 +-
.../hadoop/hive/metastore/api/TxnState.java | 2 +-
.../apache/hadoop/hive/metastore/api/Type.java | 72 +-
.../hive/metastore/api/UnknownDBException.java | 24 +-
.../api/UnknownPartitionException.java | 24 +-
.../metastore/api/UnknownTableException.java | 24 +-
.../hive/metastore/api/UnlockRequest.java | 24 +-
.../hadoop/hive/metastore/api/Version.java | 32 +-
.../metastore/api/hive_metastoreConstants.java | 7 +-
.../gen-php/metastore/ThriftHiveMetastore.php | 3328 ++-
.../src/gen/thrift/gen-php/metastore/Types.php | 1081 +-
.../hive_metastore/ThriftHiveMetastore-remote | 609 +-
.../hive_metastore/ThriftHiveMetastore.py | 3726 +++-
.../thrift/gen-py/hive_metastore/constants.py | 2 +-
.../gen/thrift/gen-py/hive_metastore/ttypes.py | 782 +-
.../thrift/gen-rb/hive_metastore_constants.rb | 2 +-
.../gen/thrift/gen-rb/hive_metastore_types.rb | 2 +-
.../gen/thrift/gen-rb/thrift_hive_metastore.rb | 2 +-
pom.xml | 12 +-
ql/if/queryplan.thrift | 1 +
ql/pom.xml | 6 +
.../gen/thrift/gen-cpp/queryplan_constants.cpp | 2 +-
ql/src/gen/thrift/gen-cpp/queryplan_constants.h | 2 +-
ql/src/gen/thrift/gen-cpp/queryplan_types.cpp | 796 +-
ql/src/gen/thrift/gen-cpp/queryplan_types.h | 294 +-
.../hadoop/hive/ql/plan/api/Adjacency.java | 65 +-
.../hadoop/hive/ql/plan/api/AdjacencyType.java | 2 +-
.../apache/hadoop/hive/ql/plan/api/Graph.java | 87 +-
.../hadoop/hive/ql/plan/api/NodeType.java | 2 +-
.../hadoop/hive/ql/plan/api/Operator.java | 142 +-
.../hadoop/hive/ql/plan/api/OperatorType.java | 7 +-
.../apache/hadoop/hive/ql/plan/api/Query.java | 176 +-
.../hadoop/hive/ql/plan/api/QueryPlan.java | 62 +-
.../apache/hadoop/hive/ql/plan/api/Stage.java | 172 +-
.../hadoop/hive/ql/plan/api/StageType.java | 2 +-
.../apache/hadoop/hive/ql/plan/api/Task.java | 182 +-
.../hadoop/hive/ql/plan/api/TaskType.java | 2 +-
ql/src/gen/thrift/gen-php/Types.php | 119 +-
ql/src/gen/thrift/gen-py/queryplan/constants.py | 2 +-
ql/src/gen/thrift/gen-py/queryplan/ttypes.py | 87 +-
ql/src/gen/thrift/gen-rb/queryplan_constants.rb | 2 +-
ql/src/gen/thrift/gen-rb/queryplan_types.rb | 7 +-
.../hive/ql/exec/HashTableSinkOperator.java | 6 +-
.../hadoop/hive/ql/exec/OperatorFactory.java | 11 +
.../ql/exec/SparkHashTableSinkOperator.java | 17 +-
.../apache/hadoop/hive/ql/exec/Utilities.java | 1 +
.../persistence/MapJoinTableContainerSerDe.java | 63 +-
.../hive/ql/exec/spark/HashTableLoader.java | 26 +-
.../ql/exec/spark/HiveSparkClientFactory.java | 10 +-
.../hive/ql/exec/spark/KryoSerializer.java | 4 +
.../ql/exec/spark/RemoteHiveSparkClient.java | 52 +-
.../exec/spark/SparkDynamicPartitionPruner.java | 268 +
.../hadoop/hive/ql/exec/spark/SparkPlan.java | 3 -
.../hive/ql/exec/spark/SparkPlanGenerator.java | 15 +-
.../hadoop/hive/ql/exec/spark/SparkTask.java | 1 +
.../hive/ql/exec/spark/SparkUtilities.java | 56 +
.../VectorSparkHashTableSinkOperator.java | 104 +
...VectorSparkPartitionPruningSinkOperator.java | 99 +
.../hive/ql/io/CombineHiveInputFormat.java | 59 +-
.../hadoop/hive/ql/io/HiveInputFormat.java | 46 +-
.../DynamicPartitionPruningOptimization.java | 44 +-
.../ql/optimizer/OperatorComparatorFactory.java | 552 +
.../hadoop/hive/ql/optimizer/Optimizer.java | 2 +-
.../SparkRemoveDynamicPruningBySize.java | 73 +
.../physical/GenSparkSkewJoinProcessor.java | 14 +-
.../hive/ql/optimizer/physical/Vectorizer.java | 23 +
.../spark/CombineEquivalentWorkResolver.java | 292 +
.../spark/SparkPartitionPruningSinkDesc.java | 100 +
.../spark/SparkReduceSinkMapJoinProc.java | 2 +-
.../hadoop/hive/ql/parse/CalcitePlanner.java | 2 +-
.../ql/parse/spark/GenSparkProcContext.java | 14 +-
.../hive/ql/parse/spark/GenSparkUtils.java | 111 +-
.../parse/spark/OptimizeSparkProcContext.java | 16 +-
.../hive/ql/parse/spark/SparkCompiler.java | 180 +-
.../SparkPartitionPruningSinkOperator.java | 142 +
.../hive/ql/parse/spark/SplitOpTreeForDPP.java | 151 +
.../hadoop/hive/ql/plan/JoinCondDesc.java | 14 +
.../apache/hadoop/hive/ql/plan/JoinDesc.java | 4 +
.../org/apache/hadoop/hive/ql/plan/MapWork.java | 10 +
.../hadoop/hive/ql/plan/ReduceSinkDesc.java | 1 +
.../hive/ql/plan/SparkHashTableSinkDesc.java | 11 +
.../hadoop/hive/ql/plan/TableScanDesc.java | 6 +-
.../hive/ql/ppd/SyntheticJoinPredicate.java | 14 +-
.../queries/clientpositive/dynamic_rdd_cache.q | 111 +
ql/src/test/queries/clientpositive/groupby5.q | 2 +
.../spark_dynamic_partition_pruning.q | 180 +
.../spark_dynamic_partition_pruning_2.q | 118 +
...spark_vectorized_dynamic_partition_pruning.q | 192 +
.../queries/clientpositive/udf_percentile.q | 2 +
.../groupby2_map_skew_multi_distinct.q.out | 9 +
.../spark/groupby2_multi_distinct.q.out | 9 +
.../groupby3_map_skew_multi_distinct.q.out | 9 +
.../spark/groupby3_multi_distinct.q.out | 9 +
.../spark/groupby_grouping_sets7.q.out | 9 +
.../clientpositive/dynamic_rdd_cache.q.out | 1420 ++
.../test/results/clientpositive/groupby5.q.out | 8 +-
.../clientpositive/spark/auto_join18.q.out | 24 +-
.../clientpositive/spark/auto_join30.q.out | 51 +-
.../clientpositive/spark/auto_join32.q.out | 24 +-
.../spark/auto_smb_mapjoin_14.q.out | 30 +-
.../spark/auto_sortmerge_join_10.q.out | 23 +-
.../results/clientpositive/spark/bucket2.q.out | 3 -
.../results/clientpositive/spark/bucket3.q.out | 3 -
.../results/clientpositive/spark/bucket4.q.out | 3 -
.../spark/column_access_stats.q.out | 4 -
.../spark/dynamic_rdd_cache.q.out | 1073 +
.../clientpositive/spark/groupby10.q.out | 32 +-
.../clientpositive/spark/groupby1_map.q.out | 412 +
.../spark/groupby1_map_nomap.q.out | 408 +
.../spark/groupby1_map_skew.q.out | 427 +
.../clientpositive/spark/groupby1_noskew.q.out | 406 +
.../clientpositive/spark/groupby2_map.q.out | 118 +
.../spark/groupby2_map_multi_distinct.q.out | 232 +
.../spark/groupby2_map_skew.q.out | 129 +
.../clientpositive/spark/groupby2_noskew.q.out | 111 +
.../spark/groupby2_noskew_multi_distinct.q.out | 114 +
.../clientpositive/spark/groupby4_map.q.out | 93 +
.../spark/groupby4_map_skew.q.out | 93 +
.../clientpositive/spark/groupby4_noskew.q.out | 104 +
.../results/clientpositive/spark/groupby5.q.out | 433 +
.../clientpositive/spark/groupby5_map.q.out | 95 +
.../spark/groupby5_map_skew.q.out | 95 +
.../clientpositive/spark/groupby5_noskew.q.out | 418 +
.../results/clientpositive/spark/groupby6.q.out | 113 +
.../clientpositive/spark/groupby6_map.q.out | 109 +
.../spark/groupby6_map_skew.q.out | 122 +
.../clientpositive/spark/groupby6_noskew.q.out | 104 +
.../clientpositive/spark/groupby7_map.q.out | 23 +-
.../spark/groupby7_map_skew.q.out | 38 +-
.../clientpositive/spark/groupby7_noskew.q.out | 17 +-
.../groupby7_noskew_multi_single_reducer.q.out | 18 +-
.../results/clientpositive/spark/groupby8.q.out | 62 +-
.../spark/groupby8_map_skew.q.out | 37 +-
.../spark/groupby_grouping_id2.q.out | 230 +
.../clientpositive/spark/groupby_position.q.out | 37 +-
.../spark/groupby_ppr_multi_distinct.q.out | 346 +
.../spark/groupby_resolution.q.out | 796 +
.../clientpositive/spark/insert_into3.q.out | 33 +-
.../results/clientpositive/spark/join18.q.out | 24 +-
.../results/clientpositive/spark/join22.q.out | 19 +-
.../spark/limit_partition_metadataonly.q.out | 2 -
.../clientpositive/spark/limit_pushdown.q.out | 31 +-
.../spark/list_bucket_dml_2.q.java1.7.out | 3 -
.../clientpositive/spark/load_dyn_part14.q.out | 30 +-
.../clientpositive/spark/nullgroup.q.out | 265 +
.../clientpositive/spark/nullgroup2.q.out | 300 +
.../clientpositive/spark/nullgroup4.q.out | 292 +
.../spark/nullgroup4_multi_distinct.q.out | 133 +
.../spark/optimize_nullscan.q.out | 3 -
.../test/results/clientpositive/spark/pcr.q.out | 6 -
.../results/clientpositive/spark/sample3.q.out | 3 -
.../results/clientpositive/spark/sample9.q.out | 3 -
.../clientpositive/spark/skewjoinopt11.q.out | 60 +-
.../clientpositive/spark/skewjoinopt9.q.out | 20 +-
.../clientpositive/spark/smb_mapjoin_11.q.out | 6 -
.../spark/spark_dynamic_partition_pruning.q.out | 5573 +++++
.../spark_dynamic_partition_pruning_2.q.out | 1015 +
...k_vectorized_dynamic_partition_pruning.q.out | 5822 ++++++
.../clientpositive/spark/temp_table_gb1.q.out | 67 +
.../clientpositive/spark/udaf_collect_set.q.out | 212 +
.../clientpositive/spark/udf_example_add.q.out | 3 -
.../clientpositive/spark/udf_in_file.q.out | 3 -
.../results/clientpositive/spark/udf_max.q.out | 62 +
.../results/clientpositive/spark/udf_min.q.out | 62 +
.../clientpositive/spark/udf_percentile.q.out | 450 +
.../results/clientpositive/spark/union10.q.out | 36 +-
.../results/clientpositive/spark/union11.q.out | 38 +-
.../results/clientpositive/spark/union15.q.out | 23 +-
.../results/clientpositive/spark/union16.q.out | 482 +-
.../results/clientpositive/spark/union2.q.out | 22 +-
.../results/clientpositive/spark/union20.q.out | 18 +-
.../results/clientpositive/spark/union25.q.out | 21 +-
.../results/clientpositive/spark/union28.q.out | 21 +-
.../results/clientpositive/spark/union3.q.out | 45 +-
.../results/clientpositive/spark/union30.q.out | 21 +-
.../results/clientpositive/spark/union4.q.out | 18 +-
.../results/clientpositive/spark/union5.q.out | 20 +-
.../results/clientpositive/spark/union9.q.out | 42 +-
.../clientpositive/spark/union_remove_1.q.out | 23 +-
.../clientpositive/spark/union_remove_15.q.out | 23 +-
.../clientpositive/spark/union_remove_16.q.out | 23 +-
.../clientpositive/spark/union_remove_18.q.out | 23 +-
.../clientpositive/spark/union_remove_19.q.out | 75 +-
.../clientpositive/spark/union_remove_20.q.out | 23 +-
.../clientpositive/spark/union_remove_21.q.out | 21 +-
.../clientpositive/spark/union_remove_22.q.out | 46 +-
.../clientpositive/spark/union_remove_24.q.out | 23 +-
.../clientpositive/spark/union_remove_25.q.out | 59 +-
.../clientpositive/spark/union_remove_4.q.out | 23 +-
.../clientpositive/spark/union_remove_6.q.out | 23 +-
.../spark/union_remove_6_subq.q.out | 64 +-
.../clientpositive/spark/union_remove_7.q.out | 23 +-
.../clientpositive/spark/union_top_level.q.out | 59 +-
.../clientpositive/spark/union_view.q.out | 9 -
.../spark/vector_count_distinct.q.out | 31 +-
.../spark/vector_decimal_mapjoin.q.out | 1 +
.../clientpositive/spark/vector_elt.q.out | 7 -
.../spark/vector_left_outer_join.q.out | 2 +
.../spark/vector_mapjoin_reduce.q.out | 1 +
.../spark/vector_string_concat.q.out | 3 -
.../spark/vectorization_decimal_date.q.out | 4 -
.../spark/vectorization_div0.q.out | 3 -
.../clientpositive/spark/vectorized_case.q.out | 3 -
.../spark/vectorized_mapjoin.q.out | 1 +
.../spark/vectorized_math_funcs.q.out | 3 -
.../spark/vectorized_nested_mapjoin.q.out | 2 +
.../spark/vectorized_string_funcs.q.out | 3 -
.../results/clientpositive/udf_percentile.q.out | 104 +-
.../gen/thrift/gen-cpp/complex_constants.cpp | 2 +-
.../src/gen/thrift/gen-cpp/complex_constants.h | 2 +-
serde/src/gen/thrift/gen-cpp/complex_types.cpp | 442 +-
serde/src/gen/thrift/gen-cpp/complex_types.h | 174 +-
.../gen/thrift/gen-cpp/megastruct_constants.cpp | 2 +-
.../gen/thrift/gen-cpp/megastruct_constants.h | 2 +-
.../src/gen/thrift/gen-cpp/megastruct_types.cpp | 585 +-
serde/src/gen/thrift/gen-cpp/megastruct_types.h | 175 +-
.../src/gen/thrift/gen-cpp/serde_constants.cpp | 2 +-
serde/src/gen/thrift/gen-cpp/serde_constants.h | 2 +-
serde/src/gen/thrift/gen-cpp/serde_types.cpp | 5 +-
serde/src/gen/thrift/gen-cpp/serde_types.h | 5 +-
.../gen/thrift/gen-cpp/testthrift_constants.cpp | 2 +-
.../gen/thrift/gen-cpp/testthrift_constants.h | 2 +-
.../src/gen/thrift/gen-cpp/testthrift_types.cpp | 95 +-
serde/src/gen/thrift/gen-cpp/testthrift_types.h | 45 +-
.../hadoop/hive/serde/serdeConstants.java | 7 +-
.../hadoop/hive/serde/test/InnerStruct.java | 24 +-
.../hadoop/hive/serde/test/ThriftTestObj.java | 62 +-
.../hadoop/hive/serde2/thrift/test/Complex.java | 279 +-
.../hive/serde2/thrift/test/IntString.java | 40 +-
.../hive/serde2/thrift/test/MegaStruct.java | 521 +-
.../hive/serde2/thrift/test/MiniStruct.java | 38 +-
.../hadoop/hive/serde2/thrift/test/MyEnum.java | 2 +-
.../hive/serde2/thrift/test/PropValueUnion.java | 60 +-
.../hive/serde2/thrift/test/SetIntString.java | 54 +-
serde/src/gen/thrift/gen-php/Types.php | 15 +-
.../org/apache/hadoop/hive/serde/Types.php | 373 +-
.../src/gen/thrift/gen-py/complex/constants.py | 2 +-
serde/src/gen/thrift/gen-py/complex/ttypes.py | 50 +-
.../gen/thrift/gen-py/megastruct/constants.py | 2 +-
.../src/gen/thrift/gen-py/megastruct/ttypes.py | 44 +-
.../org_apache_hadoop_hive_serde/constants.py | 2 +-
.../org_apache_hadoop_hive_serde/ttypes.py | 2 +-
.../gen/thrift/gen-py/testthrift/constants.py | 2 +-
.../src/gen/thrift/gen-py/testthrift/ttypes.py | 14 +-
.../src/gen/thrift/gen-rb/complex_constants.rb | 2 +-
serde/src/gen/thrift/gen-rb/complex_types.rb | 2 +-
.../gen/thrift/gen-rb/megastruct_constants.rb | 2 +-
serde/src/gen/thrift/gen-rb/megastruct_types.rb | 2 +-
serde/src/gen/thrift/gen-rb/serde_constants.rb | 2 +-
serde/src/gen/thrift/gen-rb/serde_types.rb | 2 +-
.../gen/thrift/gen-rb/testthrift_constants.rb | 2 +-
serde/src/gen/thrift/gen-rb/testthrift_types.rb | 2 +-
.../lazy/fast/LazySimpleDeserializeRead.java | 4 +-
service/src/gen/thrift/gen-cpp/TCLIService.cpp | 458 +-
service/src/gen/thrift/gen-cpp/TCLIService.h | 821 +-
.../thrift/gen-cpp/TCLIService_constants.cpp | 2 +-
.../gen/thrift/gen-cpp/TCLIService_constants.h | 2 +-
.../gen/thrift/gen-cpp/TCLIService_types.cpp | 3250 ++-
.../src/gen/thrift/gen-cpp/TCLIService_types.h | 1482 +-
service/src/gen/thrift/gen-cpp/ThriftHive.cpp | 286 +-
service/src/gen/thrift/gen-cpp/ThriftHive.h | 389 +-
.../thrift/gen-cpp/hive_service_constants.cpp | 2 +-
.../gen/thrift/gen-cpp/hive_service_constants.h | 2 +-
.../gen/thrift/gen-cpp/hive_service_types.cpp | 110 +-
.../src/gen/thrift/gen-cpp/hive_service_types.h | 75 +-
.../hadoop/hive/service/HiveClusterStatus.java | 68 +-
.../hive/service/HiveServerException.java | 40 +-
.../hadoop/hive/service/JobTrackerState.java | 2 +-
.../apache/hadoop/hive/service/ThriftHive.java | 914 +-
.../service/cli/thrift/TArrayTypeEntry.java | 24 +-
.../hive/service/cli/thrift/TBinaryColumn.java | 64 +-
.../hive/service/cli/thrift/TBoolColumn.java | 62 +-
.../hive/service/cli/thrift/TBoolValue.java | 26 +-
.../hive/service/cli/thrift/TByteColumn.java | 62 +-
.../hive/service/cli/thrift/TByteValue.java | 26 +-
.../hive/service/cli/thrift/TCLIService.java | 1734 +-
.../cli/thrift/TCLIServiceConstants.java | 7 +-
.../cli/thrift/TCancelDelegationTokenReq.java | 32 +-
.../cli/thrift/TCancelDelegationTokenResp.java | 24 +-
.../service/cli/thrift/TCancelOperationReq.java | 24 +-
.../cli/thrift/TCancelOperationResp.java | 24 +-
.../service/cli/thrift/TCloseOperationReq.java | 24 +-
.../service/cli/thrift/TCloseOperationResp.java | 24 +-
.../service/cli/thrift/TCloseSessionReq.java | 24 +-
.../service/cli/thrift/TCloseSessionResp.java | 24 +-
.../apache/hive/service/cli/thrift/TColumn.java | 20 +-
.../hive/service/cli/thrift/TColumnDesc.java | 50 +-
.../hive/service/cli/thrift/TColumnValue.java | 20 +-
.../hive/service/cli/thrift/TDoubleColumn.java | 62 +-
.../hive/service/cli/thrift/TDoubleValue.java | 26 +-
.../cli/thrift/TExecuteStatementReq.java | 87 +-
.../cli/thrift/TExecuteStatementResp.java | 34 +-
.../service/cli/thrift/TFetchOrientation.java | 2 +-
.../service/cli/thrift/TFetchResultsReq.java | 54 +-
.../service/cli/thrift/TFetchResultsResp.java | 42 +-
.../service/cli/thrift/TGetCatalogsReq.java | 24 +-
.../service/cli/thrift/TGetCatalogsResp.java | 34 +-
.../hive/service/cli/thrift/TGetColumnsReq.java | 58 +-
.../service/cli/thrift/TGetColumnsResp.java | 34 +-
.../cli/thrift/TGetDelegationTokenReq.java | 40 +-
.../cli/thrift/TGetDelegationTokenResp.java | 34 +-
.../service/cli/thrift/TGetFunctionsReq.java | 50 +-
.../service/cli/thrift/TGetFunctionsResp.java | 34 +-
.../hive/service/cli/thrift/TGetInfoReq.java | 36 +-
.../hive/service/cli/thrift/TGetInfoResp.java | 32 +-
.../hive/service/cli/thrift/TGetInfoType.java | 2 +-
.../hive/service/cli/thrift/TGetInfoValue.java | 20 +-
.../cli/thrift/TGetOperationStatusReq.java | 24 +-
.../cli/thrift/TGetOperationStatusResp.java | 62 +-
.../cli/thrift/TGetResultSetMetadataReq.java | 24 +-
.../cli/thrift/TGetResultSetMetadataResp.java | 34 +-
.../hive/service/cli/thrift/TGetSchemasReq.java | 42 +-
.../service/cli/thrift/TGetSchemasResp.java | 34 +-
.../service/cli/thrift/TGetTableTypesReq.java | 24 +-
.../service/cli/thrift/TGetTableTypesResp.java | 34 +-
.../hive/service/cli/thrift/TGetTablesReq.java | 79 +-
.../hive/service/cli/thrift/TGetTablesResp.java | 34 +-
.../service/cli/thrift/TGetTypeInfoReq.java | 24 +-
.../service/cli/thrift/TGetTypeInfoResp.java | 34 +-
.../service/cli/thrift/THandleIdentifier.java | 50 +-
.../hive/service/cli/thrift/TI16Column.java | 62 +-
.../hive/service/cli/thrift/TI16Value.java | 26 +-
.../hive/service/cli/thrift/TI32Column.java | 62 +-
.../hive/service/cli/thrift/TI32Value.java | 26 +-
.../hive/service/cli/thrift/TI64Column.java | 62 +-
.../hive/service/cli/thrift/TI64Value.java | 26 +-
.../hive/service/cli/thrift/TMapTypeEntry.java | 32 +-
.../service/cli/thrift/TOpenSessionReq.java | 91 +-
.../service/cli/thrift/TOpenSessionResp.java | 91 +-
.../service/cli/thrift/TOperationHandle.java | 54 +-
.../service/cli/thrift/TOperationState.java | 2 +-
.../hive/service/cli/thrift/TOperationType.java | 2 +-
.../service/cli/thrift/TPrimitiveTypeEntry.java | 38 +-
.../service/cli/thrift/TProtocolVersion.java | 2 +-
.../cli/thrift/TRenewDelegationTokenReq.java | 32 +-
.../cli/thrift/TRenewDelegationTokenResp.java | 24 +-
.../apache/hive/service/cli/thrift/TRow.java | 46 +-
.../apache/hive/service/cli/thrift/TRowSet.java | 86 +-
.../hive/service/cli/thrift/TSessionHandle.java | 24 +-
.../apache/hive/service/cli/thrift/TStatus.java | 83 +-
.../hive/service/cli/thrift/TStatusCode.java | 2 +-
.../hive/service/cli/thrift/TStringColumn.java | 62 +-
.../hive/service/cli/thrift/TStringValue.java | 26 +-
.../service/cli/thrift/TStructTypeEntry.java | 50 +-
.../hive/service/cli/thrift/TTableSchema.java | 46 +-
.../hive/service/cli/thrift/TTypeDesc.java | 46 +-
.../hive/service/cli/thrift/TTypeEntry.java | 20 +-
.../apache/hive/service/cli/thrift/TTypeId.java | 2 +-
.../service/cli/thrift/TTypeQualifierValue.java | 20 +-
.../service/cli/thrift/TTypeQualifiers.java | 54 +-
.../service/cli/thrift/TUnionTypeEntry.java | 50 +-
.../cli/thrift/TUserDefinedTypeEntry.java | 24 +-
service/src/gen/thrift/gen-php/TCLIService.php | 269 +-
service/src/gen/thrift/gen-php/ThriftHive.php | 125 +-
service/src/gen/thrift/gen-php/Types.php | 30 +-
.../gen-py/TCLIService/TCLIService-remote | 102 +-
.../thrift/gen-py/TCLIService/TCLIService.py | 439 +-
.../gen/thrift/gen-py/TCLIService/constants.py | 2 +-
.../src/gen/thrift/gen-py/TCLIService/ttypes.py | 478 +-
.../gen-py/hive_service/ThriftHive-remote | 1014 +-
.../thrift/gen-py/hive_service/ThriftHive.py | 266 +-
.../gen/thrift/gen-py/hive_service/constants.py | 2 +-
.../gen/thrift/gen-py/hive_service/ttypes.py | 19 +-
.../gen/thrift/gen-rb/hive_service_constants.rb | 2 +-
.../src/gen/thrift/gen-rb/hive_service_types.rb | 2 +-
.../src/gen/thrift/gen-rb/t_c_l_i_service.rb | 2 +-
.../thrift/gen-rb/t_c_l_i_service_constants.rb | 2 +-
.../gen/thrift/gen-rb/t_c_l_i_service_types.rb | 2 +-
service/src/gen/thrift/gen-rb/thrift_hive.rb | 2 +-
.../cli/thrift/ThriftBinaryCLIService.java | 1 +
spark-client/pom.xml | 5 +
.../hive/spark/client/SparkClientImpl.java | 21 +-
.../hive/spark/client/SparkClientUtilities.java | 13 +-
.../hive/spark/client/TestSparkClient.java | 4 +-
497 files changed, 79181 insertions(+), 24699 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/25199156/ql/pom.xml
----------------------------------------------------------------------
[3/5] hive git commit: HIVE-11425 - submitting a query via CLI
against... (Eugene Koifman, reviewed by Prasanth Jayachandran)
Posted by xu...@apache.org.
HIVE-11425 - submitting a query via CLI against... (Eugene Koifman, reviewed by Prasanth Jayachandran)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/bc528ba3
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/bc528ba3
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/bc528ba3
Branch: refs/heads/spark
Commit: bc528ba35d58af61f4d854003d99af50818f909a
Parents: 8c0016a
Author: Eugene Koifman <ek...@hortonworks.com>
Authored: Fri Jul 31 13:20:23 2015 -0700
Committer: Eugene Koifman <ek...@hortonworks.com>
Committed: Fri Jul 31 13:22:14 2015 -0700
----------------------------------------------------------------------
ql/pom.xml | 1 +
1 file changed, 1 insertion(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/bc528ba3/ql/pom.xml
----------------------------------------------------------------------
diff --git a/ql/pom.xml b/ql/pom.xml
index 6026c49..fc66591 100644
--- a/ql/pom.xml
+++ b/ql/pom.xml
@@ -707,6 +707,7 @@
<include>org.apache.hive.shims:hive-shims-0.20S</include>
<include>org.apache.hive.shims:hive-shims-0.23</include>
<include>org.apache.hive.shims:hive-shims-common</include>
+ <include>org.apache.hive:hive-storage-api</include>
<include>com.googlecode.javaewah:JavaEWAH</include>
<include>javolution:javolution</include>
<include>com.google.protobuf:protobuf-java</include>
[2/5] hive git commit: HIVE-11384 : Add a test case that covers both
HIVE-11271 and HIVE-11333 (Yongzhi Chen via Szehon)
Posted by xu...@apache.org.
HIVE-11384 : Add a test case that covers both HIVE-11271 and HIVE-11333 (Yongzhi Chen via Szehon)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8c0016aa
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8c0016aa
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8c0016aa
Branch: refs/heads/spark
Commit: 8c0016aa6eee33777ff38363c307bc00b9081770
Parents: 724b319
Author: Szehon Ho <sz...@cloudera.com>
Authored: Fri Jul 31 12:07:34 2015 -0700
Committer: Szehon Ho <sz...@cloudera.com>
Committed: Fri Jul 31 12:07:34 2015 -0700
----------------------------------------------------------------------
.../clientpositive/unionall_unbalancedppd.q | 72 +++++
.../clientpositive/unionall_unbalancedppd.q.out | 280 +++++++++++++++++++
2 files changed, 352 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/8c0016aa/ql/src/test/queries/clientpositive/unionall_unbalancedppd.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/unionall_unbalancedppd.q b/ql/src/test/queries/clientpositive/unionall_unbalancedppd.q
index 0825c2d..ab0a70b 100644
--- a/ql/src/test/queries/clientpositive/unionall_unbalancedppd.q
+++ b/ql/src/test/queries/clientpositive/unionall_unbalancedppd.q
@@ -102,6 +102,25 @@ from union_all_bug_test_2
) A
WHERE (filter = 1 or filter = 0);
+explain
+
+SELECT f1
+FROM (
+
+SELECT
+f1
+, if('helloworld' like '%hello%' ,f1,f2) as filter
+FROM union_all_bug_test_1
+
+union all
+
+select
+f1
+, 0 as filter
+from union_all_bug_test_2
+) A
+WHERE (f1 = 1);
+
SELECT f1
FROM (
@@ -118,3 +137,56 @@ f1
from union_all_bug_test_2
) A
WHERE (f1 = 1);
+
+explain
+
+SELECT f1
+FROM (
+
+SELECT
+f1
+, if('helloworld' like '%hello%' ,f1,f2) as filter
+FROM union_all_bug_test_1
+
+union all
+
+select
+f1
+, 0 as filter
+from union_all_bug_test_2
+) A
+WHERE (filter = 1 and f1 = 1);
+
+SELECT f1
+FROM (
+
+SELECT
+f1
+, if('helloworld' like '%hello%' ,f1,f2) as filter
+FROM union_all_bug_test_1
+
+union all
+
+select
+f1
+, 0 as filter
+from union_all_bug_test_2
+) A
+WHERE (filter = 1 and f1 = 1);
+
+SELECT f1
+FROM (
+
+SELECT
+f1
+, if('helloworld' like '%hello%' ,f1,f2) as filter
+FROM union_all_bug_test_1
+
+union all
+
+select
+f1
+, 0 as filter
+from union_all_bug_test_2
+) A
+WHERE (f1 = 1 and filter = 1);
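The queries added above run against two small tables that are created earlier
in the .q file, outside this hunk. A minimal setup sketch for running the new
statements standalone; the schemas and rows here are assumptions inferred from
the queries and from the row counts in the plans below, not copied from the
test:

    create table if not exists union_all_bug_test_1 (f1 int, f2 int);  -- assumed schema
    create table if not exists union_all_bug_test_2 (f1 int);          -- assumed schema
    insert into table union_all_bug_test_1 values (1, 1), (0, 0);      -- assumed data
    insert into table union_all_bug_test_2 values (1), (0);            -- assumed data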
http://git-wip-us.apache.org/repos/asf/hive/blob/8c0016aa/ql/src/test/results/clientpositive/unionall_unbalancedppd.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/unionall_unbalancedppd.q.out b/ql/src/test/results/clientpositive/unionall_unbalancedppd.q.out
index 46828e9..88a6f30 100644
--- a/ql/src/test/results/clientpositive/unionall_unbalancedppd.q.out
+++ b/ql/src/test/results/clientpositive/unionall_unbalancedppd.q.out
@@ -329,6 +329,105 @@ POSTHOOK: Input: default@union_all_bug_test_2
0
1
0
+PREHOOK: query: explain
+
+SELECT f1
+FROM (
+
+SELECT
+f1
+, if('helloworld' like '%hello%' ,f1,f2) as filter
+FROM union_all_bug_test_1
+
+union all
+
+select
+f1
+, 0 as filter
+from union_all_bug_test_2
+) A
+WHERE (f1 = 1)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+
+SELECT f1
+FROM (
+
+SELECT
+f1
+, if('helloworld' like '%hello%' ,f1,f2) as filter
+FROM union_all_bug_test_1
+
+union all
+
+select
+f1
+, 0 as filter
+from union_all_bug_test_2
+) A
+WHERE (f1 = 1)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: union_all_bug_test_1
+ Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (f1 = 1) (type: boolean)
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: 1 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+ Union
+ Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: 1 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TableScan
+ alias: union_all_bug_test_2
+ Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (f1 = 1) (type: boolean)
+ Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: 1 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+ Union
+ Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: 1 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
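The plan above shows pushdown working for a predicate on a column present in
both branches: the outer f1 = 1 lands in a Filter Operator under each
TableScan, below the Union, and the unused filter column is pruned away. A
sketch of what the rewritten query effectively computes (an illustrative
rewrite, not Hive output):

    select f1 from union_all_bug_test_1 where f1 = 1
    union all
    select f1 from union_all_bug_test_2 where f1 = 1;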
PREHOOK: query: SELECT f1
FROM (
@@ -371,3 +470,184 @@ POSTHOOK: Input: default@union_all_bug_test_2
#### A masked pattern was here ####
1
1
+PREHOOK: query: explain
+
+SELECT f1
+FROM (
+
+SELECT
+f1
+, if('helloworld' like '%hello%' ,f1,f2) as filter
+FROM union_all_bug_test_1
+
+union all
+
+select
+f1
+, 0 as filter
+from union_all_bug_test_2
+) A
+WHERE (filter = 1 and f1 = 1)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+
+SELECT f1
+FROM (
+
+SELECT
+f1
+, if('helloworld' like '%hello%' ,f1,f2) as filter
+FROM union_all_bug_test_1
+
+union all
+
+select
+f1
+, 0 as filter
+from union_all_bug_test_2
+) A
+WHERE (filter = 1 and f1 = 1)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: union_all_bug_test_1
+ Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((if(true, f1, f2) = 1) and (f1 = 1)) (type: boolean)
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: 1 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+ Union
+ Statistics: Num rows: 2 Data size: 3 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: 1 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TableScan
+ alias: union_all_bug_test_2
+ Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: false (type: boolean)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Select Operator
+ expressions: 1 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Union
+ Statistics: Num rows: 2 Data size: 3 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: 1 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
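With filter = 1 and f1 = 1, the plan above also shows constant folding at
work: 'helloworld' like '%hello%' evaluates to true, so the first branch keeps
the predicate (if(true, f1, f2) = 1) and (f1 = 1), while in the second branch
0 = 1 folds to false and that scan produces no rows. An illustrative rewrite
of the folded predicates (again a sketch, not Hive output):

    select f1 from union_all_bug_test_1
    where if(true, f1, f2) = 1 and f1 = 1  -- LIKE expression folded to true
    union all
    select f1 from union_all_bug_test_2
    where false;                           -- 0 = 1 folded to false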
+PREHOOK: query: SELECT f1
+FROM (
+
+SELECT
+f1
+, if('helloworld' like '%hello%' ,f1,f2) as filter
+FROM union_all_bug_test_1
+
+union all
+
+select
+f1
+, 0 as filter
+from union_all_bug_test_2
+) A
+WHERE (filter = 1 and f1 = 1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@union_all_bug_test_1
+PREHOOK: Input: default@union_all_bug_test_2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT f1
+FROM (
+
+SELECT
+f1
+, if('helloworld' like '%hello%' ,f1,f2) as filter
+FROM union_all_bug_test_1
+
+union all
+
+select
+f1
+, 0 as filter
+from union_all_bug_test_2
+) A
+WHERE (filter = 1 and f1 = 1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@union_all_bug_test_1
+POSTHOOK: Input: default@union_all_bug_test_2
+#### A masked pattern was here ####
+1
+PREHOOK: query: SELECT f1
+FROM (
+
+SELECT
+f1
+, if('helloworld' like '%hello%' ,f1,f2) as filter
+FROM union_all_bug_test_1
+
+union all
+
+select
+f1
+, 0 as filter
+from union_all_bug_test_2
+) A
+WHERE (f1 = 1 and filter = 1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@union_all_bug_test_1
+PREHOOK: Input: default@union_all_bug_test_2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT f1
+FROM (
+
+SELECT
+f1
+, if('helloworld' like '%hello%' ,f1,f2) as filter
+FROM union_all_bug_test_1
+
+union all
+
+select
+f1
+, 0 as filter
+from union_all_bug_test_2
+) A
+WHERE (f1 = 1 and filter = 1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@union_all_bug_test_1
+POSTHOOK: Input: default@union_all_bug_test_2
+#### A masked pattern was here ####
+1
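The last two SELECTs differ only in the order of the conjuncts, and both
return the single row 1, so pushdown does not depend on predicate order. A
hypothetical extra check, not part of the test, is to explain the reversed
form and compare it with the (filter = 1 and f1 = 1) plan shown earlier:

    explain
    select f1 from (
      select f1, if('helloworld' like '%hello%', f1, f2) as filter
      from union_all_bug_test_1
      union all
      select f1, 0 as filter from union_all_bug_test_2
    ) A
    where (f1 = 1 and filter = 1);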