Posted to commits@hive.apache.org by xu...@apache.org on 2015/09/16 19:07:03 UTC

[1/5] hive git commit: HIVE-11401: Predicate push down does not work with Parquet when partitions are in the expression (Sergio Pena, reviewed by Szehon Ho)

Repository: hive
Updated Branches:
  refs/heads/spark a8c49ef41 -> f78f66359


HIVE-11401: Predicate push down does not work with Parquet when partitions are in the expression (Sergio Pena, reviewed by Szehon Ho)
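
The failure mode: partition columns can appear in the pushed-down filter
expression, but they are not stored inside the Parquet files, so the generated
FilterPredicate referenced columns the Parquet reader cannot resolve (Parquet
validates predicate columns against the file schema when filtering row groups).
The patch adds a toFilterPredicate(sarg, schema) overload that drops any leaf
whose column is absent from the file schema. A minimal sketch, modeled on the
TestParquetFilterPredicate added below; the class name and the column `p`
(standing in for a partition column) are illustrative, not part of the patch:

    import org.apache.hadoop.hive.ql.io.parquet.read.ParquetFilterPredicateConverter;
    import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
    import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
    import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
    import org.apache.parquet.filter2.predicate.FilterPredicate;
    import org.apache.parquet.schema.MessageType;
    import org.apache.parquet.schema.MessageTypeParser;

    // Illustrative sketch (not part of the patch); assumes the hive ql and
    // parquet jars from this branch on the classpath.
    public class PartitionPredicateSketch {
      public static void main(String[] args) {
        // The file schema contains only `a`; `p` stands in for a partition
        // column, which lives in table metadata but not in the Parquet files.
        MessageType schema = MessageTypeParser.parseMessageType(
            "message test { required int32 a; }");

        SearchArgument sarg = SearchArgumentFactory.newBuilder()
            .startAnd()
              .isNull("a", PredicateLeaf.Type.INTEGER)
              .between("p", PredicateLeaf.Type.INTEGER, 10, 20)
            .end()
            .build();

        // The new overload drops the leaf on `p`, so Parquet is never asked
        // to evaluate a column it does not have; the old one-argument
        // overload translated every leaf unconditionally.
        FilterPredicate p =
            ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
        System.out.println(p); // eq(a, null)
      }
    }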


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/724b3193
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/724b3193
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/724b3193

Branch: refs/heads/spark
Commit: 724b31930718eea606dfe6d95eda7385209caa5f
Parents: 7df9d7a
Author: Sergio Pena <se...@cloudera.com>
Authored: Fri Jul 31 09:48:28 2015 -0500
Committer: Sergio Pena <se...@cloudera.com>
Committed: Fri Jul 31 09:48:28 2015 -0500

----------------------------------------------------------------------
 .../read/ParquetFilterPredicateConverter.java   | 148 +++++++++++++++++++
 .../read/ParquetRecordReaderWrapper.java        | 122 ++-------------
 .../parquet/TestParquetRecordReaderWrapper.java |  14 +-
 .../read/TestParquetFilterPredicate.java        |  51 +++++++
 .../ql/io/sarg/TestConvertAstToSearchArg.java   |  25 ++--
 .../clientpositive/parquet_predicate_pushdown.q |   9 ++
 .../parquet_predicate_pushdown.q.out            |  47 ++++++
 7 files changed, 283 insertions(+), 133 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/724b3193/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetFilterPredicateConverter.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetFilterPredicateConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetFilterPredicateConverter.java
new file mode 100644
index 0000000..f170026
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetFilterPredicateConverter.java
@@ -0,0 +1,148 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.parquet.read;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.io.parquet.FilterPredicateLeafBuilder;
+import org.apache.hadoop.hive.ql.io.parquet.LeafFilterFactory;
+import org.apache.hadoop.hive.ql.io.sarg.ExpressionTree;
+import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
+import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
+import org.apache.parquet.filter2.predicate.FilterApi;
+import org.apache.parquet.filter2.predicate.FilterPredicate;
+import org.apache.parquet.schema.MessageType;
+import org.apache.parquet.schema.Type;
+
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+public class ParquetFilterPredicateConverter {
+  private static final Log LOG = LogFactory.getLog(ParquetFilterPredicateConverter.class);
+
+  /**
+   * Translates the search argument to the filter predicate Parquet uses.
+   * @return the search argument translated into a Parquet filter predicate
+   */
+  public static FilterPredicate toFilterPredicate(SearchArgument sarg) {
+    return toFilterPredicate(sarg, null);
+  }
+
+  /**
+   * Translates the search argument to the filter predicate Parquet uses,
+   * including only the columns present in the passed schema.
+   * @return the search argument translated into a Parquet filter predicate
+   */
+  public static FilterPredicate toFilterPredicate(SearchArgument sarg, MessageType schema) {
+    Set<String> columns = null;
+    if (schema != null) {
+      columns = new HashSet<String>();
+      for (Type field : schema.getFields()) {
+        columns.add(field.getName());
+      }
+    }
+
+    return translate(sarg.getExpression(), sarg.getLeaves(), columns);
+  }
+
+  private static FilterPredicate translate(ExpressionTree root, List<PredicateLeaf> leaves, Set<String> columns) {
+    FilterPredicate p = null;
+    switch (root.getOperator()) {
+      case OR:
+        for(ExpressionTree child: root.getChildren()) {
+          if (p == null) {
+            p = translate(child, leaves, columns);
+          } else {
+            FilterPredicate right = translate(child, leaves, columns);
+            // A null child translates to no filter (e.g. a constant); skip it.
+            if (right != null) {
+              p = FilterApi.or(p, right);
+            }
+          }
+        }
+        return p;
+      case AND:
+        for(ExpressionTree child: root.getChildren()) {
+          if (p == null) {
+            p = translate(child, leaves, columns);
+          } else {
+            FilterPredicate right = translate(child, leaves, columns);
+            // A null child translates to no filter (e.g. a constant); skip it.
+            if (right != null) {
+              p = FilterApi.and(p, right);
+            }
+          }
+        }
+        return p;
+      case NOT:
+        FilterPredicate op = translate(root.getChildren().get(0), leaves, columns);
+        if (op != null) {
+          return FilterApi.not(op);
+        } else {
+          return null;
+        }
+      case LEAF:
+        PredicateLeaf leaf = leaves.get(root.getLeaf());
+
+        // When no schema was passed (columns == null), always build the leaf
+        if (columns == null || columns.contains(leaf.getColumnName())) {
+          return buildFilterPredicateFromPredicateLeaf(leaf);
+        } else {
+          // Do not create predicate if the leaf is not on the passed schema.
+          return null;
+        }
+      case CONSTANT:
+        return null; // no filter is generated for a constant expression
+      default:
+        throw new IllegalStateException("Unknown operator: " +
+            root.getOperator());
+    }
+  }
+
+  private static FilterPredicate buildFilterPredicateFromPredicateLeaf(
+      PredicateLeaf leaf) {
+    LeafFilterFactory leafFilterFactory = new LeafFilterFactory();
+    FilterPredicateLeafBuilder builder;
+    try {
+      builder = leafFilterFactory
+          .getLeafFilterBuilderByType(leaf.getType());
+      if (builder == null) {
+        return null;
+      }
+      if (isMultiLiteralsOperator(leaf.getOperator())) {
+        return builder.buildPredicate(leaf.getOperator(),
+            leaf.getLiteralList(),
+            leaf.getColumnName());
+      } else {
+        return builder
+            .buildPredict(leaf.getOperator(),
+                leaf.getLiteral(),
+                leaf.getColumnName());
+      }
+    } catch (Exception e) {
+      LOG.error("fail to build predicate filter leaf with errors" + e, e);
+      return null;
+    }
+  }
+
+  private static boolean isMultiLiteralsOperator(PredicateLeaf.Operator op) {
+    return (op == PredicateLeaf.Operator.IN) ||
+        (op == PredicateLeaf.Operator.BETWEEN);
+  }
+}
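
A note on the translate() logic above, with a second minimal sketch under the
same illustrative assumptions (class and column names are not from the patch):
a leaf on a column missing from the schema translates to null, null children
are dropped from AND/OR chains, and a NOT whose child translated to null
collapses to null as well. The result is conservative pruning: rather than
negating a partially translated predicate, which could wrongly skip row
groups, no filter is produced at all.

    import org.apache.hadoop.hive.ql.io.parquet.read.ParquetFilterPredicateConverter;
    import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
    import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
    import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
    import org.apache.parquet.schema.MessageType;
    import org.apache.parquet.schema.MessageTypeParser;

    // Illustrative sketch (not part of the patch).
    public class NotOverMissingColumnSketch {
      public static void main(String[] args) {
        MessageType schema = MessageTypeParser.parseMessageType(
            "message test { required int32 a; }");

        // NOT over a leaf on `p`, a column absent from the file schema.
        SearchArgument sarg = SearchArgumentFactory.newBuilder()
            .startNot()
              .between("p", PredicateLeaf.Type.INTEGER, 10, 20)
            .end()
            .build();

        // The leaf translates to null and the NOT case returns null in turn:
        // no row-group filter is generated at all.
        System.out.println(
            ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema));
      }
    }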

http://git-wip-us.apache.org/repos/asf/hive/blob/724b3193/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java
index 49e52da..f689b90 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java
@@ -22,17 +22,10 @@ import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.io.IOConstants;
-import org.apache.hadoop.hive.ql.io.parquet.FilterPredicateLeafBuilder;
-import org.apache.hadoop.hive.ql.io.parquet.LeafFilterFactory;
 import org.apache.hadoop.hive.ql.io.parquet.ProjectionPusher;
 import org.apache.hadoop.hive.ql.io.sarg.ConvertAstToSearchArg;
-import org.apache.hadoop.hive.ql.io.sarg.ExpressionTree;
-import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
-import org.apache.hadoop.hive.ql.plan.TableScanDesc;
-import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
 import org.apache.hadoop.io.ArrayWritable;
 import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.io.Writable;
@@ -46,7 +39,6 @@ import org.apache.hadoop.mapreduce.TaskAttemptID;
 
 import org.apache.parquet.filter2.compat.FilterCompat;
 import org.apache.parquet.filter2.compat.RowGroupFilter;
-import org.apache.parquet.filter2.predicate.FilterApi;
 import org.apache.parquet.filter2.predicate.FilterPredicate;
 import org.apache.parquet.hadoop.ParquetFileReader;
 import org.apache.parquet.hadoop.ParquetInputFormat;
@@ -57,6 +49,7 @@ import org.apache.parquet.hadoop.metadata.BlockMetaData;
 import org.apache.parquet.hadoop.metadata.FileMetaData;
 import org.apache.parquet.hadoop.metadata.ParquetMetadata;
 import org.apache.parquet.hadoop.util.ContextUtil;
+import org.apache.parquet.schema.MessageType;
 import org.apache.parquet.schema.MessageTypeParser;
 
 import com.google.common.base.Strings;
@@ -139,26 +132,23 @@ public class ParquetRecordReaderWrapper  implements RecordReader<NullWritable, A
     }
   }
 
-  public FilterCompat.Filter setFilter(final JobConf conf) {
-    String serializedPushdown = conf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
-    String columnNamesString =
-      conf.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR);
-    if (serializedPushdown == null || columnNamesString == null || serializedPushdown.isEmpty() ||
-      columnNamesString.isEmpty()) {
+  public FilterCompat.Filter setFilter(final JobConf conf, MessageType schema) {
+    SearchArgument sarg = ConvertAstToSearchArg.createFromConf(conf);
+    if (sarg == null) {
       return null;
     }
 
-    SearchArgument sarg =
-        ConvertAstToSearchArg.create(Utilities.deserializeExpression
-            (serializedPushdown));
-    FilterPredicate p = toFilterPredicate(sarg);
+    // Create the Parquet FilterPredicate without including columns that do not exist
+    // on the schema (such as partition columns).
+    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
     if (p != null) {
-      LOG.debug("Predicate filter for parquet is " + p.toString());
+      // The filter may contain sensitive information; do not log the predicate itself.
+      LOG.debug("PARQUET predicate push down generated.");
       ParquetInputFormat.setFilterPredicate(conf, p);
       return FilterCompat.get(p);
     } else {
-      LOG.debug("No predicate filter can be generated for " + TableScanDesc.FILTER_EXPR_CONF_STR +
-        " with the value of " + serializedPushdown);
+      // The filter may contain sensitive information; do not log the predicate itself.
+      LOG.debug("No PARQUET predicate push down is generated.");
       return null;
     }
   }
@@ -250,7 +240,6 @@ public class ParquetRecordReaderWrapper  implements RecordReader<NullWritable, A
     if (oldSplit instanceof FileSplit) {
       final Path finalPath = ((FileSplit) oldSplit).getPath();
       jobConf = projectionPusher.pushProjectionsAndFilters(conf, finalPath.getParent());
-      FilterCompat.Filter filter = setFilter(jobConf);
 
       final ParquetMetadata parquetMetadata = ParquetFileReader.readFooter(jobConf, finalPath);
       final List<BlockMetaData> blocks = parquetMetadata.getBlocks();
@@ -274,6 +263,7 @@ public class ParquetRecordReaderWrapper  implements RecordReader<NullWritable, A
         return null;
       }
 
+      FilterCompat.Filter filter = setFilter(jobConf, fileMetaData.getSchema());
       if (filter != null) {
         filtedBlocks = RowGroupFilter.filterRowGroups(filter, splitGroup, fileMetaData.getSchema());
         if (filtedBlocks.isEmpty()) {
@@ -310,92 +300,4 @@ public class ParquetRecordReaderWrapper  implements RecordReader<NullWritable, A
   public List<BlockMetaData> getFiltedBlocks() {
     return filtedBlocks;
   }
-
-  /**
-   * Translate the search argument to the filter predicate parquet used
-   * @return translate the sarg into a filter predicate
-   */
-  public static FilterPredicate toFilterPredicate(SearchArgument sarg) {
-    return translate(sarg.getExpression(), sarg.getLeaves());
-  }
-
-  private static boolean isMultiLiteralsOperator(PredicateLeaf.Operator op) {
-    return (op == PredicateLeaf.Operator.IN) ||
-        (op == PredicateLeaf.Operator.BETWEEN);
-  }
-
-  private static FilterPredicate translate(ExpressionTree root,
-                                           List<PredicateLeaf> leafs){
-    FilterPredicate p = null;
-    switch (root.getOperator()) {
-      case OR:
-        for(ExpressionTree child: root.getChildren()) {
-          if (p == null) {
-            p = translate(child, leafs);
-          } else {
-            FilterPredicate right = translate(child, leafs);
-            // constant means no filter, ignore it when it is null
-            if(right != null){
-              p = FilterApi.or(p, right);
-            }
-          }
-        }
-        return p;
-      case AND:
-        for(ExpressionTree child: root.getChildren()) {
-          if (p == null) {
-            p = translate(child, leafs);
-          } else {
-            FilterPredicate right = translate(child, leafs);
-            // constant means no filter, ignore it when it is null
-            if(right != null){
-              p = FilterApi.and(p, right);
-            }
-          }
-        }
-        return p;
-      case NOT:
-        FilterPredicate op = translate(root.getChildren().get(0), leafs);
-        if (op != null) {
-          return FilterApi.not(op);
-        } else {
-          return null;
-        }
-      case LEAF:
-        return buildFilterPredicateFromPredicateLeaf(leafs.get(root.getLeaf()));
-      case CONSTANT:
-        return null;// no filter will be executed for constant
-      default:
-        throw new IllegalStateException("Unknown operator: " +
-            root.getOperator());
-    }
-  }
-
-  private static FilterPredicate buildFilterPredicateFromPredicateLeaf
-          (PredicateLeaf leaf) {
-    LeafFilterFactory leafFilterFactory = new LeafFilterFactory();
-    FilterPredicateLeafBuilder builder;
-    try {
-      builder = leafFilterFactory
-          .getLeafFilterBuilderByType(leaf.getType());
-      if (builder == null) {
-        return null;
-      }
-      if (isMultiLiteralsOperator(leaf.getOperator())) {
-        return builder.buildPredicate(leaf.getOperator(),
-            leaf.getLiteralList(),
-            leaf.getColumnName());
-      } else {
-        return builder
-            .buildPredict(leaf.getOperator(),
-                leaf.getLiteral(),
-                leaf.getColumnName());
-      }
-    } catch (Exception e) {
-      LOG.error("fail to build predicate filter leaf with errors" + e, e);
-      return null;
-    }
-  }
-
-
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/724b3193/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetRecordReaderWrapper.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetRecordReaderWrapper.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetRecordReaderWrapper.java
index 87dd344..f9ca528 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetRecordReaderWrapper.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetRecordReaderWrapper.java
@@ -22,7 +22,7 @@ import static junit.framework.Assert.assertEquals;
 
 import org.apache.hadoop.hive.common.type.HiveChar;
 import org.apache.hadoop.hive.common.type.HiveVarchar;
-import org.apache.hadoop.hive.ql.io.parquet.read.ParquetRecordReaderWrapper;
+import org.apache.hadoop.hive.ql.io.parquet.read.ParquetFilterPredicateConverter;
 import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
@@ -56,7 +56,7 @@ public class TestParquetRecordReaderWrapper {
         .end()
         .build();
 
-    FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg);
+    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
     String expected =
       "and(and(and(not(eq(x, null)), not(and(lt(y, 20), not(lteq(y, 10))))), not(or(or(eq(z, 1), " +
         "eq(z, 2)), eq(z, 3)))), not(eq(a, Binary{\"stinger\"})))";
@@ -76,7 +76,7 @@ public class TestParquetRecordReaderWrapper {
             .end()
             .build();
     assertEquals("lteq(y, Binary{\"hi        \"})",
-        ParquetRecordReaderWrapper.toFilterPredicate(sarg).toString());
+        ParquetFilterPredicateConverter.toFilterPredicate(sarg).toString());
 
     sarg = SearchArgumentFactory.newBuilder()
         .startNot()
@@ -91,7 +91,7 @@ public class TestParquetRecordReaderWrapper {
         .end()
         .build();
 
-    FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg);
+    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
     String expected =
         "and(and(not(eq(x, null)), not(or(or(eq(z, 1), eq(z, 2)), eq(z, 3)))), " +
         "not(eq(a, Binary{\"stinger\"})))";
@@ -111,7 +111,7 @@ public class TestParquetRecordReaderWrapper {
             .end()
             .build();
     assertEquals("lteq(y, Binary{\"hi        \"})",
-        ParquetRecordReaderWrapper.toFilterPredicate(sarg).toString());
+        ParquetFilterPredicateConverter.toFilterPredicate(sarg).toString());
 
     sarg = SearchArgumentFactory.newBuilder()
         .startNot()
@@ -126,7 +126,7 @@ public class TestParquetRecordReaderWrapper {
         .end()
         .build();
 
-    FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg);
+    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
     String expected = "and(and(not(eq(x, null)), not(or(or(eq(z, 1), eq(z, 2)), eq(z, 3)))), " +
         "not(eq(a, Binary{\"stinger\"})))";
     assertEquals(expected, p.toString());
@@ -146,7 +146,7 @@ public class TestParquetRecordReaderWrapper {
             .end()
             .build();
 
-    FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg);
+    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
     String expected = "and(and(and(and(lt(x, 22), lt(x1, 22))," +
         " lteq(y, Binary{\"hi        \"})), eq(z, " +
         "0.22)), eq(z1, 0.22))";

http://git-wip-us.apache.org/repos/asf/hive/blob/724b3193/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/read/TestParquetFilterPredicate.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/read/TestParquetFilterPredicate.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/read/TestParquetFilterPredicate.java
new file mode 100644
index 0000000..847a02b
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/read/TestParquetFilterPredicate.java
@@ -0,0 +1,51 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io.parquet.read;
+
+import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
+import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
+import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
+import org.apache.parquet.filter2.predicate.FilterPredicate;
+import org.apache.parquet.schema.MessageType;
+import org.apache.parquet.schema.MessageTypeParser;
+import org.junit.Test;
+
+import static junit.framework.Assert.assertEquals;
+
+public class TestParquetFilterPredicate {
+  @Test
+  public void testFilterColumnsThatDoNotExistOnSchema() {
+    MessageType schema = MessageTypeParser.parseMessageType("message test { required int32 a; required binary stinger; }");
+    SearchArgument sarg = SearchArgumentFactory.newBuilder()
+        .startNot()
+        .startOr()
+        .isNull("a", PredicateLeaf.Type.INTEGER)
+        .between("y", PredicateLeaf.Type.INTEGER, 10, 20) // Column will be removed from filter
+        .in("z", PredicateLeaf.Type.INTEGER, 1, 2, 3) // Column will be removed from filter
+        .nullSafeEquals("a", PredicateLeaf.Type.STRING, "stinger")
+        .end()
+        .end()
+        .build();
+
+    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
+
+    String expected = "and(not(eq(a, null)), not(eq(a, Binary{\"stinger\"})))";
+    assertEquals(expected, p.toString());
+  }
+}
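
Worth noting about the expected string: the SearchArgument builder has
evidently already pushed the outer NOT through the OR (De Morgan), so the
expression reaching the converter is an AND of negated leaves; the converter
then drops the two leaves on `y` and `z`, which are absent from the schema,
leaving only the conjuncts over `a`.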

http://git-wip-us.apache.org/repos/asf/hive/blob/724b3193/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestConvertAstToSearchArg.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestConvertAstToSearchArg.java b/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestConvertAstToSearchArg.java
index 85e952f..9e8425a 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestConvertAstToSearchArg.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestConvertAstToSearchArg.java
@@ -24,22 +24,15 @@ import static junit.framework.Assert.assertTrue;
 
 import com.google.common.collect.Sets;
 
-import org.apache.hadoop.hive.common.type.HiveChar;
-import org.apache.hadoop.hive.common.type.HiveVarchar;
 import org.apache.hadoop.hive.ql.exec.Utilities;
-import org.apache.hadoop.hive.ql.io.parquet.read.ParquetRecordReaderWrapper;
+import org.apache.hadoop.hive.ql.io.parquet.read.ParquetFilterPredicateConverter;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
-import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentImpl.PredicateLeafImpl;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
-import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
 import org.junit.Test;
 
 import java.beans.XMLDecoder;
 import java.io.ByteArrayInputStream;
 import java.io.UnsupportedEncodingException;
-import java.lang.reflect.Field;
-import java.sql.Date;
-import java.sql.Timestamp;
 import java.util.List;
 import java.util.Set;
 
@@ -557,7 +550,7 @@ public class TestConvertAstToSearchArg {
     List<PredicateLeaf> leaves = sarg.getLeaves();
     assertEquals(9, leaves.size());
 
-    FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg);
+    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
     String[] conditions = new String[]{
       "eq(first_name, Binary{\"john\"})",    /* first_name = 'john' */
       "not(lteq(first_name, Binary{\"greg\"}))", /* 'greg' < first_name */
@@ -849,7 +842,7 @@ public class TestConvertAstToSearchArg {
       "lteq(id, 4)"                         /* id <= 4             */
     };
 
-    FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg);
+    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
     String expected = String.format("or(or(or(%1$s, %2$s), %3$s), %4$s)", conditions);
     assertEquals(expected, p.toString());
 
@@ -1279,7 +1272,7 @@ public class TestConvertAstToSearchArg {
       "eq(last_name, Binary{\"smith\"})"    /* 'smith' = last_name  */
     };
 
-    FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg);
+    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
     String expected = String.format("and(and(and(%1$s, %2$s), %3$s), %4$s)", conditions);
     assertEquals(expected, p.toString());
 
@@ -1500,7 +1493,7 @@ public class TestConvertAstToSearchArg {
       "or(eq(id, 34), eq(id, 50))" /* id in (34,50) */
     };
 
-    FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg);
+    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
     String expected = String.format("and(and(%1$s, %2$s), %3$s)", conditions);
     assertEquals(expected, p.toString());
 
@@ -1759,7 +1752,7 @@ public class TestConvertAstToSearchArg {
     List<PredicateLeaf> leaves = sarg.getLeaves();
     assertEquals(1, leaves.size());
 
-    FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg);
+    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
     String expected =
       "and(lt(first_name, Binary{\"greg\"}), not(lteq(first_name, Binary{\"david\"})))";
     assertEquals(p.toString(), expected);
@@ -2239,7 +2232,7 @@ public class TestConvertAstToSearchArg {
     List<PredicateLeaf> leaves = sarg.getLeaves();
     assertEquals(9, leaves.size());
 
-    FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg);
+    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
     String expected = "and(and(and(and(and(and(and(and(and(and(and(and(and(and(and(and(and(" +
       "or(or(or(lt(id, 18), lt(id, 10)), lt(id, 13)), lt(id, 16)), " +
       "or(or(or(lt(id, 18), lt(id, 11)), lt(id, 13)), lt(id, 16))), " +
@@ -2395,7 +2388,7 @@ public class TestConvertAstToSearchArg {
     List<PredicateLeaf> leaves = sarg.getLeaves();
     assertEquals(0, leaves.size());
 
-    FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg);
+    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
     assertNull(p);
 
     assertEquals("YES_NO_NULL",
@@ -2650,7 +2643,7 @@ public class TestConvertAstToSearchArg {
     List<PredicateLeaf> leaves = sarg.getLeaves();
     assertEquals(1, leaves.size());
 
-    FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg);
+    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
     String expected = "and(not(lt(id, 10)), not(lt(id, 10)))";
     assertEquals(expected, p.toString());
 

http://git-wip-us.apache.org/repos/asf/hive/blob/724b3193/ql/src/test/queries/clientpositive/parquet_predicate_pushdown.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/parquet_predicate_pushdown.q b/ql/src/test/queries/clientpositive/parquet_predicate_pushdown.q
new file mode 100644
index 0000000..08af84f
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/parquet_predicate_pushdown.q
@@ -0,0 +1,9 @@
+SET hive.optimize.index.filter=true;
+SET hive.optimize.ppd=true;
+
+-- Test predicate with partitioned columns
+CREATE TABLE part1 (id int, content string) PARTITIONED BY (p string) STORED AS PARQUET;
+ALTER TABLE part1 ADD PARTITION (p='p1');
+INSERT INTO TABLE part1 PARTITION (p='p1') VALUES (1, 'a'), (2, 'b');
+SELECT * FROM part1 WHERE p='p1';
+DROP TABLE part1 PURGE;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/724b3193/ql/src/test/results/clientpositive/parquet_predicate_pushdown.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/parquet_predicate_pushdown.q.out b/ql/src/test/results/clientpositive/parquet_predicate_pushdown.q.out
new file mode 100644
index 0000000..4186618
--- /dev/null
+++ b/ql/src/test/results/clientpositive/parquet_predicate_pushdown.q.out
@@ -0,0 +1,47 @@
+PREHOOK: query: -- Test predicate with partitioned columns
+CREATE TABLE part1 (id int, content string) PARTITIONED BY (p string) STORED AS PARQUET
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@part1
+POSTHOOK: query: -- Test predicate with partitioned columns
+CREATE TABLE part1 (id int, content string) PARTITIONED BY (p string) STORED AS PARQUET
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@part1
+PREHOOK: query: ALTER TABLE part1 ADD PARTITION (p='p1')
+PREHOOK: type: ALTERTABLE_ADDPARTS
+PREHOOK: Output: default@part1
+POSTHOOK: query: ALTER TABLE part1 ADD PARTITION (p='p1')
+POSTHOOK: type: ALTERTABLE_ADDPARTS
+POSTHOOK: Output: default@part1
+POSTHOOK: Output: default@part1@p=p1
+PREHOOK: query: INSERT INTO TABLE part1 PARTITION (p='p1') VALUES (1, 'a'), (2, 'b')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@part1@p=p1
+POSTHOOK: query: INSERT INTO TABLE part1 PARTITION (p='p1') VALUES (1, 'a'), (2, 'b')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@part1@p=p1
+POSTHOOK: Lineage: part1 PARTITION(p=p1).content SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: part1 PARTITION(p=p1).id EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: SELECT * FROM part1 WHERE p='p1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part1
+PREHOOK: Input: default@part1@p=p1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM part1 WHERE p='p1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part1
+POSTHOOK: Input: default@part1@p=p1
+#### A masked pattern was here ####
+1	a	p1
+2	b	p1
+PREHOOK: query: DROP TABLE part1 PURGE
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@part1
+PREHOOK: Output: default@part1
+POSTHOOK: query: DROP TABLE part1 PURGE
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@part1
+POSTHOOK: Output: default@part1


[5/5] hive git commit: Merge branch 'master' into spark

Posted by xu...@apache.org.
Merge branch 'master' into spark


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f78f6635
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f78f6635
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f78f6635

Branch: refs/heads/spark
Commit: f78f66359cdbd7963c3bdfbc65663010f3531719
Parents: a8c49ef 2519915
Author: xzhang <xz...@xzdt>
Authored: Wed Sep 16 10:00:19 2015 -0700
Committer: xzhang <xz...@xzdt>
Committed: Wed Sep 16 10:00:19 2015 -0700

----------------------------------------------------------------------
 ql/pom.xml                                      |   1 +
 .../read/ParquetFilterPredicateConverter.java   | 148 ++++++++++
 .../read/ParquetRecordReaderWrapper.java        | 122 +-------
 .../parquet/TestParquetRecordReaderWrapper.java |  14 +-
 .../read/TestParquetFilterPredicate.java        |  51 ++++
 .../ql/io/sarg/TestConvertAstToSearchArg.java   |  25 +-
 .../clientpositive/parquet_predicate_pushdown.q |   9 +
 .../clientpositive/unionall_unbalancedppd.q     |  72 +++++
 .../parquet_predicate_pushdown.q.out            |  47 ++++
 .../clientpositive/unionall_unbalancedppd.q.out | 280 +++++++++++++++++++
 10 files changed, 636 insertions(+), 133 deletions(-)
----------------------------------------------------------------------



[4/5] hive git commit: HIVE-10166: Merge Spark branch to master 7/30/2015 (reviewed by Chao)

Posted by xu...@apache.org.
HIVE-10166: Merge Spark branch to master 7/30/2015 (reviewed by Chao)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/25199156
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/25199156
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/25199156

Branch: refs/heads/spark
Commit: 251991568c5e9e38b3480e9ef5dc972b9da112db
Parents: bc528ba 714b3db
Author: xzhang <xz...@xzdt>
Authored: Fri Jul 31 15:55:04 2015 -0700
Committer: xzhang <xz...@xzdt>
Committed: Fri Jul 31 15:55:04 2015 -0700

----------------------------------------------------------------------
 .../org/apache/hadoop/hive/conf/HiveConf.java   |    12 +
 itests/qtest-spark/pom.xml                      |    24 +
 itests/qtest/pom.xml                            |     2 +-
 .../test/resources/testconfiguration.properties |    48 +-
 .../gen/thrift/gen-cpp/ThriftHiveMetastore.cpp  |  4708 ++++-
 .../gen/thrift/gen-cpp/ThriftHiveMetastore.h    |  7807 +++----
 .../thrift/gen-cpp/hive_metastore_constants.cpp |     2 +-
 .../thrift/gen-cpp/hive_metastore_constants.h   |     2 +-
 .../gen/thrift/gen-cpp/hive_metastore_types.cpp |  6044 +++++-
 .../gen/thrift/gen-cpp/hive_metastore_types.h   |  2619 ++-
 .../hive/metastore/api/AbortTxnRequest.java     |    24 +-
 .../metastore/api/AddDynamicPartitions.java     |    69 +-
 .../metastore/api/AddPartitionsRequest.java     |    80 +-
 .../hive/metastore/api/AddPartitionsResult.java |    48 +-
 .../hadoop/hive/metastore/api/AggrStats.java    |    54 +-
 .../metastore/api/AlreadyExistsException.java   |    24 +-
 .../metastore/api/BinaryColumnStatsData.java    |    40 +-
 .../metastore/api/BooleanColumnStatsData.java   |    40 +-
 .../hive/metastore/api/CheckLockRequest.java    |    24 +-
 .../hive/metastore/api/ColumnStatistics.java    |    54 +-
 .../metastore/api/ColumnStatisticsData.java     |    20 +-
 .../metastore/api/ColumnStatisticsDesc.java     |    58 +-
 .../hive/metastore/api/ColumnStatisticsObj.java |    40 +-
 .../hive/metastore/api/CommitTxnRequest.java    |    24 +-
 .../hive/metastore/api/CompactionRequest.java   |    62 +-
 .../hive/metastore/api/CompactionType.java      |     2 +-
 .../api/ConfigValSecurityException.java         |    24 +-
 .../api/CurrentNotificationEventId.java         |    24 +-
 .../hadoop/hive/metastore/api/Database.java     |   115 +-
 .../apache/hadoop/hive/metastore/api/Date.java  |    24 +-
 .../hive/metastore/api/DateColumnStatsData.java |    50 +-
 .../hadoop/hive/metastore/api/Decimal.java      |    41 +-
 .../metastore/api/DecimalColumnStatsData.java   |    50 +-
 .../metastore/api/DoubleColumnStatsData.java    |    50 +-
 .../hive/metastore/api/DropPartitionsExpr.java  |    43 +-
 .../metastore/api/DropPartitionsRequest.java    |    82 +-
 .../metastore/api/DropPartitionsResult.java     |    48 +-
 .../hive/metastore/api/EnvironmentContext.java  |    61 +-
 .../hive/metastore/api/EventRequestType.java    |     2 +-
 .../hadoop/hive/metastore/api/FieldSchema.java  |    58 +-
 .../hive/metastore/api/FireEventRequest.java    |    79 +-
 .../metastore/api/FireEventRequestData.java     |    20 +-
 .../hive/metastore/api/FireEventResponse.java   |    16 +-
 .../hadoop/hive/metastore/api/Function.java     |   110 +-
 .../hadoop/hive/metastore/api/FunctionType.java |     2 +-
 .../metastore/api/GetOpenTxnsInfoResponse.java  |    54 +-
 .../hive/metastore/api/GetOpenTxnsResponse.java |    53 +-
 .../api/GetPrincipalsInRoleRequest.java         |    24 +-
 .../api/GetPrincipalsInRoleResponse.java        |    46 +-
 .../api/GetRoleGrantsForPrincipalRequest.java   |    36 +-
 .../api/GetRoleGrantsForPrincipalResponse.java  |    46 +-
 .../api/GrantRevokePrivilegeRequest.java        |    46 +-
 .../api/GrantRevokePrivilegeResponse.java       |    26 +-
 .../metastore/api/GrantRevokeRoleRequest.java   |    86 +-
 .../metastore/api/GrantRevokeRoleResponse.java  |    26 +-
 .../hive/metastore/api/GrantRevokeType.java     |     2 +-
 .../hive/metastore/api/HeartbeatRequest.java    |    34 +-
 .../metastore/api/HeartbeatTxnRangeRequest.java |    32 +-
 .../api/HeartbeatTxnRangeResponse.java          |    74 +-
 .../hive/metastore/api/HiveObjectPrivilege.java |    52 +-
 .../hive/metastore/api/HiveObjectRef.java       |    81 +-
 .../hive/metastore/api/HiveObjectType.java      |     2 +-
 .../apache/hadoop/hive/metastore/api/Index.java |   133 +-
 .../api/IndexAlreadyExistsException.java        |    24 +-
 .../metastore/api/InsertEventRequestData.java   |    45 +-
 .../metastore/api/InvalidInputException.java    |    24 +-
 .../metastore/api/InvalidObjectException.java   |    24 +-
 .../api/InvalidOperationException.java          |    24 +-
 .../api/InvalidPartitionException.java          |    24 +-
 .../hive/metastore/api/LockComponent.java       |    66 +-
 .../hadoop/hive/metastore/api/LockLevel.java    |     2 +-
 .../hadoop/hive/metastore/api/LockRequest.java  |    72 +-
 .../hadoop/hive/metastore/api/LockResponse.java |    36 +-
 .../hadoop/hive/metastore/api/LockState.java    |     2 +-
 .../hadoop/hive/metastore/api/LockType.java     |     2 +-
 .../hive/metastore/api/LongColumnStatsData.java |    50 +-
 .../hive/metastore/api/MetaException.java       |    24 +-
 .../hive/metastore/api/NoSuchLockException.java |    24 +-
 .../metastore/api/NoSuchObjectException.java    |    24 +-
 .../hive/metastore/api/NoSuchTxnException.java  |    24 +-
 .../hive/metastore/api/NotificationEvent.java   |    66 +-
 .../metastore/api/NotificationEventRequest.java |    34 +-
 .../api/NotificationEventResponse.java          |    46 +-
 .../hive/metastore/api/OpenTxnRequest.java      |    40 +-
 .../hive/metastore/api/OpenTxnsResponse.java    |    45 +-
 .../apache/hadoop/hive/metastore/api/Order.java |    32 +-
 .../hadoop/hive/metastore/api/Partition.java    |   156 +-
 .../hive/metastore/api/PartitionEventType.java  |     2 +-
 .../api/PartitionListComposingSpec.java         |    46 +-
 .../hive/metastore/api/PartitionSpec.java       |    58 +-
 .../api/PartitionSpecWithSharedSD.java          |    54 +-
 .../hive/metastore/api/PartitionWithoutSD.java  |   124 +-
 .../metastore/api/PartitionsByExprRequest.java  |    67 +-
 .../metastore/api/PartitionsByExprResult.java   |    54 +-
 .../metastore/api/PartitionsStatsRequest.java   |    90 +-
 .../metastore/api/PartitionsStatsResult.java    |    72 +-
 .../metastore/api/PrincipalPrivilegeSet.java    |   184 +-
 .../hive/metastore/api/PrincipalType.java       |     2 +-
 .../hadoop/hive/metastore/api/PrivilegeBag.java |    46 +-
 .../hive/metastore/api/PrivilegeGrantInfo.java  |    60 +-
 .../hive/metastore/api/RequestPartsSpec.java    |    56 +-
 .../hadoop/hive/metastore/api/ResourceType.java |     2 +-
 .../hadoop/hive/metastore/api/ResourceUri.java  |    36 +-
 .../apache/hadoop/hive/metastore/api/Role.java  |    40 +-
 .../hive/metastore/api/RolePrincipalGrant.java  |    80 +-
 .../hadoop/hive/metastore/api/Schema.java       |    91 +-
 .../hadoop/hive/metastore/api/SerDeInfo.java    |    93 +-
 .../api/SetPartitionsStatsRequest.java          |    46 +-
 .../hive/metastore/api/ShowCompactRequest.java  |    16 +-
 .../hive/metastore/api/ShowCompactResponse.java |    46 +-
 .../api/ShowCompactResponseElement.java         |    86 +-
 .../hive/metastore/api/ShowLocksRequest.java    |    16 +-
 .../hive/metastore/api/ShowLocksResponse.java   |    46 +-
 .../metastore/api/ShowLocksResponseElement.java |   114 +-
 .../hadoop/hive/metastore/api/SkewedInfo.java   |   147 +-
 .../hive/metastore/api/StorageDescriptor.java   |   242 +-
 .../metastore/api/StringColumnStatsData.java    |    48 +-
 .../apache/hadoop/hive/metastore/api/Table.java |   189 +-
 .../hive/metastore/api/TableStatsRequest.java   |    61 +-
 .../hive/metastore/api/TableStatsResult.java    |    46 +-
 .../hive/metastore/api/ThriftHiveMetastore.java | 18497 ++++++++++++-----
 .../hive/metastore/api/TxnAbortedException.java |    24 +-
 .../hadoop/hive/metastore/api/TxnInfo.java      |    52 +-
 .../hive/metastore/api/TxnOpenException.java    |    24 +-
 .../hadoop/hive/metastore/api/TxnState.java     |     2 +-
 .../apache/hadoop/hive/metastore/api/Type.java  |    72 +-
 .../hive/metastore/api/UnknownDBException.java  |    24 +-
 .../api/UnknownPartitionException.java          |    24 +-
 .../metastore/api/UnknownTableException.java    |    24 +-
 .../hive/metastore/api/UnlockRequest.java       |    24 +-
 .../hadoop/hive/metastore/api/Version.java      |    32 +-
 .../metastore/api/hive_metastoreConstants.java  |     7 +-
 .../gen-php/metastore/ThriftHiveMetastore.php   |  3328 ++-
 .../src/gen/thrift/gen-php/metastore/Types.php  |  1081 +-
 .../hive_metastore/ThriftHiveMetastore-remote   |   609 +-
 .../hive_metastore/ThriftHiveMetastore.py       |  3726 +++-
 .../thrift/gen-py/hive_metastore/constants.py   |     2 +-
 .../gen/thrift/gen-py/hive_metastore/ttypes.py  |   782 +-
 .../thrift/gen-rb/hive_metastore_constants.rb   |     2 +-
 .../gen/thrift/gen-rb/hive_metastore_types.rb   |     2 +-
 .../gen/thrift/gen-rb/thrift_hive_metastore.rb  |     2 +-
 pom.xml                                         |    12 +-
 ql/if/queryplan.thrift                          |     1 +
 ql/pom.xml                                      |     6 +
 .../gen/thrift/gen-cpp/queryplan_constants.cpp  |     2 +-
 ql/src/gen/thrift/gen-cpp/queryplan_constants.h |     2 +-
 ql/src/gen/thrift/gen-cpp/queryplan_types.cpp   |   796 +-
 ql/src/gen/thrift/gen-cpp/queryplan_types.h     |   294 +-
 .../hadoop/hive/ql/plan/api/Adjacency.java      |    65 +-
 .../hadoop/hive/ql/plan/api/AdjacencyType.java  |     2 +-
 .../apache/hadoop/hive/ql/plan/api/Graph.java   |    87 +-
 .../hadoop/hive/ql/plan/api/NodeType.java       |     2 +-
 .../hadoop/hive/ql/plan/api/Operator.java       |   142 +-
 .../hadoop/hive/ql/plan/api/OperatorType.java   |     7 +-
 .../apache/hadoop/hive/ql/plan/api/Query.java   |   176 +-
 .../hadoop/hive/ql/plan/api/QueryPlan.java      |    62 +-
 .../apache/hadoop/hive/ql/plan/api/Stage.java   |   172 +-
 .../hadoop/hive/ql/plan/api/StageType.java      |     2 +-
 .../apache/hadoop/hive/ql/plan/api/Task.java    |   182 +-
 .../hadoop/hive/ql/plan/api/TaskType.java       |     2 +-
 ql/src/gen/thrift/gen-php/Types.php             |   119 +-
 ql/src/gen/thrift/gen-py/queryplan/constants.py |     2 +-
 ql/src/gen/thrift/gen-py/queryplan/ttypes.py    |    87 +-
 ql/src/gen/thrift/gen-rb/queryplan_constants.rb |     2 +-
 ql/src/gen/thrift/gen-rb/queryplan_types.rb     |     7 +-
 .../hive/ql/exec/HashTableSinkOperator.java     |     6 +-
 .../hadoop/hive/ql/exec/OperatorFactory.java    |    11 +
 .../ql/exec/SparkHashTableSinkOperator.java     |    17 +-
 .../apache/hadoop/hive/ql/exec/Utilities.java   |     1 +
 .../persistence/MapJoinTableContainerSerDe.java |    63 +-
 .../hive/ql/exec/spark/HashTableLoader.java     |    26 +-
 .../ql/exec/spark/HiveSparkClientFactory.java   |    10 +-
 .../hive/ql/exec/spark/KryoSerializer.java      |     4 +
 .../ql/exec/spark/RemoteHiveSparkClient.java    |    52 +-
 .../exec/spark/SparkDynamicPartitionPruner.java |   268 +
 .../hadoop/hive/ql/exec/spark/SparkPlan.java    |     3 -
 .../hive/ql/exec/spark/SparkPlanGenerator.java  |    15 +-
 .../hadoop/hive/ql/exec/spark/SparkTask.java    |     1 +
 .../hive/ql/exec/spark/SparkUtilities.java      |    56 +
 .../VectorSparkHashTableSinkOperator.java       |   104 +
 ...VectorSparkPartitionPruningSinkOperator.java |    99 +
 .../hive/ql/io/CombineHiveInputFormat.java      |    59 +-
 .../hadoop/hive/ql/io/HiveInputFormat.java      |    46 +-
 .../DynamicPartitionPruningOptimization.java    |    44 +-
 .../ql/optimizer/OperatorComparatorFactory.java |   552 +
 .../hadoop/hive/ql/optimizer/Optimizer.java     |     2 +-
 .../SparkRemoveDynamicPruningBySize.java        |    73 +
 .../physical/GenSparkSkewJoinProcessor.java     |    14 +-
 .../hive/ql/optimizer/physical/Vectorizer.java  |    23 +
 .../spark/CombineEquivalentWorkResolver.java    |   292 +
 .../spark/SparkPartitionPruningSinkDesc.java    |   100 +
 .../spark/SparkReduceSinkMapJoinProc.java       |     2 +-
 .../hadoop/hive/ql/parse/CalcitePlanner.java    |     2 +-
 .../ql/parse/spark/GenSparkProcContext.java     |    14 +-
 .../hive/ql/parse/spark/GenSparkUtils.java      |   111 +-
 .../parse/spark/OptimizeSparkProcContext.java   |    16 +-
 .../hive/ql/parse/spark/SparkCompiler.java      |   180 +-
 .../SparkPartitionPruningSinkOperator.java      |   142 +
 .../hive/ql/parse/spark/SplitOpTreeForDPP.java  |   151 +
 .../hadoop/hive/ql/plan/JoinCondDesc.java       |    14 +
 .../apache/hadoop/hive/ql/plan/JoinDesc.java    |     4 +
 .../org/apache/hadoop/hive/ql/plan/MapWork.java |    10 +
 .../hadoop/hive/ql/plan/ReduceSinkDesc.java     |     1 +
 .../hive/ql/plan/SparkHashTableSinkDesc.java    |    11 +
 .../hadoop/hive/ql/plan/TableScanDesc.java      |     6 +-
 .../hive/ql/ppd/SyntheticJoinPredicate.java     |    14 +-
 .../queries/clientpositive/dynamic_rdd_cache.q  |   111 +
 ql/src/test/queries/clientpositive/groupby5.q   |     2 +
 .../spark_dynamic_partition_pruning.q           |   180 +
 .../spark_dynamic_partition_pruning_2.q         |   118 +
 ...spark_vectorized_dynamic_partition_pruning.q |   192 +
 .../queries/clientpositive/udf_percentile.q     |     2 +
 .../groupby2_map_skew_multi_distinct.q.out      |     9 +
 .../spark/groupby2_multi_distinct.q.out         |     9 +
 .../groupby3_map_skew_multi_distinct.q.out      |     9 +
 .../spark/groupby3_multi_distinct.q.out         |     9 +
 .../spark/groupby_grouping_sets7.q.out          |     9 +
 .../clientpositive/dynamic_rdd_cache.q.out      |  1420 ++
 .../test/results/clientpositive/groupby5.q.out  |     8 +-
 .../clientpositive/spark/auto_join18.q.out      |    24 +-
 .../clientpositive/spark/auto_join30.q.out      |    51 +-
 .../clientpositive/spark/auto_join32.q.out      |    24 +-
 .../spark/auto_smb_mapjoin_14.q.out             |    30 +-
 .../spark/auto_sortmerge_join_10.q.out          |    23 +-
 .../results/clientpositive/spark/bucket2.q.out  |     3 -
 .../results/clientpositive/spark/bucket3.q.out  |     3 -
 .../results/clientpositive/spark/bucket4.q.out  |     3 -
 .../spark/column_access_stats.q.out             |     4 -
 .../spark/dynamic_rdd_cache.q.out               |  1073 +
 .../clientpositive/spark/groupby10.q.out        |    32 +-
 .../clientpositive/spark/groupby1_map.q.out     |   412 +
 .../spark/groupby1_map_nomap.q.out              |   408 +
 .../spark/groupby1_map_skew.q.out               |   427 +
 .../clientpositive/spark/groupby1_noskew.q.out  |   406 +
 .../clientpositive/spark/groupby2_map.q.out     |   118 +
 .../spark/groupby2_map_multi_distinct.q.out     |   232 +
 .../spark/groupby2_map_skew.q.out               |   129 +
 .../clientpositive/spark/groupby2_noskew.q.out  |   111 +
 .../spark/groupby2_noskew_multi_distinct.q.out  |   114 +
 .../clientpositive/spark/groupby4_map.q.out     |    93 +
 .../spark/groupby4_map_skew.q.out               |    93 +
 .../clientpositive/spark/groupby4_noskew.q.out  |   104 +
 .../results/clientpositive/spark/groupby5.q.out |   433 +
 .../clientpositive/spark/groupby5_map.q.out     |    95 +
 .../spark/groupby5_map_skew.q.out               |    95 +
 .../clientpositive/spark/groupby5_noskew.q.out  |   418 +
 .../results/clientpositive/spark/groupby6.q.out |   113 +
 .../clientpositive/spark/groupby6_map.q.out     |   109 +
 .../spark/groupby6_map_skew.q.out               |   122 +
 .../clientpositive/spark/groupby6_noskew.q.out  |   104 +
 .../clientpositive/spark/groupby7_map.q.out     |    23 +-
 .../spark/groupby7_map_skew.q.out               |    38 +-
 .../clientpositive/spark/groupby7_noskew.q.out  |    17 +-
 .../groupby7_noskew_multi_single_reducer.q.out  |    18 +-
 .../results/clientpositive/spark/groupby8.q.out |    62 +-
 .../spark/groupby8_map_skew.q.out               |    37 +-
 .../spark/groupby_grouping_id2.q.out            |   230 +
 .../clientpositive/spark/groupby_position.q.out |    37 +-
 .../spark/groupby_ppr_multi_distinct.q.out      |   346 +
 .../spark/groupby_resolution.q.out              |   796 +
 .../clientpositive/spark/insert_into3.q.out     |    33 +-
 .../results/clientpositive/spark/join18.q.out   |    24 +-
 .../results/clientpositive/spark/join22.q.out   |    19 +-
 .../spark/limit_partition_metadataonly.q.out    |     2 -
 .../clientpositive/spark/limit_pushdown.q.out   |    31 +-
 .../spark/list_bucket_dml_2.q.java1.7.out       |     3 -
 .../clientpositive/spark/load_dyn_part14.q.out  |    30 +-
 .../clientpositive/spark/nullgroup.q.out        |   265 +
 .../clientpositive/spark/nullgroup2.q.out       |   300 +
 .../clientpositive/spark/nullgroup4.q.out       |   292 +
 .../spark/nullgroup4_multi_distinct.q.out       |   133 +
 .../spark/optimize_nullscan.q.out               |     3 -
 .../test/results/clientpositive/spark/pcr.q.out |     6 -
 .../results/clientpositive/spark/sample3.q.out  |     3 -
 .../results/clientpositive/spark/sample9.q.out  |     3 -
 .../clientpositive/spark/skewjoinopt11.q.out    |    60 +-
 .../clientpositive/spark/skewjoinopt9.q.out     |    20 +-
 .../clientpositive/spark/smb_mapjoin_11.q.out   |     6 -
 .../spark/spark_dynamic_partition_pruning.q.out |  5573 +++++
 .../spark_dynamic_partition_pruning_2.q.out     |  1015 +
 ...k_vectorized_dynamic_partition_pruning.q.out |  5822 ++++++
 .../clientpositive/spark/temp_table_gb1.q.out   |    67 +
 .../clientpositive/spark/udaf_collect_set.q.out |   212 +
 .../clientpositive/spark/udf_example_add.q.out  |     3 -
 .../clientpositive/spark/udf_in_file.q.out      |     3 -
 .../results/clientpositive/spark/udf_max.q.out  |    62 +
 .../results/clientpositive/spark/udf_min.q.out  |    62 +
 .../clientpositive/spark/udf_percentile.q.out   |   450 +
 .../results/clientpositive/spark/union10.q.out  |    36 +-
 .../results/clientpositive/spark/union11.q.out  |    38 +-
 .../results/clientpositive/spark/union15.q.out  |    23 +-
 .../results/clientpositive/spark/union16.q.out  |   482 +-
 .../results/clientpositive/spark/union2.q.out   |    22 +-
 .../results/clientpositive/spark/union20.q.out  |    18 +-
 .../results/clientpositive/spark/union25.q.out  |    21 +-
 .../results/clientpositive/spark/union28.q.out  |    21 +-
 .../results/clientpositive/spark/union3.q.out   |    45 +-
 .../results/clientpositive/spark/union30.q.out  |    21 +-
 .../results/clientpositive/spark/union4.q.out   |    18 +-
 .../results/clientpositive/spark/union5.q.out   |    20 +-
 .../results/clientpositive/spark/union9.q.out   |    42 +-
 .../clientpositive/spark/union_remove_1.q.out   |    23 +-
 .../clientpositive/spark/union_remove_15.q.out  |    23 +-
 .../clientpositive/spark/union_remove_16.q.out  |    23 +-
 .../clientpositive/spark/union_remove_18.q.out  |    23 +-
 .../clientpositive/spark/union_remove_19.q.out  |    75 +-
 .../clientpositive/spark/union_remove_20.q.out  |    23 +-
 .../clientpositive/spark/union_remove_21.q.out  |    21 +-
 .../clientpositive/spark/union_remove_22.q.out  |    46 +-
 .../clientpositive/spark/union_remove_24.q.out  |    23 +-
 .../clientpositive/spark/union_remove_25.q.out  |    59 +-
 .../clientpositive/spark/union_remove_4.q.out   |    23 +-
 .../clientpositive/spark/union_remove_6.q.out   |    23 +-
 .../spark/union_remove_6_subq.q.out             |    64 +-
 .../clientpositive/spark/union_remove_7.q.out   |    23 +-
 .../clientpositive/spark/union_top_level.q.out  |    59 +-
 .../clientpositive/spark/union_view.q.out       |     9 -
 .../spark/vector_count_distinct.q.out           |    31 +-
 .../spark/vector_decimal_mapjoin.q.out          |     1 +
 .../clientpositive/spark/vector_elt.q.out       |     7 -
 .../spark/vector_left_outer_join.q.out          |     2 +
 .../spark/vector_mapjoin_reduce.q.out           |     1 +
 .../spark/vector_string_concat.q.out            |     3 -
 .../spark/vectorization_decimal_date.q.out      |     4 -
 .../spark/vectorization_div0.q.out              |     3 -
 .../clientpositive/spark/vectorized_case.q.out  |     3 -
 .../spark/vectorized_mapjoin.q.out              |     1 +
 .../spark/vectorized_math_funcs.q.out           |     3 -
 .../spark/vectorized_nested_mapjoin.q.out       |     2 +
 .../spark/vectorized_string_funcs.q.out         |     3 -
 .../results/clientpositive/udf_percentile.q.out |   104 +-
 .../gen/thrift/gen-cpp/complex_constants.cpp    |     2 +-
 .../src/gen/thrift/gen-cpp/complex_constants.h  |     2 +-
 serde/src/gen/thrift/gen-cpp/complex_types.cpp  |   442 +-
 serde/src/gen/thrift/gen-cpp/complex_types.h    |   174 +-
 .../gen/thrift/gen-cpp/megastruct_constants.cpp |     2 +-
 .../gen/thrift/gen-cpp/megastruct_constants.h   |     2 +-
 .../src/gen/thrift/gen-cpp/megastruct_types.cpp |   585 +-
 serde/src/gen/thrift/gen-cpp/megastruct_types.h |   175 +-
 .../src/gen/thrift/gen-cpp/serde_constants.cpp  |     2 +-
 serde/src/gen/thrift/gen-cpp/serde_constants.h  |     2 +-
 serde/src/gen/thrift/gen-cpp/serde_types.cpp    |     5 +-
 serde/src/gen/thrift/gen-cpp/serde_types.h      |     5 +-
 .../gen/thrift/gen-cpp/testthrift_constants.cpp |     2 +-
 .../gen/thrift/gen-cpp/testthrift_constants.h   |     2 +-
 .../src/gen/thrift/gen-cpp/testthrift_types.cpp |    95 +-
 serde/src/gen/thrift/gen-cpp/testthrift_types.h |    45 +-
 .../hadoop/hive/serde/serdeConstants.java       |     7 +-
 .../hadoop/hive/serde/test/InnerStruct.java     |    24 +-
 .../hadoop/hive/serde/test/ThriftTestObj.java   |    62 +-
 .../hadoop/hive/serde2/thrift/test/Complex.java |   279 +-
 .../hive/serde2/thrift/test/IntString.java      |    40 +-
 .../hive/serde2/thrift/test/MegaStruct.java     |   521 +-
 .../hive/serde2/thrift/test/MiniStruct.java     |    38 +-
 .../hadoop/hive/serde2/thrift/test/MyEnum.java  |     2 +-
 .../hive/serde2/thrift/test/PropValueUnion.java |    60 +-
 .../hive/serde2/thrift/test/SetIntString.java   |    54 +-
 serde/src/gen/thrift/gen-php/Types.php          |    15 +-
 .../org/apache/hadoop/hive/serde/Types.php      |   373 +-
 .../src/gen/thrift/gen-py/complex/constants.py  |     2 +-
 serde/src/gen/thrift/gen-py/complex/ttypes.py   |    50 +-
 .../gen/thrift/gen-py/megastruct/constants.py   |     2 +-
 .../src/gen/thrift/gen-py/megastruct/ttypes.py  |    44 +-
 .../org_apache_hadoop_hive_serde/constants.py   |     2 +-
 .../org_apache_hadoop_hive_serde/ttypes.py      |     2 +-
 .../gen/thrift/gen-py/testthrift/constants.py   |     2 +-
 .../src/gen/thrift/gen-py/testthrift/ttypes.py  |    14 +-
 .../src/gen/thrift/gen-rb/complex_constants.rb  |     2 +-
 serde/src/gen/thrift/gen-rb/complex_types.rb    |     2 +-
 .../gen/thrift/gen-rb/megastruct_constants.rb   |     2 +-
 serde/src/gen/thrift/gen-rb/megastruct_types.rb |     2 +-
 serde/src/gen/thrift/gen-rb/serde_constants.rb  |     2 +-
 serde/src/gen/thrift/gen-rb/serde_types.rb      |     2 +-
 .../gen/thrift/gen-rb/testthrift_constants.rb   |     2 +-
 serde/src/gen/thrift/gen-rb/testthrift_types.rb |     2 +-
 .../lazy/fast/LazySimpleDeserializeRead.java    |     4 +-
 service/src/gen/thrift/gen-cpp/TCLIService.cpp  |   458 +-
 service/src/gen/thrift/gen-cpp/TCLIService.h    |   821 +-
 .../thrift/gen-cpp/TCLIService_constants.cpp    |     2 +-
 .../gen/thrift/gen-cpp/TCLIService_constants.h  |     2 +-
 .../gen/thrift/gen-cpp/TCLIService_types.cpp    |  3250 ++-
 .../src/gen/thrift/gen-cpp/TCLIService_types.h  |  1482 +-
 service/src/gen/thrift/gen-cpp/ThriftHive.cpp   |   286 +-
 service/src/gen/thrift/gen-cpp/ThriftHive.h     |   389 +-
 .../thrift/gen-cpp/hive_service_constants.cpp   |     2 +-
 .../gen/thrift/gen-cpp/hive_service_constants.h |     2 +-
 .../gen/thrift/gen-cpp/hive_service_types.cpp   |   110 +-
 .../src/gen/thrift/gen-cpp/hive_service_types.h |    75 +-
 .../hadoop/hive/service/HiveClusterStatus.java  |    68 +-
 .../hive/service/HiveServerException.java       |    40 +-
 .../hadoop/hive/service/JobTrackerState.java    |     2 +-
 .../apache/hadoop/hive/service/ThriftHive.java  |   914 +-
 .../service/cli/thrift/TArrayTypeEntry.java     |    24 +-
 .../hive/service/cli/thrift/TBinaryColumn.java  |    64 +-
 .../hive/service/cli/thrift/TBoolColumn.java    |    62 +-
 .../hive/service/cli/thrift/TBoolValue.java     |    26 +-
 .../hive/service/cli/thrift/TByteColumn.java    |    62 +-
 .../hive/service/cli/thrift/TByteValue.java     |    26 +-
 .../hive/service/cli/thrift/TCLIService.java    |  1734 +-
 .../cli/thrift/TCLIServiceConstants.java        |     7 +-
 .../cli/thrift/TCancelDelegationTokenReq.java   |    32 +-
 .../cli/thrift/TCancelDelegationTokenResp.java  |    24 +-
 .../service/cli/thrift/TCancelOperationReq.java |    24 +-
 .../cli/thrift/TCancelOperationResp.java        |    24 +-
 .../service/cli/thrift/TCloseOperationReq.java  |    24 +-
 .../service/cli/thrift/TCloseOperationResp.java |    24 +-
 .../service/cli/thrift/TCloseSessionReq.java    |    24 +-
 .../service/cli/thrift/TCloseSessionResp.java   |    24 +-
 .../apache/hive/service/cli/thrift/TColumn.java |    20 +-
 .../hive/service/cli/thrift/TColumnDesc.java    |    50 +-
 .../hive/service/cli/thrift/TColumnValue.java   |    20 +-
 .../hive/service/cli/thrift/TDoubleColumn.java  |    62 +-
 .../hive/service/cli/thrift/TDoubleValue.java   |    26 +-
 .../cli/thrift/TExecuteStatementReq.java        |    87 +-
 .../cli/thrift/TExecuteStatementResp.java       |    34 +-
 .../service/cli/thrift/TFetchOrientation.java   |     2 +-
 .../service/cli/thrift/TFetchResultsReq.java    |    54 +-
 .../service/cli/thrift/TFetchResultsResp.java   |    42 +-
 .../service/cli/thrift/TGetCatalogsReq.java     |    24 +-
 .../service/cli/thrift/TGetCatalogsResp.java    |    34 +-
 .../hive/service/cli/thrift/TGetColumnsReq.java |    58 +-
 .../service/cli/thrift/TGetColumnsResp.java     |    34 +-
 .../cli/thrift/TGetDelegationTokenReq.java      |    40 +-
 .../cli/thrift/TGetDelegationTokenResp.java     |    34 +-
 .../service/cli/thrift/TGetFunctionsReq.java    |    50 +-
 .../service/cli/thrift/TGetFunctionsResp.java   |    34 +-
 .../hive/service/cli/thrift/TGetInfoReq.java    |    36 +-
 .../hive/service/cli/thrift/TGetInfoResp.java   |    32 +-
 .../hive/service/cli/thrift/TGetInfoType.java   |     2 +-
 .../hive/service/cli/thrift/TGetInfoValue.java  |    20 +-
 .../cli/thrift/TGetOperationStatusReq.java      |    24 +-
 .../cli/thrift/TGetOperationStatusResp.java     |    62 +-
 .../cli/thrift/TGetResultSetMetadataReq.java    |    24 +-
 .../cli/thrift/TGetResultSetMetadataResp.java   |    34 +-
 .../hive/service/cli/thrift/TGetSchemasReq.java |    42 +-
 .../service/cli/thrift/TGetSchemasResp.java     |    34 +-
 .../service/cli/thrift/TGetTableTypesReq.java   |    24 +-
 .../service/cli/thrift/TGetTableTypesResp.java  |    34 +-
 .../hive/service/cli/thrift/TGetTablesReq.java  |    79 +-
 .../hive/service/cli/thrift/TGetTablesResp.java |    34 +-
 .../service/cli/thrift/TGetTypeInfoReq.java     |    24 +-
 .../service/cli/thrift/TGetTypeInfoResp.java    |    34 +-
 .../service/cli/thrift/THandleIdentifier.java   |    50 +-
 .../hive/service/cli/thrift/TI16Column.java     |    62 +-
 .../hive/service/cli/thrift/TI16Value.java      |    26 +-
 .../hive/service/cli/thrift/TI32Column.java     |    62 +-
 .../hive/service/cli/thrift/TI32Value.java      |    26 +-
 .../hive/service/cli/thrift/TI64Column.java     |    62 +-
 .../hive/service/cli/thrift/TI64Value.java      |    26 +-
 .../hive/service/cli/thrift/TMapTypeEntry.java  |    32 +-
 .../service/cli/thrift/TOpenSessionReq.java     |    91 +-
 .../service/cli/thrift/TOpenSessionResp.java    |    91 +-
 .../service/cli/thrift/TOperationHandle.java    |    54 +-
 .../service/cli/thrift/TOperationState.java     |     2 +-
 .../hive/service/cli/thrift/TOperationType.java |     2 +-
 .../service/cli/thrift/TPrimitiveTypeEntry.java |    38 +-
 .../service/cli/thrift/TProtocolVersion.java    |     2 +-
 .../cli/thrift/TRenewDelegationTokenReq.java    |    32 +-
 .../cli/thrift/TRenewDelegationTokenResp.java   |    24 +-
 .../apache/hive/service/cli/thrift/TRow.java    |    46 +-
 .../apache/hive/service/cli/thrift/TRowSet.java |    86 +-
 .../hive/service/cli/thrift/TSessionHandle.java |    24 +-
 .../apache/hive/service/cli/thrift/TStatus.java |    83 +-
 .../hive/service/cli/thrift/TStatusCode.java    |     2 +-
 .../hive/service/cli/thrift/TStringColumn.java  |    62 +-
 .../hive/service/cli/thrift/TStringValue.java   |    26 +-
 .../service/cli/thrift/TStructTypeEntry.java    |    50 +-
 .../hive/service/cli/thrift/TTableSchema.java   |    46 +-
 .../hive/service/cli/thrift/TTypeDesc.java      |    46 +-
 .../hive/service/cli/thrift/TTypeEntry.java     |    20 +-
 .../apache/hive/service/cli/thrift/TTypeId.java |     2 +-
 .../service/cli/thrift/TTypeQualifierValue.java |    20 +-
 .../service/cli/thrift/TTypeQualifiers.java     |    54 +-
 .../service/cli/thrift/TUnionTypeEntry.java     |    50 +-
 .../cli/thrift/TUserDefinedTypeEntry.java       |    24 +-
 service/src/gen/thrift/gen-php/TCLIService.php  |   269 +-
 service/src/gen/thrift/gen-php/ThriftHive.php   |   125 +-
 service/src/gen/thrift/gen-php/Types.php        |    30 +-
 .../gen-py/TCLIService/TCLIService-remote       |   102 +-
 .../thrift/gen-py/TCLIService/TCLIService.py    |   439 +-
 .../gen/thrift/gen-py/TCLIService/constants.py  |     2 +-
 .../src/gen/thrift/gen-py/TCLIService/ttypes.py |   478 +-
 .../gen-py/hive_service/ThriftHive-remote       |  1014 +-
 .../thrift/gen-py/hive_service/ThriftHive.py    |   266 +-
 .../gen/thrift/gen-py/hive_service/constants.py |     2 +-
 .../gen/thrift/gen-py/hive_service/ttypes.py    |    19 +-
 .../gen/thrift/gen-rb/hive_service_constants.rb |     2 +-
 .../src/gen/thrift/gen-rb/hive_service_types.rb |     2 +-
 .../src/gen/thrift/gen-rb/t_c_l_i_service.rb    |     2 +-
 .../thrift/gen-rb/t_c_l_i_service_constants.rb  |     2 +-
 .../gen/thrift/gen-rb/t_c_l_i_service_types.rb  |     2 +-
 service/src/gen/thrift/gen-rb/thrift_hive.rb    |     2 +-
 .../cli/thrift/ThriftBinaryCLIService.java      |     1 +
 spark-client/pom.xml                            |     5 +
 .../hive/spark/client/SparkClientImpl.java      |    21 +-
 .../hive/spark/client/SparkClientUtilities.java |    13 +-
 .../hive/spark/client/TestSparkClient.java      |     4 +-
 497 files changed, 79181 insertions(+), 24699 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/25199156/ql/pom.xml
----------------------------------------------------------------------


[3/5] hive git commit: HIVE-11425 - submitting a query via CLI against... (Eugene Koifman, reviewed by Prasanth Jayachandran)

Posted by xu...@apache.org.
HIVE-11425 - submitting a query via CLI against... (Eugene Koifman, reviewed by Prasanth Jayachandran)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/bc528ba3
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/bc528ba3
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/bc528ba3

Branch: refs/heads/spark
Commit: bc528ba35d58af61f4d854003d99af50818f909a
Parents: 8c0016a
Author: Eugene Koifman <ek...@hortonworks.com>
Authored: Fri Jul 31 13:20:23 2015 -0700
Committer: Eugene Koifman <ek...@hortonworks.com>
Committed: Fri Jul 31 13:22:14 2015 -0700

----------------------------------------------------------------------
 ql/pom.xml | 1 +
 1 file changed, 1 insertion(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/bc528ba3/ql/pom.xml
----------------------------------------------------------------------
diff --git a/ql/pom.xml b/ql/pom.xml
index 6026c49..fc66591 100644
--- a/ql/pom.xml
+++ b/ql/pom.xml
@@ -707,6 +707,7 @@
                  <include>org.apache.hive.shims:hive-shims-0.20S</include>
                   <include>org.apache.hive.shims:hive-shims-0.23</include>
                   <include>org.apache.hive.shims:hive-shims-common</include>
+                  <include>org.apache.hive:hive-storage-api</include>
                   <include>com.googlecode.javaewah:JavaEWAH</include>
                   <include>javolution:javolution</include>
                   <include>com.google.protobuf:protobuf-java</include>


[2/5] hive git commit: HIVE-11384 : Add test case which covers both HIVE-11271 and HIVE-11333 (Yongzhi Chen via Szehon)

Posted by xu...@apache.org.
HIVE-11384 : Add test case which covers both HIVE-11271 and HIVE-11333 (Yongzhi Chen via Szehon)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8c0016aa
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8c0016aa
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8c0016aa

Branch: refs/heads/spark
Commit: 8c0016aa6eee33777ff38363c307bc00b9081770
Parents: 724b319
Author: Szehon Ho <sz...@cloudera.com>
Authored: Fri Jul 31 12:07:34 2015 -0700
Committer: Szehon Ho <sz...@cloudera.com>
Committed: Fri Jul 31 12:07:34 2015 -0700

----------------------------------------------------------------------
 .../clientpositive/unionall_unbalancedppd.q     |  72 +++++
 .../clientpositive/unionall_unbalancedppd.q.out | 280 +++++++++++++++++++
 2 files changed, 352 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/8c0016aa/ql/src/test/queries/clientpositive/unionall_unbalancedppd.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/unionall_unbalancedppd.q b/ql/src/test/queries/clientpositive/unionall_unbalancedppd.q
index 0825c2d..ab0a70b 100644
--- a/ql/src/test/queries/clientpositive/unionall_unbalancedppd.q
+++ b/ql/src/test/queries/clientpositive/unionall_unbalancedppd.q
@@ -102,6 +102,25 @@ from union_all_bug_test_2
 ) A
 WHERE (filter = 1 or filter = 0);
 
+explain
+
+SELECT f1
+FROM (
+
+SELECT
+f1
+, if('helloworld' like '%hello%' ,f1,f2) as filter
+FROM union_all_bug_test_1
+
+union all
+
+select
+f1
+, 0 as filter
+from union_all_bug_test_2
+) A
+WHERE (f1 = 1);
+
 SELECT f1
 FROM (
 
@@ -118,3 +137,56 @@ f1
 from union_all_bug_test_2
 ) A
 WHERE (f1 = 1);
+
+explain
+
+SELECT f1
+FROM (
+
+SELECT
+f1
+, if('helloworld' like '%hello%' ,f1,f2) as filter
+FROM union_all_bug_test_1
+
+union all
+
+select
+f1
+, 0 as filter
+from union_all_bug_test_2
+) A
+WHERE (filter = 1 and f1 = 1);
+
+SELECT f1
+FROM (
+
+SELECT
+f1
+, if('helloworld' like '%hello%' ,f1,f2) as filter
+FROM union_all_bug_test_1
+
+union all
+
+select
+f1
+, 0 as filter
+from union_all_bug_test_2
+) A
+WHERE (filter = 1 and f1 = 1);
+
+SELECT f1
+FROM (
+
+SELECT
+f1
+, if('helloworld' like '%hello%' ,f1,f2) as filter
+FROM union_all_bug_test_1
+
+union all
+
+select
+f1
+, 0 as filter
+from union_all_bug_test_2
+) A
+WHERE (f1 = 1 and filter = 1);

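For anyone re-running the new EXPLAIN cases outside the qtest harness: the queries reference two small fixture tables created earlier in unionall_unbalancedppd.q (the DDL is not part of this hunk). A minimal setup sketch, with column types and sample rows inferred from the plans and results in the .q.out below rather than copied from the patch:

-- Hypothetical fixture setup; the types and values here are assumptions,
-- the real DDL lives earlier in unionall_unbalancedppd.q.
CREATE TABLE union_all_bug_test_1 (f1 INT, f2 INT);
CREATE TABLE union_all_bug_test_2 (f1 INT);

INSERT INTO TABLE union_all_bug_test_1 VALUES (1, 0), (0, 1);
INSERT INTO TABLE union_all_bug_test_2 VALUES (0), (1);

-- With data like this, WHERE (f1 = 1) returns one row per branch, and
-- EXPLAIN should show the predicate pushed into both TableScan operators
-- instead of being evaluated above the Union.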
http://git-wip-us.apache.org/repos/asf/hive/blob/8c0016aa/ql/src/test/results/clientpositive/unionall_unbalancedppd.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/unionall_unbalancedppd.q.out b/ql/src/test/results/clientpositive/unionall_unbalancedppd.q.out
index 46828e9..88a6f30 100644
--- a/ql/src/test/results/clientpositive/unionall_unbalancedppd.q.out
+++ b/ql/src/test/results/clientpositive/unionall_unbalancedppd.q.out
@@ -329,6 +329,105 @@ POSTHOOK: Input: default@union_all_bug_test_2
 0
 1
 0
+PREHOOK: query: explain
+
+SELECT f1
+FROM (
+
+SELECT
+f1
+, if('helloworld' like '%hello%' ,f1,f2) as filter
+FROM union_all_bug_test_1
+
+union all
+
+select
+f1
+, 0 as filter
+from union_all_bug_test_2
+) A
+WHERE (f1 = 1)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+
+SELECT f1
+FROM (
+
+SELECT
+f1
+, if('helloworld' like '%hello%' ,f1,f2) as filter
+FROM union_all_bug_test_1
+
+union all
+
+select
+f1
+, 0 as filter
+from union_all_bug_test_2
+) A
+WHERE (f1 = 1)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: union_all_bug_test_1
+            Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (f1 = 1) (type: boolean)
+              Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: 1 (type: int)
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                Union
+                  Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: 1 (type: int)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.TextInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          TableScan
+            alias: union_all_bug_test_2
+            Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (f1 = 1) (type: boolean)
+              Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: 1 (type: int)
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+                Union
+                  Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: 1 (type: int)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.TextInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
 PREHOOK: query: SELECT f1
 FROM (
 
@@ -371,3 +470,184 @@ POSTHOOK: Input: default@union_all_bug_test_2
 #### A masked pattern was here ####
 1
 1
+PREHOOK: query: explain
+
+SELECT f1
+FROM (
+
+SELECT
+f1
+, if('helloworld' like '%hello%' ,f1,f2) as filter
+FROM union_all_bug_test_1
+
+union all
+
+select
+f1
+, 0 as filter
+from union_all_bug_test_2
+) A
+WHERE (filter = 1 and f1 = 1)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+
+SELECT f1
+FROM (
+
+SELECT
+f1
+, if('helloworld' like '%hello%' ,f1,f2) as filter
+FROM union_all_bug_test_1
+
+union all
+
+select
+f1
+, 0 as filter
+from union_all_bug_test_2
+) A
+WHERE (filter = 1 and f1 = 1)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: union_all_bug_test_1
+            Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: ((if(true, f1, f2) = 1) and (f1 = 1)) (type: boolean)
+              Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: 1 (type: int)
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                Union
+                  Statistics: Num rows: 2 Data size: 3 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: 1 (type: int)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                      table:
+                          input format: org.apache.hadoop.mapred.TextInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          TableScan
+            alias: union_all_bug_test_2
+            Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: COMPLETE
+            Filter Operator
+              predicate: false (type: boolean)
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+              Select Operator
+                expressions: 1 (type: int)
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+                Union
+                  Statistics: Num rows: 2 Data size: 3 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: 1 (type: int)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                      table:
+                          input format: org.apache.hadoop.mapred.TextInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT f1
+FROM (
+
+SELECT
+f1
+, if('helloworld' like '%hello%' ,f1,f2) as filter
+FROM union_all_bug_test_1
+
+union all
+
+select
+f1
+, 0 as filter
+from union_all_bug_test_2
+) A
+WHERE (filter = 1 and f1 = 1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@union_all_bug_test_1
+PREHOOK: Input: default@union_all_bug_test_2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT f1
+FROM (
+
+SELECT
+f1
+, if('helloworld' like '%hello%' ,f1,f2) as filter
+FROM union_all_bug_test_1
+
+union all
+
+select
+f1
+, 0 as filter
+from union_all_bug_test_2
+) A
+WHERE (filter = 1 and f1 = 1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@union_all_bug_test_1
+POSTHOOK: Input: default@union_all_bug_test_2
+#### A masked pattern was here ####
+1
+PREHOOK: query: SELECT f1
+FROM (
+
+SELECT
+f1
+, if('helloworld' like '%hello%' ,f1,f2) as filter
+FROM union_all_bug_test_1
+
+union all
+
+select
+f1
+, 0 as filter
+from union_all_bug_test_2
+) A
+WHERE (f1 = 1 and filter = 1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@union_all_bug_test_1
+PREHOOK: Input: default@union_all_bug_test_2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT f1
+FROM (
+
+SELECT
+f1
+, if('helloworld' like '%hello%' ,f1,f2) as filter
+FROM union_all_bug_test_1
+
+union all
+
+select
+f1
+, 0 as filter
+from union_all_bug_test_2
+) A
+WHERE (f1 = 1 and filter = 1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@union_all_bug_test_1
+POSTHOOK: Input: default@union_all_bug_test_2
+#### A masked pattern was here ####
+1
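
The second plan above is the one worth reading closely: in the branch that selects the constant 0 as filter, the pushed-down condition filter = 1 is folded at compile time, so the TableScan over union_all_bug_test_2 carries "predicate: false" and reads no rows, while in the other branch if('helloworld' like '%hello%', f1, f2) folds to if(true, f1, f2). A quick hedged way to reproduce just the constant-folding half (query shape adapted from the test; not part of the patch):

-- The constant branch in isolation: "0 as filter" makes "filter = 1"
-- statically false, so EXPLAIN should show "predicate: false" on the
-- TableScan and the scan should return nothing.
EXPLAIN
SELECT f1
FROM (
  SELECT f1, 0 AS filter
  FROM union_all_bug_test_2
) A
WHERE filter = 1;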