You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2016/03/29 20:19:33 UTC
hive git commit: HIVE-11424 : Rule to transform OR clauses into IN
clauses in CBO (Jesus Camacho Rodriguez via Ashutosh Chauhan)
Repository: hive
Updated Branches:
refs/heads/master 09b00fc86 -> 8c8ff3f14
HIVE-11424 : Rule to transform OR clauses into IN clauses in CBO (Jesus Camacho Rodriguez via Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8c8ff3f1
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8c8ff3f1
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8c8ff3f1
Branch: refs/heads/master
Commit: 8c8ff3f144921e9b985abe51eb82ebad94195b4a
Parents: 09b00fc
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Tue Mar 22 23:41:00 2016 -0800
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Tue Mar 29 11:18:58 2016 -0700
----------------------------------------------------------------------
.../hadoop/hive/ql/exec/FunctionRegistry.java | 7 +
.../hadoop/hive/ql/optimizer/Optimizer.java | 4 +-
.../rules/HivePointLookupOptimizerRule.java | 381 +++++++++++++++++++
.../ql/optimizer/pcr/PcrExprProcFactory.java | 103 ++---
.../hadoop/hive/ql/parse/CalcitePlanner.java | 40 +-
.../clientpositive/auto_join19_inclause.q | 18 +
.../queries/clientpositive/filter_in_or_dup.q | 19 +
.../clientpositive/auto_join19_inclause.q.out | 130 +++++++
.../clientpositive/constprog_semijoin.q.out | 4 +-
.../dynpart_sort_optimization_acid.q.out | 4 +-
.../clientpositive/filter_in_or_dup.q.out | 96 +++++
.../results/clientpositive/perf/query13.q.out | 14 +-
.../results/clientpositive/perf/query27.q.out | 2 +-
.../results/clientpositive/perf/query34.q.out | 2 +-
.../results/clientpositive/perf/query48.q.out | 14 +-
.../results/clientpositive/perf/query68.q.out | 2 +-
.../results/clientpositive/perf/query73.q.out | 2 +-
.../results/clientpositive/perf/query79.q.out | 2 +-
.../results/clientpositive/perf/query82.q.out | 2 +-
.../results/clientpositive/perf/query85.q.out | 26 +-
.../results/clientpositive/pointlookup2.q.out | 38 +-
.../results/clientpositive/pointlookup3.q.out | 50 ++-
.../results/clientpositive/pointlookup4.q.out | 2 +-
.../spark/constprog_semijoin.q.out | 4 +-
.../clientpositive/tez/bucketpruning1.q.out | 8 +-
.../clientpositive/tez/constprog_semijoin.q.out | 4 +-
.../tez/vector_mr_diff_schema_alias.q.out | 2 +-
.../vector_mr_diff_schema_alias.q.out | 2 +-
28 files changed, 824 insertions(+), 158 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
index b516925..56b96b4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
@@ -1398,6 +1398,13 @@ public final class FunctionRegistry {
}
/**
+ * Returns whether the exprNodeDesc is a node of "in", i.e. whether the class obtained from getGenericUDFClassFromExprDesc for it is exactly GenericUDFIn.
+ */
+ public static boolean isIn(ExprNodeDesc desc) {
+ return GenericUDFIn.class == getGenericUDFClassFromExprDesc(desc);
+ }
+
+ /**
* Returns whether the exprNodeDesc is a node of "not".
*/
public static boolean isOpNot(ExprNodeDesc desc) {
http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
index f56cd96..55c71dd 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
@@ -23,7 +23,6 @@ import java.util.List;
import java.util.Set;
import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.ql.optimizer.ConstantPropagateProcCtx.ConstantPropagateOption;
import org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverterPostProc;
import org.apache.hadoop.hive.ql.optimizer.correlation.CorrelationOptimizer;
import org.apache.hadoop.hive.ql.optimizer.correlation.ReduceSinkDeDuplication;
@@ -83,7 +82,8 @@ public class Optimizer {
}
// Try to transform OR predicates in Filter into simpler IN clauses first
- if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZER)) {
+ if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZER) &&
+ !pctx.getContext().isCboSucceeded()) {
final int min = HiveConf.getIntVar(hiveConf,
HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZERMIN);
transformations.add(new PointLookupOptimizer(min));
http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePointLookupOptimizerRule.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePointLookupOptimizerRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePointLookupOptimizerRule.java
new file mode 100644
index 0000000..9609a1e
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePointLookupOptimizerRule.java
@@ -0,0 +1,381 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Filter;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexCall;
+import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexLiteral;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexShuttle;
+import org.apache.calcite.rex.RexUtil;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.fun.SqlStdOperatorTable;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveIn;
+import org.apache.hadoop.hive.ql.optimizer.calcite.translator.TypeConverter;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+
+import com.google.common.collect.ArrayListMultimap;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.LinkedHashMultimap;
+import com.google.common.collect.ListMultimap;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import com.google.common.collect.Multimap;
+import com.google.common.collect.Sets;
+
+/**
+ * This optimization will take a Filter expression, and if its predicate contains
+ * an OR operator whose children are constant equality expressions, it will try
+ * to generate an IN clause (which is more efficient). If the OR operator contains
+ * AND operator children, the optimization might generate an IN clause that uses
+ * structs.
+ */
+public class HivePointLookupOptimizerRule extends RelOptRule {
+
+ protected static final Log LOG = LogFactory.getLog(HivePointLookupOptimizerRule.class);
+
+
+ // Minimum number of OR clauses needed to transform into IN clauses
+ private final int min;
+
+ public HivePointLookupOptimizerRule(int min) {
+ super(operand(Filter.class, any()));
+ this.min = min;
+ }
+
+ /**
+  * Rewrites the OR clauses of the matched Filter condition into (merged) IN clauses, when possible.
+  */
+ @Override public void onMatch(RelOptRuleCall call) {
+   final Filter filter = call.rel(0);
+   final RexBuilder rexBuilder = filter.getCluster().getRexBuilder();
+
+   // Condition after RexUtil.pullFactors: it is the input of the rewrite and also the
+   // reference used by the "did anything change" check in step 3
+   final RexNode condition = RexUtil.pullFactors(rexBuilder, filter.getCondition());
+
+   // 1. We try to transform possible candidates
+   final RexNode transformed = new RexTransformIntoInClause(rexBuilder, filter, min).apply(condition);
+
+   // 2. We merge IN expressions
+   final RexNode newCondition = new RexMergeInClause(rexBuilder).apply(transformed);
+
+   // 3. If we could not transform anything (same string form as before), we bail out
+   if (newCondition.toString().equals(condition.toString())) {
+     return;
+   }
+
+   // 4. We create the filter with the new condition and pass it to call.transformTo
+   call.transformTo(filter.copy(filter.getTraitSet(), filter.getInput(), newCondition));
+ }
+
+
+ /**
+ * Transforms OR clauses into IN clauses, when possible.
+ */
+ protected static class RexTransformIntoInClause extends RexShuttle {
+ private final RexBuilder rexBuilder;
+ private final Filter filterOp;
+ private final int min;
+
+ RexTransformIntoInClause(RexBuilder rexBuilder, Filter filterOp, int min) {
+ this.filterOp = filterOp;
+ this.rexBuilder = rexBuilder;
+ this.min = min;
+ }
+
+ /**
+  * Rewrites OR calls, and the OR operands of AND calls, into IN clauses. An AND is rewritten
+  * only if every one of its OR operands can be transformed; its other operands are kept as
+  * they are. Calls of any other kind are delegated to RexShuttle.
+  */
+ @Override public RexNode visitCall(RexCall call) {
+   switch (call.getKind()) {
+   case AND:
+     final List<RexNode> rewritten = new ArrayList<RexNode>();
+     for (RexNode conjunct : RexUtil.flattenAnd(call.getOperands())) {
+       RexNode replacement = conjunct;
+       if (conjunct.getKind() == SqlKind.OR) {
+         replacement = tryTransform(conjunct);
+         if (replacement == null) {
+           // All or nothing: one OR that cannot be transformed leaves the whole AND untouched
+           return call;
+         }
+       }
+       rewritten.add(replacement);
+     }
+     return RexUtil.composeConjunction(rexBuilder, rewritten, false);
+   case OR:
+     final RexNode inClause = tryTransform(call);
+     return inClause == null ? call : inClause;
+   default:
+     return super.visitCall(call);
+   }
+ }
+
+ /**
+  * Attempts the OR-to-IN rewrite of the given node against the row type of the filter.
+  * @param orNode expression of kind OR
+  * @return the IN clause, or null if the node could not be transformed or a
+  *         SemanticException was raised (the exception is logged)
+  */
+ private RexNode tryTransform(RexNode orNode) {
+   try {
+     return transformIntoInClauseCondition(rexBuilder, filterOp.getRowType(), orNode, min);
+   } catch (SemanticException e) {
+     LOG.error("Exception in HivePointLookupOptimizerRule", e);
+     return null;
+   }
+ }
+ /** Builds an IN clause from an OR whose operands are conjunctions of (column = literal) equalities; returns null when the OR has fewer than min operands or does not have that shape. */
+ private static RexNode transformIntoInClauseCondition(RexBuilder rexBuilder, RelDataType inputSchema,
+ RexNode condition, int min) throws SemanticException {
+ assert condition.getKind() == SqlKind.OR;
+
+ // 1. We extract the information necessary to create the predicate for the new
+ // filter: for each column, the list of literals it is compared to (one per OR operand)
+ ListMultimap<RexInputRef,RexLiteral> columnConstantsMap = ArrayListMultimap.create();
+ ImmutableList<RexNode> operands = RexUtil.flattenOr(((RexCall) condition).getOperands());
+ if (operands.size() < min) {
+ // Fewer OR operands than the configured minimum: we bail out
+ return null;
+ }
+ for (int i = 0; i < operands.size(); i++) {
+ RexNode operand = operands.get(i);
+ // Each OR operand is converted to CNF and analyzed conjunct by conjunct
+ final RexNode operandCNF = RexUtil.toCnf(rexBuilder, operand);
+ final List<RexNode> conjunctions = RelOptUtil.conjunctions(operandCNF);
+
+ for (RexNode conjunction: conjunctions) {
+ // 1.1. If it is not a RexCall, we bail out
+ if (!(conjunction instanceof RexCall)) {
+ return null;
+ }
+ // 1.2. We extract the information that we need (only EQUALS calls are accepted)
+ RexCall conjCall = (RexCall) conjunction;
+ if(conjCall.getOperator().getKind() == SqlKind.EQUALS) {
+ if (conjCall.operands.get(0) instanceof RexInputRef &&
+ conjCall.operands.get(1) instanceof RexLiteral) {
+ RexInputRef ref = (RexInputRef) conjCall.operands.get(0);
+ RexLiteral literal = (RexLiteral) conjCall.operands.get(1);
+ columnConstantsMap.put(ref, literal);
+ if (columnConstantsMap.get(ref).size() != i+1) {
+ // The column must hold exactly one literal for each of the operands 0..i (not missing earlier, not repeated here); otherwise we bail out
+ return null;
+ }
+ } else if (conjCall.operands.get(1) instanceof RexInputRef &&
+ conjCall.operands.get(0) instanceof RexLiteral) {
+ RexInputRef ref = (RexInputRef) conjCall.operands.get(1);
+ RexLiteral literal = (RexLiteral) conjCall.operands.get(0);
+ columnConstantsMap.put(ref, literal);
+ if (columnConstantsMap.get(ref).size() != i+1) {
+ // Same check as above, for the (literal = column) operand order
+ return null;
+ }
+ } else {
+ // Not an equality between a column reference and a literal: we bail out
+ return null;
+ }
+ } else {
+ return null;
+ }
+ }
+ }
+ // 2. We build the operands of the IN call and return it: first the column (or a ROW
+ // of columns), then one value (or one ROW of values) per OR operand
+ List<RexNode> newOperands = new ArrayList<RexNode>(operands.size());
+ // 2.1 Create structs
+ List<RexInputRef> columns = new ArrayList<RexInputRef>();
+ List<String> names = new ArrayList<String>();
+ ImmutableList.Builder<RelDataType> paramsTypes = ImmutableList.builder();
+ List<TypeInfo> structReturnType = new ArrayList<TypeInfo>();
+ ImmutableList.Builder<RelDataType> newOperandsTypes = ImmutableList.builder();
+ for (int i = 0; i < operands.size(); i++) {
+ List<RexLiteral> constantFields = new ArrayList<RexLiteral>(operands.size());
+ // NOTE(review): names, paramsTypes, structReturnType and newOperandsTypes are filled below but never read in this method
+ for (RexInputRef ref : columnConstantsMap.keySet()) {
+ // If any of the elements was not referenced by every operand, we bail out
+ if (columnConstantsMap.get(ref).size() <= i) {
+ return null;
+ }
+ RexLiteral columnConstant = columnConstantsMap.get(ref).get(i);
+ if (i == 0) {
+ columns.add(ref);
+ names.add(inputSchema.getFieldNames().get(ref.getIndex()));
+ paramsTypes.add(ref.getType());
+ structReturnType.add(TypeConverter.convert(ref.getType()));
+ }
+ constantFields.add(columnConstant);
+ }
+
+ if (i == 0) {
+ RexNode columnsRefs;
+ if (columns.size() == 1) {
+ columnsRefs = columns.get(0);
+ } else {
+ // Create STRUCT clause (ROW call over all the columns)
+ columnsRefs = rexBuilder.makeCall(SqlStdOperatorTable.ROW, columns);
+ }
+ newOperands.add(columnsRefs);
+ newOperandsTypes.add(columnsRefs.getType());
+ }
+ RexNode values;
+ if (constantFields.size() == 1) {
+ values = constantFields.get(0);
+ } else {
+ // Create STRUCT clause (ROW call over the literals of OR operand i)
+ values = rexBuilder.makeCall(SqlStdOperatorTable.ROW, constantFields);
+ }
+ newOperands.add(values);
+ newOperandsTypes.add(values.getType());
+ }
+
+ // 3. Create and return IN clause
+ return rexBuilder.makeCall(HiveIn.INSTANCE, newOperands);
+ }
+
+ }
+
+ /**
+ * Merge IN clauses, when possible.
+ */
+ protected static class RexMergeInClause extends RexShuttle {
+ private final RexBuilder rexBuilder;
+
+ RexMergeInClause(RexBuilder rexBuilder) {
+ this.rexBuilder = rexBuilder;
+ }
+
+ /**
+  * Merges IN clauses on the same deterministic expression: intersects under AND, unions under OR.
+  */
+ @Override public RexNode visitCall(RexCall call) {
+   final List<RexNode> operands;
+   final List<RexNode> newOperands;
+   Map<String,RexNode> stringToExpr = Maps.newHashMap();
+   Multimap<String,String> inLHSExprToRHSExprs = LinkedHashMultimap.create();
+   switch (call.getKind()) {
+   case AND:
+     // IN clauses need to be combined by keeping only common elements
+     operands = Lists.newArrayList(RexUtil.flattenAnd(call.getOperands()));
+     for (int i = 0; i < operands.size(); i++) {
+       RexNode operand = operands.get(i);
+       if (operand.getKind() == SqlKind.IN) {
+         RexCall inCall = (RexCall) operand;
+         if (!HiveCalciteUtil.isDeterministic(inCall.getOperands().get(0))) {
+           continue;
+         }
+         String ref = inCall.getOperands().get(0).toString();
+         stringToExpr.put(ref, inCall.getOperands().get(0));
+         if (inLHSExprToRHSExprs.containsKey(ref)) {
+           Set<String> expressions = Sets.newHashSet();
+           for (int j = 1; j < inCall.getOperands().size(); j++) {
+             String expr = inCall.getOperands().get(j).toString();
+             expressions.add(expr);
+             stringToExpr.put(expr, inCall.getOperands().get(j));
+           }
+           inLHSExprToRHSExprs.get(ref).retainAll(expressions);
+           if (!inLHSExprToRHSExprs.containsKey(ref)) {
+             // A Multimap drops a key once its last value is removed: the intersection is
+             // empty, no value can satisfy every IN clause, so the AND can never hold
+             return rexBuilder.makeLiteral(false);
+           }
+         } else {
+           for (int j = 1; j < inCall.getOperands().size(); j++) {
+             String expr = inCall.getOperands().get(j).toString();
+             inLHSExprToRHSExprs.put(ref, expr);
+             stringToExpr.put(expr, inCall.getOperands().get(j));
+           }
+         }
+         operands.remove(i--);
+       }
+     }
+     // Create IN clauses, followed by the operands that were not merged
+     newOperands = createInClauses(rexBuilder, stringToExpr, inLHSExprToRHSExprs);
+     newOperands.addAll(operands);
+     return RexUtil.composeConjunction(rexBuilder, newOperands, false);
+   case OR:
+     // IN clauses need to be combined by keeping all elements
+     operands = Lists.newArrayList(RexUtil.flattenOr(call.getOperands()));
+     for (int i = 0; i < operands.size(); i++) {
+       RexNode operand = operands.get(i);
+       if (operand.getKind() == SqlKind.IN) {
+         RexCall inCall = (RexCall) operand;
+         if (!HiveCalciteUtil.isDeterministic(inCall.getOperands().get(0))) {
+           continue;
+         }
+         String ref = inCall.getOperands().get(0).toString();
+         stringToExpr.put(ref, inCall.getOperands().get(0));
+         for (int j = 1; j < inCall.getOperands().size(); j++) {
+           String expr = inCall.getOperands().get(j).toString();
+           inLHSExprToRHSExprs.put(ref, expr);
+           stringToExpr.put(expr, inCall.getOperands().get(j));
+         }
+         operands.remove(i--);
+       }
+     }
+     // Create IN clauses, followed by the operands that were not merged
+     newOperands = createInClauses(rexBuilder, stringToExpr, inLHSExprToRHSExprs);
+     newOperands.addAll(operands);
+     return RexUtil.composeDisjunction(rexBuilder, newOperands, false);
+   default:
+     return super.visitCall(call);
+   }
+ }
+
+ /** Creates one expression per left-hand side key: an IN call (key first, then its values), or FALSE when it has no value. */
+ private static List<RexNode> createInClauses(RexBuilder rexBuilder, Map<String, RexNode> stringToExpr,
+     Multimap<String, String> inLHSExprToRHSExprs) {
+   final List<RexNode> inClauses = Lists.newArrayList();
+   for (Entry<String,Collection<String>> lhsToValues : inLHSExprToRHSExprs.asMap().entrySet()) {
+     final Collection<String> valueKeys = lhsToValues.getValue();
+     if (valueKeys.isEmpty()) {
+       inClauses.add(rexBuilder.makeLiteral(false));
+       continue;
+     }
+     final List<RexNode> inOperands = new ArrayList<RexNode>(valueKeys.size() + 1);
+     inOperands.add(stringToExpr.get(lhsToValues.getKey()));
+     for (String valueKey : valueKeys) {
+       inOperands.add(stringToExpr.get(valueKey));
+     }
+     inClauses.add(rexBuilder.makeCall(HiveIn.INSTANCE, inOperands));
+   }
+   return inClauses;
+ }
+
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java
index 9cc9ea9..9911179 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java
@@ -25,8 +25,6 @@ import java.util.List;
import java.util.Map;
import java.util.Stack;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
@@ -49,13 +47,12 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDynamicListDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct;
import org.apache.hadoop.hive.serde2.SerDeException;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
* Expression processor factory for partition condition removing. Each processor tries to
@@ -368,50 +365,66 @@ public final class PcrExprProcFactory {
return getResultWrapFromResults(results, fd, newNodeOutputs);
}
return new NodeInfoWrapper(WalkState.UNKNOWN, null, getOutExpr(fd, newNodeOutputs));
- } else if (fd.getGenericUDF() instanceof GenericUDFIn) {
- List<ExprNodeDesc> children = fd.getChildren();
- boolean removePredElem = false;
- ExprNodeDesc lhs = children.get(0);
-
- if (lhs instanceof ExprNodeGenericFuncDesc) {
- // Make sure that the generic udf is deterministic
- if (FunctionRegistry.isDeterministic(((ExprNodeGenericFuncDesc) lhs)
- .getGenericUDF())) {
- boolean hasOnlyPartCols = true;
- boolean hasDynamicListDesc = false;
-
- for (ExprNodeDesc ed : ((ExprNodeGenericFuncDesc) lhs).getChildren()) {
- // Check if the current field expression contains only
- // partition column or a virtual column or constants.
- // If yes, this filter predicate is a candidate for this optimization.
- if (!(ed instanceof ExprNodeColumnDesc &&
- ((ExprNodeColumnDesc)ed).getIsPartitionColOrVirtualCol())) {
- hasOnlyPartCols = false;
- break;
- }
- }
+ } else if (FunctionRegistry.isIn(fd)) {
+ List<ExprNodeDesc> children = fd.getChildren();
+ boolean removePredElem = false;
+ ExprNodeDesc lhs = children.get(0);
+
+ if (lhs instanceof ExprNodeColumnDesc) {
+ // It is an IN clause on a column
+ if (((ExprNodeColumnDesc)lhs).getIsPartitionColOrVirtualCol()) {
+ // It is a partition column, we can proceed
+ removePredElem = true;
+ }
+ if (removePredElem) {
+ // We should not remove the dynamic partition pruner generated synthetic predicates.
+ for (int i = 1; i < children.size(); i++) {
+ if (children.get(i) instanceof ExprNodeDynamicListDesc) {
+ removePredElem = false;
+ break;
+ }
+ }
+ }
+ } else if (lhs instanceof ExprNodeGenericFuncDesc) {
+ // It is an IN clause on a struct
+ // Make sure that the generic udf is deterministic
+ if (FunctionRegistry.isDeterministic(((ExprNodeGenericFuncDesc) lhs)
+ .getGenericUDF())) {
+ boolean hasOnlyPartCols = true;
+ boolean hasDynamicListDesc = false;
+
+ for (ExprNodeDesc ed : ((ExprNodeGenericFuncDesc) lhs).getChildren()) {
+ // Check if the current field expression contains only
+ // partition column or a virtual column or constants.
+ // If yes, this filter predicate is a candidate for this optimization.
+ if (!(ed instanceof ExprNodeColumnDesc &&
+ ((ExprNodeColumnDesc)ed).getIsPartitionColOrVirtualCol())) {
+ hasOnlyPartCols = false;
+ break;
+ }
+ }
- // If we have non-partition columns, we cannot remove the predicate.
- if (hasOnlyPartCols) {
- // We should not remove the dynamic partition pruner generated synthetic predicates.
- for (int i = 1; i < children.size(); i++) {
- if (children.get(i) instanceof ExprNodeDynamicListDesc) {
- hasDynamicListDesc = true;
- break;
- }
- }
+ // If we have non-partition columns, we cannot remove the predicate.
+ if (hasOnlyPartCols) {
+ // We should not remove the dynamic partition pruner generated synthetic predicates.
+ for (int i = 1; i < children.size(); i++) {
+ if (children.get(i) instanceof ExprNodeDynamicListDesc) {
+ hasDynamicListDesc = true;
+ break;
}
-
- removePredElem = hasOnlyPartCols && !hasDynamicListDesc;
}
+ }
+
+ removePredElem = hasOnlyPartCols && !hasDynamicListDesc;
}
+ }
- // If removePredElem is set to true, return true as this is a potential candidate
- // for partition condition remover. Else, set the WalkState for this node to unknown.
- return removePredElem ?
- new NodeInfoWrapper(WalkState.TRUE, null,
- new ExprNodeConstantDesc(fd.getTypeInfo(), Boolean.TRUE)) :
- new NodeInfoWrapper(WalkState.UNKNOWN, null, getOutExpr(fd, nodeOutputs)) ;
+ // If removePredElem is set to true, return true as this is a potential candidate
+ // for partition condition remover. Else, set the WalkState for this node to unknown.
+ return removePredElem ?
+ new NodeInfoWrapper(WalkState.TRUE, null,
+ new ExprNodeConstantDesc(fd.getTypeInfo(), Boolean.TRUE)) :
+ new NodeInfoWrapper(WalkState.UNKNOWN, null, getOutExpr(fd, nodeOutputs)) ;
} else if (!FunctionRegistry.isDeterministic(fd.getGenericUDF())) {
// If it's a non-deterministic UDF, set unknown to true
return new NodeInfoWrapper(WalkState.UNKNOWN, null,
http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index fd2246b..b59347d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -153,6 +153,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinProjectTranspos
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinPushTransitivePredicatesRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinToMultiJoinRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePartitionPruneRule;
+import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePointLookupOptimizerRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePreFilteringRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveProjectMergeRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveProjectSortTransposeRule;
@@ -1138,23 +1139,32 @@ public class CalcitePlanner extends SemanticAnalyzer {
// 3. Run exhaustive PPD, add not null filters, transitive inference,
// constant propagation, constant folding
+ List<RelOptRule> rules = Lists.newArrayList();
+ if (conf.getBoolVar(HiveConf.ConfVars.HIVEOPTPPD_WINDOWING)) {
+ rules.add(HiveFilterProjectTransposeRule.INSTANCE_DETERMINISTIC_WINDOWING);
+ } else {
+ rules.add(HiveFilterProjectTransposeRule.INSTANCE_DETERMINISTIC);
+ }
+ rules.add(HiveFilterSetOpTransposeRule.INSTANCE);
+ rules.add(HiveFilterSortTransposeRule.INSTANCE);
+ rules.add(HiveFilterJoinRule.JOIN);
+ rules.add(HiveFilterJoinRule.FILTER_ON_JOIN);
+ rules.add(new HiveFilterAggregateTransposeRule(Filter.class, HiveRelFactories.HIVE_FILTER_FACTORY, Aggregate.class));
+ rules.add(new FilterMergeRule(HiveRelFactories.HIVE_FILTER_FACTORY));
+ rules.add(HiveReduceExpressionsRule.PROJECT_INSTANCE);
+ rules.add(HiveReduceExpressionsRule.FILTER_INSTANCE);
+ rules.add(HiveReduceExpressionsRule.JOIN_INSTANCE);
+ if (conf.getBoolVar(HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZER)) {
+ final int min = conf.getIntVar(HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZERMIN);
+ rules.add(new HivePointLookupOptimizerRule(min));
+ }
+ rules.add(HiveJoinAddNotNullRule.INSTANCE_JOIN);
+ rules.add(HiveJoinAddNotNullRule.INSTANCE_SEMIJOIN);
+ rules.add(HiveJoinPushTransitivePredicatesRule.INSTANCE_JOIN);
+ rules.add(HiveJoinPushTransitivePredicatesRule.INSTANCE_SEMIJOIN);
perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
basePlan = hepPlan(basePlan, true, mdProvider, executorProvider, HepMatchOrder.BOTTOM_UP,
- conf.getBoolVar(HiveConf.ConfVars.HIVEOPTPPD_WINDOWING) ? HiveFilterProjectTransposeRule.INSTANCE_DETERMINISTIC_WINDOWING
- : HiveFilterProjectTransposeRule.INSTANCE_DETERMINISTIC,
- HiveFilterSetOpTransposeRule.INSTANCE,
- HiveFilterSortTransposeRule.INSTANCE,
- HiveFilterJoinRule.JOIN,
- HiveFilterJoinRule.FILTER_ON_JOIN,
- new HiveFilterAggregateTransposeRule(Filter.class, HiveRelFactories.HIVE_FILTER_FACTORY, Aggregate.class),
- new FilterMergeRule(HiveRelFactories.HIVE_FILTER_FACTORY),
- HiveReduceExpressionsRule.PROJECT_INSTANCE,
- HiveReduceExpressionsRule.FILTER_INSTANCE,
- HiveReduceExpressionsRule.JOIN_INSTANCE,
- HiveJoinAddNotNullRule.INSTANCE_JOIN,
- HiveJoinAddNotNullRule.INSTANCE_SEMIJOIN,
- HiveJoinPushTransitivePredicatesRule.INSTANCE_JOIN,
- HiveJoinPushTransitivePredicatesRule.INSTANCE_SEMIJOIN);
+ rules.toArray(new RelOptRule[rules.size()]));
perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
"Calcite: Prejoin ordering transformation, PPD, not null predicates, transitive inference, constant folding");
http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/queries/clientpositive/auto_join19_inclause.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/auto_join19_inclause.q b/ql/src/test/queries/clientpositive/auto_join19_inclause.q
new file mode 100644
index 0000000..7773289
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/auto_join19_inclause.q
@@ -0,0 +1,18 @@
+set hive.mapred.mode=nonstrict;
+set hive.auto.convert.join = true;
+set hive.optimize.point.lookup.min=2;
+
+CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE;
+
+explain
+FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key)
+INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value
+where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11');
+
+
+FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key)
+INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value
+where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11');
+
+
+SELECT sum(hash(dest1.key,dest1.value)) FROM dest1;
http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/queries/clientpositive/filter_in_or_dup.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/filter_in_or_dup.q b/ql/src/test/queries/clientpositive/filter_in_or_dup.q
new file mode 100644
index 0000000..34a5139
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/filter_in_or_dup.q
@@ -0,0 +1,19 @@
+set hive.optimize.point.lookup.min=2;
+
+EXPLAIN
+SELECT f.key
+FROM cbo_t1 f
+WHERE (f.key = '1' OR f.key='2')
+AND f.key IN ('1', '2');
+
+EXPLAIN
+SELECT f.key
+FROM cbo_t1 f
+WHERE (f.key = '1' OR f.key = '2')
+AND f.key IN ('1', '2', '3');
+
+EXPLAIN
+SELECT f.key
+FROM cbo_t1 f
+WHERE (f.key = '1' OR f.key='2' OR f.key='3')
+AND f.key IN ('1', '2');
http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/auto_join19_inclause.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/auto_join19_inclause.q.out b/ql/src/test/results/clientpositive/auto_join19_inclause.q.out
new file mode 100644
index 0000000..3f70055
--- /dev/null
+++ b/ql/src/test/results/clientpositive/auto_join19_inclause.q.out
@@ -0,0 +1,130 @@
+PREHOOK: query: CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest1
+POSTHOOK: query: CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dest1
+PREHOOK: query: explain
+FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key)
+INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value
+where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11')
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key)
+INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value
+where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11')
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-5 is a root stage
+ Stage-4 depends on stages: Stage-5
+ Stage-0 depends on stages: Stage-4
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-5
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $hdt$_1:src2
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $hdt$_1:src2
+ TableScan
+ alias: src2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ HashTable Sink Operator
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col4
+ Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: UDFToInteger(_col0) (type: int), _col4 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+PREHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key)
+INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value
+where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+PREHOOK: Output: default@dest1
+POSTHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key)
+INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value
+where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+POSTHOOK: Output: default@dest1
+POSTHOOK: Lineage: dest1.key EXPRESSION [(srcpart)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: SELECT sum(hash(dest1.key,dest1.value)) FROM dest1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(dest1.key,dest1.value)) FROM dest1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest1
+#### A masked pattern was here ####
+407444119660
http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/constprog_semijoin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/constprog_semijoin.q.out b/ql/src/test/results/clientpositive/constprog_semijoin.q.out
index 0e0e883..940a148 100644
--- a/ql/src/test/results/clientpositive/constprog_semijoin.q.out
+++ b/ql/src/test/results/clientpositive/constprog_semijoin.q.out
@@ -502,7 +502,7 @@ STAGE PLANS:
alias: table1
Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((((dimid = 100) = true) and (dimid) IN (100, 200)) and (dimid = 100) is not null) (type: boolean)
+ predicate: (((dimid) IN (100, 200) and ((dimid = 100) = true)) and (dimid = 100) is not null) (type: boolean)
Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: id (type: int), val (type: string), val1 (type: string), dimid (type: int)
@@ -518,7 +518,7 @@ STAGE PLANS:
alias: table3
Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((((id = 100) = true) and (id) IN (100, 200)) and (id = 100) is not null) (type: boolean)
+ predicate: (((id) IN (100, 200) and ((id = 100) = true)) and (id = 100) is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: id (type: int), (id = 100) (type: boolean)
http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out b/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out
index ddb05e2..eca29df 100644
--- a/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out
+++ b/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out
@@ -153,7 +153,7 @@ STAGE PLANS:
TableScan
alias: acid
Filter Operator
- predicate: ((key = 'foo') and (ds) IN ('2008-04-08')) (type: boolean)
+ predicate: (key = 'foo') (type: boolean)
Select Operator
expressions: ROW__ID (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), ds (type: string)
outputColumnNames: _col0, _col3
@@ -390,7 +390,7 @@ STAGE PLANS:
TableScan
alias: acid
Filter Operator
- predicate: ((key = 'foo') and (ds) IN ('2008-04-08')) (type: boolean)
+ predicate: (key = 'foo') (type: boolean)
Select Operator
expressions: ROW__ID (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), ds (type: string)
outputColumnNames: _col0, _col3
http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/filter_in_or_dup.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/filter_in_or_dup.q.out b/ql/src/test/results/clientpositive/filter_in_or_dup.q.out
new file mode 100644
index 0000000..f863ac3
--- /dev/null
+++ b/ql/src/test/results/clientpositive/filter_in_or_dup.q.out
@@ -0,0 +1,96 @@
+PREHOOK: query: EXPLAIN
+SELECT f.key
+FROM cbo_t1 f
+WHERE (f.key = '1' OR f.key='2')
+AND f.key IN ('1', '2')
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT f.key
+FROM cbo_t1 f
+WHERE (f.key = '1' OR f.key='2')
+AND f.key IN ('1', '2')
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: f
+ Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key) IN ('1', '2') (type: boolean)
+ Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+ ListSink
+
+PREHOOK: query: EXPLAIN
+SELECT f.key
+FROM cbo_t1 f
+WHERE (f.key = '1' OR f.key = '2')
+AND f.key IN ('1', '2', '3')
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT f.key
+FROM cbo_t1 f
+WHERE (f.key = '1' OR f.key = '2')
+AND f.key IN ('1', '2', '3')
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: f
+ Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key) IN ('1', '2') (type: boolean)
+ Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+ ListSink
+
+PREHOOK: query: EXPLAIN
+SELECT f.key
+FROM cbo_t1 f
+WHERE (f.key = '1' OR f.key='2' OR f.key='3')
+AND f.key IN ('1', '2')
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT f.key
+FROM cbo_t1 f
+WHERE (f.key = '1' OR f.key='2' OR f.key='3')
+AND f.key IN ('1', '2')
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: f
+ Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key) IN ('1', '2') (type: boolean)
+ Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+ ListSink
+
http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/perf/query13.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/query13.q.out b/ql/src/test/results/clientpositive/perf/query13.q.out
index cc40e79..ad50576 100644
--- a/ql/src/test/results/clientpositive/perf/query13.q.out
+++ b/ql/src/test/results/clientpositive/perf/query13.q.out
@@ -128,7 +128,7 @@ Stage-0
SHUFFLE [RS_39]
Group By Operator [GBY_38] (rows=1 width=112)
Output:["_col0","_col1","_col2","_col3"],aggregations:["avg(_col5)","avg(_col7)","avg(_col8)","sum(_col8)"]
- Merge Join Operator [MERGEJOIN_73] (rows=18150000 width=1014)
+ Merge Join Operator [MERGEJOIN_73] (rows=9075000 width=1014)
Conds:RS_34._col0=RS_35._col0(Inner),Output:["_col5","_col7","_col8"]
<-Map 12 [SIMPLE_EDGE]
SHUFFLE [RS_35]
@@ -142,19 +142,19 @@ Stage-0
<-Reducer 5 [SIMPLE_EDGE]
SHUFFLE [RS_34]
PartitionCols:_col0
- Select Operator [SEL_30] (rows=16500000 width=1014)
+ Select Operator [SEL_30] (rows=8250000 width=1014)
Output:["_col0","_col5","_col7","_col8"]
- Filter Operator [FIL_29] (rows=16500000 width=1014)
+ Filter Operator [FIL_29] (rows=8250000 width=1014)
predicate:(((_col17) IN ('KY', 'GA', 'NM') and _col9 BETWEEN 100 AND 200) or ((_col17) IN ('MT', 'OR', 'IN') and _col9 BETWEEN 150 AND 300) or ((_col17) IN ('WI', 'MO', 'WV') and _col9 BETWEEN 50 AND 250))
- Merge Join Operator [MERGEJOIN_72] (rows=22000000 width=1014)
+ Merge Join Operator [MERGEJOIN_72] (rows=11000000 width=1014)
Conds:RS_26._col3=RS_27._col0(Inner),Output:["_col0","_col5","_col7","_col8","_col9","_col17"]
<-Map 11 [SIMPLE_EDGE]
SHUFFLE [RS_27]
PartitionCols:_col0
- Select Operator [SEL_25] (rows=20000000 width=1014)
+ Select Operator [SEL_25] (rows=10000000 width=1014)
Output:["_col0","_col1"]
- Filter Operator [FIL_67] (rows=20000000 width=1014)
- predicate:((((ca_state) IN ('KY', 'GA', 'NM') or (ca_state) IN ('MT', 'OR', 'IN') or (ca_state) IN ('WI', 'MO', 'WV')) and (ca_country = 'United States')) and ca_address_sk is not null)
+ Filter Operator [FIL_67] (rows=10000000 width=1014)
+ predicate:(((ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and (ca_country = 'United States')) and ca_address_sk is not null)
TableScan [TS_23] (rows=40000000 width=1014)
default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state","ca_country"]
<-Reducer 4 [SIMPLE_EDGE]
http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/perf/query27.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/query27.q.out b/ql/src/test/results/clientpositive/perf/query27.q.out
index 635c402..3a32d7b 100644
--- a/ql/src/test/results/clientpositive/perf/query27.q.out
+++ b/ql/src/test/results/clientpositive/perf/query27.q.out
@@ -57,7 +57,7 @@ Stage-0
Select Operator [SEL_11] (rows=852 width=1910)
Output:["_col0","_col1"]
Filter Operator [FIL_53] (rows=852 width=1910)
- predicate:((s_state) IN ('KS', 'AL', 'MN', 'AL', 'SC', 'VT') and s_store_sk is not null)
+ predicate:((s_state) IN ('KS', 'AL', 'MN', 'SC', 'VT') and s_store_sk is not null)
TableScan [TS_9] (rows=1704 width=1910)
default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_state"]
<-Reducer 3 [SIMPLE_EDGE]
http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/perf/query34.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/query34.q.out b/ql/src/test/results/clientpositive/perf/query34.q.out
index 6fa6985..a08c3ff 100644
--- a/ql/src/test/results/clientpositive/perf/query34.q.out
+++ b/ql/src/test/results/clientpositive/perf/query34.q.out
@@ -94,7 +94,7 @@ Stage-0
Select Operator [SEL_5] (rows=36524 width=1119)
Output:["_col0"]
Filter Operator [FIL_53] (rows=36524 width=1119)
- predicate:(((d_dom BETWEEN 1 AND 3 or d_dom BETWEEN 25 AND 28) and (d_year) IN (1998, 1999, 2000)) and d_date_sk is not null)
+ predicate:(((d_year) IN (1998, 1999, 2000) and (d_dom BETWEEN 1 AND 3 or d_dom BETWEEN 25 AND 28)) and d_date_sk is not null)
TableScan [TS_3] (rows=73049 width=1119)
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_dom"]
http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/perf/query48.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/query48.q.out b/ql/src/test/results/clientpositive/perf/query48.q.out
index 691f5ad..d536bb5 100644
--- a/ql/src/test/results/clientpositive/perf/query48.q.out
+++ b/ql/src/test/results/clientpositive/perf/query48.q.out
@@ -23,7 +23,7 @@ Stage-0
SHUFFLE [RS_31]
Group By Operator [GBY_30] (rows=1 width=8)
Output:["_col0"],aggregations:["sum(_col4)"]
- Merge Join Operator [MERGEJOIN_57] (rows=18150000 width=1014)
+ Merge Join Operator [MERGEJOIN_57] (rows=9075000 width=1014)
Conds:RS_26._col0=RS_27._col0(Inner),Output:["_col4"]
<-Map 10 [SIMPLE_EDGE]
SHUFFLE [RS_27]
@@ -37,19 +37,19 @@ Stage-0
<-Reducer 4 [SIMPLE_EDGE]
SHUFFLE [RS_26]
PartitionCols:_col0
- Select Operator [SEL_22] (rows=16500000 width=1014)
+ Select Operator [SEL_22] (rows=8250000 width=1014)
Output:["_col0","_col4"]
- Filter Operator [FIL_21] (rows=16500000 width=1014)
+ Filter Operator [FIL_21] (rows=8250000 width=1014)
predicate:(((_col12) IN ('KY', 'GA', 'NM') and _col6 BETWEEN 0 AND 2000) or ((_col12) IN ('MT', 'OR', 'IN') and _col6 BETWEEN 150 AND 3000) or ((_col12) IN ('WI', 'MO', 'WV') and _col6 BETWEEN 50 AND 25000))
- Merge Join Operator [MERGEJOIN_56] (rows=22000000 width=1014)
+ Merge Join Operator [MERGEJOIN_56] (rows=11000000 width=1014)
Conds:RS_18._col2=RS_19._col0(Inner),Output:["_col0","_col4","_col6","_col12"]
<-Map 9 [SIMPLE_EDGE]
SHUFFLE [RS_19]
PartitionCols:_col0
- Select Operator [SEL_11] (rows=20000000 width=1014)
+ Select Operator [SEL_11] (rows=10000000 width=1014)
Output:["_col0","_col1"]
- Filter Operator [FIL_52] (rows=20000000 width=1014)
- predicate:((((ca_state) IN ('KY', 'GA', 'NM') or (ca_state) IN ('MT', 'OR', 'IN') or (ca_state) IN ('WI', 'MO', 'WV')) and (ca_country = 'United States')) and ca_address_sk is not null)
+ Filter Operator [FIL_52] (rows=10000000 width=1014)
+ predicate:(((ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and (ca_country = 'United States')) and ca_address_sk is not null)
TableScan [TS_9] (rows=40000000 width=1014)
default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state","ca_country"]
<-Reducer 3 [SIMPLE_EDGE]
http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/perf/query68.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/query68.q.out b/ql/src/test/results/clientpositive/perf/query68.q.out
index 7828cfc..38e4644 100644
--- a/ql/src/test/results/clientpositive/perf/query68.q.out
+++ b/ql/src/test/results/clientpositive/perf/query68.q.out
@@ -128,7 +128,7 @@ Stage-0
Select Operator [SEL_5] (rows=18262 width=1119)
Output:["_col0"]
Filter Operator [FIL_79] (rows=18262 width=1119)
- predicate:((d_dom BETWEEN 1 AND 2 and (d_year) IN (1998, 1999, 2000)) and d_date_sk is not null)
+ predicate:(((d_year) IN (1998, 1999, 2000) and d_dom BETWEEN 1 AND 2) and d_date_sk is not null)
TableScan [TS_3] (rows=73049 width=1119)
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_dom"]
http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/perf/query73.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/query73.q.out b/ql/src/test/results/clientpositive/perf/query73.q.out
index e367f51..cf3a75e 100644
--- a/ql/src/test/results/clientpositive/perf/query73.q.out
+++ b/ql/src/test/results/clientpositive/perf/query73.q.out
@@ -94,7 +94,7 @@ Stage-0
Select Operator [SEL_5] (rows=18262 width=1119)
Output:["_col0"]
Filter Operator [FIL_53] (rows=18262 width=1119)
- predicate:((d_dom BETWEEN 1 AND 2 and (d_year) IN (1998, 1999, 2000)) and d_date_sk is not null)
+ predicate:(((d_year) IN (1998, 1999, 2000) and d_dom BETWEEN 1 AND 2) and d_date_sk is not null)
TableScan [TS_3] (rows=73049 width=1119)
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_dom"]
http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/perf/query79.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/query79.q.out b/ql/src/test/results/clientpositive/perf/query79.q.out
index fdc5773..bf537b9 100644
--- a/ql/src/test/results/clientpositive/perf/query79.q.out
+++ b/ql/src/test/results/clientpositive/perf/query79.q.out
@@ -96,7 +96,7 @@ Stage-0
Select Operator [SEL_5] (rows=18262 width=1119)
Output:["_col0"]
Filter Operator [FIL_53] (rows=18262 width=1119)
- predicate:(((d_dow = 1) and (d_year) IN (1998, 1999, 2000)) and d_date_sk is not null)
+ predicate:(((d_year) IN (1998, 1999, 2000) and (d_dow = 1)) and d_date_sk is not null)
TableScan [TS_3] (rows=73049 width=1119)
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_dow"]
http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/perf/query82.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/query82.q.out b/ql/src/test/results/clientpositive/perf/query82.q.out
index 2461644..57a50c7 100644
--- a/ql/src/test/results/clientpositive/perf/query82.q.out
+++ b/ql/src/test/results/clientpositive/perf/query82.q.out
@@ -51,7 +51,7 @@ Stage-0
Select Operator [SEL_2] (rows=115500 width=1436)
Output:["_col0","_col1","_col2","_col3"]
Filter Operator [FIL_38] (rows=115500 width=1436)
- predicate:((i_current_price BETWEEN 30 AND 60 and (i_manufact_id) IN (437, 129, 727, 663)) and i_item_sk is not null)
+ predicate:(((i_manufact_id) IN (437, 129, 727, 663) and i_current_price BETWEEN 30 AND 60) and i_item_sk is not null)
TableScan [TS_0] (rows=462000 width=1436)
default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id","i_item_desc","i_current_price","i_manufact_id"]
<-Map 6 [SIMPLE_EDGE]
http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/perf/query85.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/query85.q.out b/ql/src/test/results/clientpositive/perf/query85.q.out
index 72ac500..93b5f4e 100644
--- a/ql/src/test/results/clientpositive/perf/query85.q.out
+++ b/ql/src/test/results/clientpositive/perf/query85.q.out
@@ -23,22 +23,22 @@ Stage-0
File Output Operator [FS_57]
Limit [LIM_56] (rows=100 width=1014)
Number of rows:100
- Select Operator [SEL_55] (rows=9982500 width=1014)
+ Select Operator [SEL_55] (rows=4991250 width=1014)
Output:["_col0","_col1","_col2","_col3"]
<-Reducer 9 [SIMPLE_EDGE]
SHUFFLE [RS_54]
- Select Operator [SEL_53] (rows=9982500 width=1014)
+ Select Operator [SEL_53] (rows=4991250 width=1014)
Output:["_col0","_col1","_col2","_col3"]
- Group By Operator [GBY_52] (rows=9982500 width=1014)
+ Group By Operator [GBY_52] (rows=4991250 width=1014)
Output:["_col0","_col1","_col2","_col3"],aggregations:["avg(VALUE._col0)","avg(VALUE._col1)","avg(VALUE._col2)"],keys:KEY._col0
<-Reducer 8 [SIMPLE_EDGE]
SHUFFLE [RS_51]
PartitionCols:_col0
- Group By Operator [GBY_50] (rows=19965000 width=1014)
+ Group By Operator [GBY_50] (rows=9982500 width=1014)
Output:["_col0","_col1","_col2","_col3"],aggregations:["avg(_col4)","avg(_col14)","avg(_col13)"],keys:_col28
- Select Operator [SEL_49] (rows=19965000 width=1014)
+ Select Operator [SEL_49] (rows=9982500 width=1014)
Output:["_col28","_col4","_col14","_col13"]
- Merge Join Operator [MERGEJOIN_107] (rows=19965000 width=1014)
+ Merge Join Operator [MERGEJOIN_107] (rows=9982500 width=1014)
Conds:RS_46._col11=RS_47._col0(Inner),Output:["_col4","_col13","_col14","_col28"]
<-Map 17 [SIMPLE_EDGE]
SHUFFLE [RS_47]
@@ -52,7 +52,7 @@ Stage-0
<-Reducer 7 [SIMPLE_EDGE]
SHUFFLE [RS_46]
PartitionCols:_col11
- Merge Join Operator [MERGEJOIN_106] (rows=18150000 width=1014)
+ Merge Join Operator [MERGEJOIN_106] (rows=9075000 width=1014)
Conds:RS_43._col0=RS_44._col0(Inner),Output:["_col4","_col11","_col13","_col14"]
<-Map 16 [SIMPLE_EDGE]
SHUFFLE [RS_44]
@@ -66,19 +66,19 @@ Stage-0
<-Reducer 6 [SIMPLE_EDGE]
SHUFFLE [RS_43]
PartitionCols:_col0
- Select Operator [SEL_36] (rows=16500000 width=1014)
+ Select Operator [SEL_36] (rows=8250000 width=1014)
Output:["_col0","_col11","_col13","_col14","_col4"]
- Filter Operator [FIL_35] (rows=16500000 width=1014)
+ Filter Operator [FIL_35] (rows=8250000 width=1014)
predicate:(((_col23) IN ('KY', 'GA', 'NM') and _col6 BETWEEN 100 AND 200) or ((_col23) IN ('MT', 'OR', 'IN') and _col6 BETWEEN 150 AND 300) or ((_col23) IN ('WI', 'MO', 'WV') and _col6 BETWEEN 50 AND 250))
- Merge Join Operator [MERGEJOIN_105] (rows=22000000 width=1014)
+ Merge Join Operator [MERGEJOIN_105] (rows=11000000 width=1014)
Conds:RS_32._col9=RS_33._col0(Inner),Output:["_col0","_col4","_col6","_col11","_col13","_col14","_col23"]
<-Map 15 [SIMPLE_EDGE]
SHUFFLE [RS_33]
PartitionCols:_col0
- Select Operator [SEL_28] (rows=20000000 width=1014)
+ Select Operator [SEL_28] (rows=10000000 width=1014)
Output:["_col0","_col1"]
- Filter Operator [FIL_98] (rows=20000000 width=1014)
- predicate:((((ca_state) IN ('KY', 'GA', 'NM') or (ca_state) IN ('MT', 'OR', 'IN') or (ca_state) IN ('WI', 'MO', 'WV')) and (ca_country = 'United States')) and ca_address_sk is not null)
+ Filter Operator [FIL_98] (rows=10000000 width=1014)
+ predicate:(((ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and (ca_country = 'United States')) and ca_address_sk is not null)
TableScan [TS_26] (rows=40000000 width=1014)
default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state","ca_country"]
<-Reducer 5 [SIMPLE_EDGE]
http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/pointlookup2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/pointlookup2.q.out b/ql/src/test/results/clientpositive/pointlookup2.q.out
index fb17e72..869e4cd 100644
--- a/ql/src/test/results/clientpositive/pointlookup2.q.out
+++ b/ql/src/test/results/clientpositive/pointlookup2.q.out
@@ -985,21 +985,17 @@ STAGE PLANS:
alias: t1
Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: (ds) IN ('2000-04-08', '2000-04-09') (type: boolean)
- Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int), value (type: string), ds (type: string)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
- auto parallelism: false
+ Select Operator
+ expressions: key (type: int), value (type: string), ds (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
+ tag: 0
+ value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
+ auto parallelism: false
TableScan
alias: t2
Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE
@@ -1169,11 +1165,11 @@ STAGE PLANS:
0
1
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 44 Data size: 352 Basic stats: COMPLETE Column stats: NONE
Filter Operator
isSamplingPred: false
- predicate: (struct(_col4,_col2)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean)
- Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+ predicate: (struct(_col2,_col4)) IN (const struct('2000-04-08',1), const struct('2000-04-09',2)) (type: boolean)
+ Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 0
@@ -1201,7 +1197,7 @@ STAGE PLANS:
key expressions: _col4 (type: int), _col5 (type: string), _col2 (type: string)
null sort order: aaa
sort order: +++
- Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE
tag: -1
value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string)
auto parallelism: false
@@ -1235,13 +1231,13 @@ STAGE PLANS:
Select Operator
expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
- Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/pointlookup3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/pointlookup3.q.out b/ql/src/test/results/clientpositive/pointlookup3.q.out
index d5c4157..e98ba76 100644
--- a/ql/src/test/results/clientpositive/pointlookup3.q.out
+++ b/ql/src/test/results/clientpositive/pointlookup3.q.out
@@ -129,7 +129,7 @@ STAGE PLANS:
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: (struct(ds1,key)) IN (const struct('2000-04-08',1), const struct('2000-04-09',2)) (type: boolean)
+ predicate: (struct(key,ds1)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean)
Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: int), value (type: string), ds1 (type: string), ds2 (type: string)
@@ -374,14 +374,14 @@ STAGE PLANS:
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: (key = 1) (type: boolean)
+ predicate: (struct(key,ds1)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean)
Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: value (type: string), ds1 (type: string)
- outputColumnNames: _col1, _col2
+ expressions: key (type: int), value (type: string), ds1 (type: string)
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: 1 (type: int), _col1 (type: string), _col2 (type: string), '2001-04-08' (type: string)
+ key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), '2001-04-08' (type: string)
null sort order: aaaa
sort order: ++++
Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
@@ -441,7 +441,7 @@ STAGE PLANS:
Needs Tagging: false
Reduce Operator Tree:
Select Operator
- expressions: 1 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), '2001-04-08' (type: string)
+ expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), '2001-04-08' (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -1149,21 +1149,17 @@ STAGE PLANS:
alias: t1
Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: (ds1) IN ('2000-04-08', '2000-04-09') (type: boolean)
- Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int), value (type: string), ds1 (type: string), ds2 (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string)
- auto parallelism: false
+ Select Operator
+ expressions: key (type: int), value (type: string), ds1 (type: string), ds2 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
+ tag: 0
+ value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string)
+ auto parallelism: false
TableScan
alias: t1
Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE
@@ -1337,11 +1333,11 @@ STAGE PLANS:
0
1
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
- Statistics: Num rows: 33 Data size: 264 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 44 Data size: 352 Basic stats: COMPLETE Column stats: NONE
Filter Operator
isSamplingPred: false
- predicate: (struct(_col4,_col2)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean)
- Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: NONE
+ predicate: (struct(_col2,_col4)) IN (const struct('2000-04-08',1), const struct('2000-04-09',2)) (type: boolean)
+ Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 0
@@ -1369,7 +1365,7 @@ STAGE PLANS:
key expressions: _col4 (type: int), _col5 (type: string), _col2 (type: string)
null sort order: aaa
sort order: +++
- Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE
tag: -1
value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string), _col6 (type: string), _col7 (type: string)
auto parallelism: false
@@ -1403,13 +1399,13 @@ STAGE PLANS:
Select Operator
expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col3 (type: string), VALUE._col4 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
- Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
- Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/pointlookup4.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/pointlookup4.q.out b/ql/src/test/results/clientpositive/pointlookup4.q.out
index 0a9bd3e..6236272 100644
--- a/ql/src/test/results/clientpositive/pointlookup4.q.out
+++ b/ql/src/test/results/clientpositive/pointlookup4.q.out
@@ -384,7 +384,7 @@ STAGE PLANS:
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: (struct(ds1,key,ds2)) IN (const struct('2000-04-08',1,'2001-04-08'), const struct('2000-04-09',2,'2001-04-09')) (type: boolean)
+ predicate: (struct(key,ds1,ds2)) IN (const struct(1,'2000-04-08','2001-04-08'), const struct(2,'2000-04-09','2001-04-09')) (type: boolean)
Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: int), value (type: string), ds1 (type: string), ds2 (type: string)
http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/spark/constprog_semijoin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/constprog_semijoin.q.out b/ql/src/test/results/clientpositive/spark/constprog_semijoin.q.out
index 2547405..0ab1365 100644
--- a/ql/src/test/results/clientpositive/spark/constprog_semijoin.q.out
+++ b/ql/src/test/results/clientpositive/spark/constprog_semijoin.q.out
@@ -523,7 +523,7 @@ STAGE PLANS:
alias: table1
Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((((dimid = 100) = true) and (dimid) IN (100, 200)) and (dimid = 100) is not null) (type: boolean)
+ predicate: (((dimid) IN (100, 200) and ((dimid = 100) = true)) and (dimid = 100) is not null) (type: boolean)
Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: id (type: int), val (type: string), val1 (type: string), dimid (type: int)
@@ -541,7 +541,7 @@ STAGE PLANS:
alias: table3
Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((((id = 100) = true) and (id) IN (100, 200)) and (id = 100) is not null) (type: boolean)
+ predicate: (((id) IN (100, 200) and ((id = 100) = true)) and (id = 100) is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: id (type: int), (id = 100) (type: boolean)
http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/tez/bucketpruning1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/bucketpruning1.q.out b/ql/src/test/results/clientpositive/tez/bucketpruning1.q.out
index 5315f2c..3557a3b 100644
--- a/ql/src/test/results/clientpositive/tez/bucketpruning1.q.out
+++ b/ql/src/test/results/clientpositive/tez/bucketpruning1.q.out
@@ -1011,13 +1011,13 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcbucket_pruned
- filterExpr: (((value = 'One') and (key) IN (2, 3)) and (ds = '2008-04-08')) (type: boolean)
+ filterExpr: (((key) IN (2, 3) and (value = 'One')) and (ds = '2008-04-08')) (type: boolean)
buckets included: [2,3,] of 16
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: (((value = 'One') and (key) IN (2, 3)) and (ds = '2008-04-08')) (type: boolean)
+ predicate: (((key) IN (2, 3) and (value = 'One')) and (ds = '2008-04-08')) (type: boolean)
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: key (type: int), 'One' (type: string), '2008-04-08' (type: string)
@@ -1700,12 +1700,12 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcbucket_pruned
- filterExpr: (((value = 'One') and (key) IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17)) and (ds = '2008-04-08')) (type: boolean)
+ filterExpr: (((key) IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17) and (value = 'One')) and (ds = '2008-04-08')) (type: boolean)
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: (((value = 'One') and (key) IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17)) and (ds = '2008-04-08')) (type: boolean)
+ predicate: (((key) IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17) and (value = 'One')) and (ds = '2008-04-08')) (type: boolean)
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: key (type: int), 'One' (type: string), '2008-04-08' (type: string)
http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/tez/constprog_semijoin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/constprog_semijoin.q.out b/ql/src/test/results/clientpositive/tez/constprog_semijoin.q.out
index 7a9932a..8fecbd7 100644
--- a/ql/src/test/results/clientpositive/tez/constprog_semijoin.q.out
+++ b/ql/src/test/results/clientpositive/tez/constprog_semijoin.q.out
@@ -317,7 +317,7 @@ Stage-0
Select Operator [SEL_2] (rows=2 width=20)
Output:["_col0","_col1","_col2","_col3"]
Filter Operator [FIL_15] (rows=2 width=20)
- predicate:((((dimid = 100) = true) and (dimid) IN (100, 200)) and (dimid = 100) is not null)
+ predicate:(((dimid) IN (100, 200) and ((dimid = 100) = true)) and (dimid = 100) is not null)
TableScan [TS_0] (rows=10 width=20)
default@table1,table1,Tbl:COMPLETE,Col:NONE,Output:["id","val","val1","dimid"]
<-Map 3 [SIMPLE_EDGE]
@@ -328,7 +328,7 @@ Stage-0
Select Operator [SEL_5] (rows=1 width=3)
Output:["_col0","_col1"]
Filter Operator [FIL_17] (rows=1 width=3)
- predicate:((((id = 100) = true) and (id) IN (100, 200)) and (id = 100) is not null)
+ predicate:(((id) IN (100, 200) and ((id = 100) = true)) and (id = 100) is not null)
TableScan [TS_3] (rows=5 width=3)
default@table3,table3,Tbl:COMPLETE,Col:NONE,Output:["id"]
http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/tez/vector_mr_diff_schema_alias.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_mr_diff_schema_alias.q.out b/ql/src/test/results/clientpositive/tez/vector_mr_diff_schema_alias.q.out
index 0d6ad69..5a2ab91 100644
--- a/ql/src/test/results/clientpositive/tez/vector_mr_diff_schema_alias.q.out
+++ b/ql/src/test/results/clientpositive/tez/vector_mr_diff_schema_alias.q.out
@@ -278,7 +278,7 @@ STAGE PLANS:
alias: store
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Filter Operator
- predicate: ((s_state) IN ('KS', 'AL', 'MN', 'AL', 'SC', 'VT') and s_store_sk is not null) (type: boolean)
+ predicate: ((s_state) IN ('KS', 'AL', 'MN', 'SC', 'VT') and s_store_sk is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: s_store_sk (type: int), s_state (type: string)
http://git-wip-us.apache.org/repos/asf/hive/blob/8c8ff3f1/ql/src/test/results/clientpositive/vector_mr_diff_schema_alias.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_mr_diff_schema_alias.q.out b/ql/src/test/results/clientpositive/vector_mr_diff_schema_alias.q.out
index 9fce991..a9e25e1 100644
--- a/ql/src/test/results/clientpositive/vector_mr_diff_schema_alias.q.out
+++ b/ql/src/test/results/clientpositive/vector_mr_diff_schema_alias.q.out
@@ -269,7 +269,7 @@ STAGE PLANS:
alias: store
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Filter Operator
- predicate: ((s_state) IN ('KS', 'AL', 'MN', 'AL', 'SC', 'VT') and s_store_sk is not null) (type: boolean)
+ predicate: ((s_state) IN ('KS', 'AL', 'MN', 'SC', 'VT') and s_store_sk is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: s_store_sk (type: int), s_state (type: string)