You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by se...@apache.org on 2016/08/31 22:43:20 UTC
[2/2] hive git commit: HIVE-14652 : incorrect results for not in on
partition columns (Sergey Shelukhin, reviewed by Jesus Camacho Rodriguez)
HIVE-14652 : incorrect results for not in on partition columns (Sergey Shelukhin, reviewed by Jesus Camacho Rodriguez)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b9b39504
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b9b39504
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b9b39504
Branch: refs/heads/branch-2.1
Commit: b9b395046f11f427fad0a6f5f7b50bb6db6e354e
Parents: 8a25c54
Author: Sergey Shelukhin <se...@apache.org>
Authored: Wed Aug 31 15:36:47 2016 -0700
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Wed Aug 31 15:42:50 2016 -0700
----------------------------------------------------------------------
.../ql/optimizer/pcr/PcrExprProcFactory.java | 423 +++++++++----------
.../hadoop/hive/ql/parse/ParseContext.java | 25 ++
.../partition_condition_remover.q | 13 +
.../partition_condition_remover.q.out | 79 ++++
ql/src/test/results/clientpositive/pcs.q.out | 4 +-
5 files changed, 330 insertions(+), 214 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/b9b39504/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java
index f9388e2..461dbe5 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.optimizer.pcr;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct;
+
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
@@ -185,7 +187,7 @@ public final class PcrExprProcFactory {
}
public enum WalkState {
- PART_COL, TRUE, FALSE, CONSTANT, UNKNOWN, DIVIDED
+ PART_COL, TRUE, FALSE, CONSTANT, UNKNOWN, DIVIDED, PART_COL_STRUCT
}
public static class NodeInfoWrapper {
@@ -253,242 +255,239 @@ public final class PcrExprProcFactory {
Object... nodeOutputs) throws SemanticException {
PcrExprProcCtx ctx = (PcrExprProcCtx) procCtx;
ExprNodeGenericFuncDesc fd = (ExprNodeGenericFuncDesc) nd;
+ if (LOG.isDebugEnabled()) {
+ String err = "Processing " + fd.getExprString() + " "
+ + fd.getGenericUDF().getUdfName() + " outputs ";
+ for (Object child : nodeOutputs) {
+ NodeInfoWrapper wrapper = (NodeInfoWrapper) child;
+ err += "{" + wrapper.state + ", " + wrapper.outExpr + "}, ";
+ }
+ LOG.debug(err);
+ }
if (FunctionRegistry.isOpNot(fd)) {
- assert (nodeOutputs.length == 1);
- NodeInfoWrapper wrapper = (NodeInfoWrapper) nodeOutputs[0];
- if (wrapper.state == WalkState.TRUE) {
- ExprNodeConstantDesc falseDesc = new ExprNodeConstantDesc(
- wrapper.outExpr.getTypeInfo(), Boolean.FALSE);
- return new NodeInfoWrapper(WalkState.FALSE, null, falseDesc);
- } else if (wrapper.state == WalkState.FALSE) {
- ExprNodeConstantDesc trueDesc = new ExprNodeConstantDesc(
- wrapper.outExpr.getTypeInfo(), Boolean.TRUE);
- return new NodeInfoWrapper(WalkState.TRUE, null, trueDesc);
- } else if (wrapper.state == WalkState.DIVIDED) {
+ return handleUdfNot(ctx, fd, nodeOutputs);
+ } else if (FunctionRegistry.isOpAnd(fd)) {
+ return handleUdfAnd(ctx, fd, nodeOutputs);
+ } else if (FunctionRegistry.isOpOr(fd)) {
+ return handleUdfOr(ctx, fd, nodeOutputs);
+ } else if (FunctionRegistry.isIn(fd)) {
+ List<ExprNodeDesc> children = fd.getChildren();
+ // We should not remove the dynamic partition pruner generated synthetic predicates.
+ for (int i = 1; i < children.size(); i++) {
+ if (children.get(i) instanceof ExprNodeDynamicListDesc) {
+ return new NodeInfoWrapper(WalkState.UNKNOWN, null, getOutExpr(fd, nodeOutputs));
+ }
+ }
+ // Otherwise, handle like a normal generic UDF.
+ return handleDeterministicUdf(ctx, fd, nodeOutputs);
+ } else if (fd.getGenericUDF() instanceof GenericUDFStruct) {
+ // Handle structs composed of partition columns.
+ for (Object child : nodeOutputs) {
+ NodeInfoWrapper wrapper = (NodeInfoWrapper) child;
+ if (wrapper.state != WalkState.PART_COL) {
+ return handleDeterministicUdf(ctx, fd, nodeOutputs); // Giving up.
+ }
+ }
+ return new NodeInfoWrapper(WalkState.PART_COL_STRUCT, null, getOutExpr(fd, nodeOutputs));
+ } else if (!FunctionRegistry.isDeterministic(fd.getGenericUDF())) {
+ // If it's a non-deterministic UDF, set unknown to true
+ return new NodeInfoWrapper(WalkState.UNKNOWN, null, getOutExpr(fd, nodeOutputs));
+ } else {
+ return handleDeterministicUdf(ctx, fd, nodeOutputs);
+ }
+ }
+
+ private Object handleDeterministicUdf(PcrExprProcCtx ctx,
+ ExprNodeGenericFuncDesc fd, Object... nodeOutputs)
+ throws SemanticException {
+ Boolean has_part_col = checkForPartColsAndUnknown(fd, nodeOutputs);
+ if (has_part_col == null) {
+ return new NodeInfoWrapper(WalkState.UNKNOWN, null, getOutExpr(fd, nodeOutputs));
+ }
+
+ if (has_part_col && fd.getTypeInfo().getCategory() == Category.PRIMITIVE) {
+ // we need to evaluate result for every pruned partition
+ if (fd.getTypeInfo().equals(TypeInfoFactory.booleanTypeInfo)) {
+ // if the return type of the GenericUDF is boolean and all partitions agree on
+ // a result, we update the state of the node to be TRUE or FALSE
Boolean[] results = new Boolean[ctx.getPartList().size()];
for (int i = 0; i < ctx.getPartList().size(); i++) {
- results[i] = opNot(wrapper.ResultVector[i]);
+ results[i] = (Boolean) evalExprWithPart(fd, ctx.getPartList().get(i),
+ ctx.getVirtualColumns());
}
- return new NodeInfoWrapper(WalkState.DIVIDED, results,
- getOutExpr(fd, nodeOutputs));
+ return getResultWrapFromResults(results, fd, nodeOutputs);
+ }
+
+ // the case that the return type of the GenericUDF is not boolean: if not all partitions
+ // agree on the result, we make the node UNKNOWN. If they all agree, we replace the node
+ // with a CONSTANT node whose value is the agreed result.
+ Object[] results = new Object[ctx.getPartList().size()];
+ for (int i = 0; i < ctx.getPartList().size(); i++) {
+ results[i] = evalExprWithPart(fd, ctx.getPartList().get(i), ctx.getVirtualColumns());
+ }
+ Object result = ifResultsAgree(results);
+ if (result == null) {
+ // if the result is not boolean and not all partitions agree on the
+ // result, we don't remove the condition. Potentially, it can miss
+ // the case like "where ds % 3 == 1 or ds % 3 == 2"
+ // TODO: handle this case by making result vector to handle all
+ // constant values.
+ return new NodeInfoWrapper(WalkState.UNKNOWN, null, getOutExpr(fd, nodeOutputs));
+ }
+ return new NodeInfoWrapper(WalkState.CONSTANT, null,
+ new ExprNodeConstantDesc(fd.getTypeInfo(), result));
+ }
+
+ // Try to fold, otherwise return the expression itself
+ final ExprNodeGenericFuncDesc desc = getOutExpr(fd, nodeOutputs);
+ final ExprNodeDesc foldedDesc = ConstantPropagateProcFactory.foldExpr(desc);
+ if (foldedDesc != null && foldedDesc instanceof ExprNodeConstantDesc) {
+ ExprNodeConstantDesc constant = (ExprNodeConstantDesc) foldedDesc;
+ if (Boolean.TRUE.equals(constant.getValue())) {
+ return new NodeInfoWrapper(WalkState.TRUE, null, constant);
+ } else if (Boolean.FALSE.equals(constant.getValue())) {
+ return new NodeInfoWrapper(WalkState.FALSE, null, constant);
} else {
- return new NodeInfoWrapper(wrapper.state, null,
- getOutExpr(fd, nodeOutputs));
+ return new NodeInfoWrapper(WalkState.CONSTANT, null, constant);
}
- } else if (FunctionRegistry.isOpAnd(fd)) {
- boolean anyUnknown = false; // Whether any of the node outputs is unknown
- boolean allDivided = true; // Whether all of the node outputs are divided
- List<NodeInfoWrapper> newNodeOutputsList =
- new ArrayList<NodeInfoWrapper>(nodeOutputs.length);
- for (int i = 0; i < nodeOutputs.length; i++) {
- NodeInfoWrapper c = (NodeInfoWrapper)nodeOutputs[i];
- if (c.state == WalkState.FALSE) {
- return c;
- }
- if (c.state == WalkState.UNKNOWN) {
- anyUnknown = true;
- }
- if (c.state != WalkState.DIVIDED) {
- allDivided = false;
- }
- if (c.state != WalkState.TRUE) {
- newNodeOutputsList.add(c);
- }
+ }
+ return new NodeInfoWrapper(WalkState.CONSTANT, null, desc);
+ }
+
+ private Boolean checkForPartColsAndUnknown(ExprNodeGenericFuncDesc fd,
+ Object... nodeOutputs) {
+ boolean has_part_col = false;
+ for (Object child : nodeOutputs) {
+ NodeInfoWrapper wrapper = (NodeInfoWrapper) child;
+ if (wrapper.state == WalkState.UNKNOWN) {
+ return null;
+ } else if (wrapper.state == WalkState.PART_COL
+ || wrapper.state == WalkState.PART_COL_STRUCT) {
+ has_part_col = true;
}
- // If all of them were true, return true
- if (newNodeOutputsList.size() == 0) {
- return new NodeInfoWrapper(WalkState.TRUE, null,
- new ExprNodeConstantDesc(fd.getTypeInfo(), Boolean.TRUE));
+ }
+ return has_part_col;
+ }
+
+ private Object handleUdfOr(PcrExprProcCtx ctx, ExprNodeGenericFuncDesc fd,
+ Object... nodeOutputs) {
+ boolean anyUnknown = false; // Whether any of the node outputs is unknown
+ boolean allDivided = true; // Whether all of the node outputs are divided
+ List<NodeInfoWrapper> newNodeOutputsList =
+ new ArrayList<NodeInfoWrapper>(nodeOutputs.length);
+ for (int i = 0; i< nodeOutputs.length; i++) {
+ NodeInfoWrapper c = (NodeInfoWrapper)nodeOutputs[i];
+ if (c.state == WalkState.TRUE) {
+ return c;
}
- // If we are left with a single child, return the child
- if (newNodeOutputsList.size() == 1) {
- return newNodeOutputsList.get(0);
+ if (c.state == WalkState.UNKNOWN) {
+ anyUnknown = true;
}
- Object[] newNodeOutputs = newNodeOutputsList.toArray();
- if (anyUnknown) {
- return new NodeInfoWrapper(WalkState.UNKNOWN, null, getOutExpr(fd, newNodeOutputs));
+ if (c.state != WalkState.DIVIDED) {
+ allDivided = false;
}
- if (allDivided) {
- Boolean[] results = new Boolean[ctx.getPartList().size()];
- for (int i = 0; i < ctx.getPartList().size(); i++) {
- Boolean[] andArray = new Boolean[newNodeOutputs.length];
- for (int j = 0; j < newNodeOutputs.length; j++) {
- andArray[j] = ((NodeInfoWrapper) newNodeOutputs[j]).ResultVector[i];
- }
- results[i] = opAnd(andArray);
- }
- return getResultWrapFromResults(results, fd, newNodeOutputs);
+ if (c.state != WalkState.FALSE) {
+ newNodeOutputsList.add(c);
}
+ }
+ // If all of them were false, return false
+ if (newNodeOutputsList.size() == 0) {
+ return new NodeInfoWrapper(WalkState.FALSE, null,
+ new ExprNodeConstantDesc(fd.getTypeInfo(), Boolean.FALSE));
+ }
+ // If we are left with a single child, return the child
+ if (newNodeOutputsList.size() == 1) {
+ return newNodeOutputsList.get(0);
+ }
+ Object[] newNodeOutputs = newNodeOutputsList.toArray();
+ if (anyUnknown) {
return new NodeInfoWrapper(WalkState.UNKNOWN, null, getOutExpr(fd, newNodeOutputs));
- } else if (FunctionRegistry.isOpOr(fd)) {
- boolean anyUnknown = false; // Whether any of the node outputs is unknown
- boolean allDivided = true; // Whether all of the node outputs are divided
- List<NodeInfoWrapper> newNodeOutputsList =
- new ArrayList<NodeInfoWrapper>(nodeOutputs.length);
- for (int i = 0; i< nodeOutputs.length; i++) {
- NodeInfoWrapper c = (NodeInfoWrapper)nodeOutputs[i];
- if (c.state == WalkState.TRUE) {
- return c;
- }
- if (c.state == WalkState.UNKNOWN) {
- anyUnknown = true;
- }
- if (c.state != WalkState.DIVIDED) {
- allDivided = false;
- }
- if (c.state != WalkState.FALSE) {
- newNodeOutputsList.add(c);
+ }
+ if (allDivided) {
+ Boolean[] results = new Boolean[ctx.getPartList().size()];
+ for (int i = 0; i < ctx.getPartList().size(); i++) {
+ Boolean[] orArray = new Boolean[newNodeOutputs.length];
+ for (int j = 0; j < newNodeOutputs.length; j++) {
+ orArray[j] = ((NodeInfoWrapper) newNodeOutputs[j]).ResultVector[i];
}
+ results[i] = opOr(orArray);
}
- // If all of them were false, return false
- if (newNodeOutputsList.size() == 0) {
- return new NodeInfoWrapper(WalkState.FALSE, null,
- new ExprNodeConstantDesc(fd.getTypeInfo(), Boolean.FALSE));
+ return getResultWrapFromResults(results, fd, newNodeOutputs);
+ }
+ return new NodeInfoWrapper(WalkState.UNKNOWN, null, getOutExpr(fd, newNodeOutputs));
+ }
+
+ private Object handleUdfAnd(PcrExprProcCtx ctx, ExprNodeGenericFuncDesc fd,
+ Object... nodeOutputs) {
+ boolean anyUnknown = false; // Whether any of the node outputs is unknown
+ boolean allDivided = true; // Whether all of the node outputs are divided
+ List<NodeInfoWrapper> newNodeOutputsList =
+ new ArrayList<NodeInfoWrapper>(nodeOutputs.length);
+ for (int i = 0; i < nodeOutputs.length; i++) {
+ NodeInfoWrapper c = (NodeInfoWrapper)nodeOutputs[i];
+ if (c.state == WalkState.FALSE) {
+ return c;
}
- // If we are left with a single child, return the child
- if (newNodeOutputsList.size() == 1) {
- return newNodeOutputsList.get(0);
+ if (c.state == WalkState.UNKNOWN) {
+ anyUnknown = true;
}
- Object[] newNodeOutputs = newNodeOutputsList.toArray();
- if (anyUnknown) {
- return new NodeInfoWrapper(WalkState.UNKNOWN, null, getOutExpr(fd, newNodeOutputs));
+ if (c.state != WalkState.DIVIDED) {
+ allDivided = false;
}
- if (allDivided) {
- Boolean[] results = new Boolean[ctx.getPartList().size()];
- for (int i = 0; i < ctx.getPartList().size(); i++) {
- Boolean[] orArray = new Boolean[newNodeOutputs.length];
- for (int j = 0; j < newNodeOutputs.length; j++) {
- orArray[j] = ((NodeInfoWrapper) newNodeOutputs[j]).ResultVector[i];
- }
- results[i] = opOr(orArray);
- }
- return getResultWrapFromResults(results, fd, newNodeOutputs);
+ if (c.state != WalkState.TRUE) {
+ newNodeOutputsList.add(c);
}
+ }
+ // If all of them were true, return true
+ if (newNodeOutputsList.size() == 0) {
+ return new NodeInfoWrapper(WalkState.TRUE, null,
+ new ExprNodeConstantDesc(fd.getTypeInfo(), Boolean.TRUE));
+ }
+ // If we are left with a single child, return the child
+ if (newNodeOutputsList.size() == 1) {
+ return newNodeOutputsList.get(0);
+ }
+ Object[] newNodeOutputs = newNodeOutputsList.toArray();
+ if (anyUnknown) {
return new NodeInfoWrapper(WalkState.UNKNOWN, null, getOutExpr(fd, newNodeOutputs));
- } else if (FunctionRegistry.isIn(fd)) {
- List<ExprNodeDesc> children = fd.getChildren();
- boolean removePredElem = false;
- ExprNodeDesc lhs = children.get(0);
-
- if (lhs instanceof ExprNodeColumnDesc) {
- // It is an IN clause on a column
- if (((ExprNodeColumnDesc)lhs).getIsPartitionColOrVirtualCol()) {
- // It is a partition column, we can proceed
- removePredElem = true;
- }
- if (removePredElem) {
- // We should not remove the dynamic partition pruner generated synthetic predicates.
- for (int i = 1; i < children.size(); i++) {
- if (children.get(i) instanceof ExprNodeDynamicListDesc) {
- removePredElem = false;
- break;
- }
- }
- }
- } else if (lhs instanceof ExprNodeGenericFuncDesc) {
- // It is an IN clause on a struct
- // Make sure that the generic udf is deterministic
- if (FunctionRegistry.isDeterministic(((ExprNodeGenericFuncDesc) lhs)
- .getGenericUDF())) {
- boolean hasOnlyPartCols = true;
- boolean hasDynamicListDesc = false;
-
- for (ExprNodeDesc ed : ((ExprNodeGenericFuncDesc) lhs).getChildren()) {
- // Check if the current field expression contains only
- // partition column or a virtual column or constants.
- // If yes, this filter predicate is a candidate for this optimization.
- if (!(ed instanceof ExprNodeColumnDesc &&
- ((ExprNodeColumnDesc)ed).getIsPartitionColOrVirtualCol())) {
- hasOnlyPartCols = false;
- break;
- }
- }
-
- // If we have non-partition columns, we cannot remove the predicate.
- if (hasOnlyPartCols) {
- // We should not remove the dynamic partition pruner generated synthetic predicates.
- for (int i = 1; i < children.size(); i++) {
- if (children.get(i) instanceof ExprNodeDynamicListDesc) {
- hasDynamicListDesc = true;
- break;
- }
- }
- }
-
- removePredElem = hasOnlyPartCols && !hasDynamicListDesc;
+ }
+ if (allDivided) {
+ Boolean[] results = new Boolean[ctx.getPartList().size()];
+ for (int i = 0; i < ctx.getPartList().size(); i++) {
+ Boolean[] andArray = new Boolean[newNodeOutputs.length];
+ for (int j = 0; j < newNodeOutputs.length; j++) {
+ andArray[j] = ((NodeInfoWrapper) newNodeOutputs[j]).ResultVector[i];
}
+ results[i] = opAnd(andArray);
}
+ return getResultWrapFromResults(results, fd, newNodeOutputs);
+ }
+ return new NodeInfoWrapper(WalkState.UNKNOWN, null, getOutExpr(fd, newNodeOutputs));
+ }
- // If removePredElem is set to true, return true as this is a potential candidate
- // for partition condition remover. Else, set the WalkState for this node to unknown.
- return removePredElem ?
- new NodeInfoWrapper(WalkState.TRUE, null,
- new ExprNodeConstantDesc(fd.getTypeInfo(), Boolean.TRUE)) :
- new NodeInfoWrapper(WalkState.UNKNOWN, null, getOutExpr(fd, nodeOutputs)) ;
- } else if (!FunctionRegistry.isDeterministic(fd.getGenericUDF())) {
- // If it's a non-deterministic UDF, set unknown to true
- return new NodeInfoWrapper(WalkState.UNKNOWN, null,
+ private Object handleUdfNot(PcrExprProcCtx ctx, ExprNodeGenericFuncDesc fd,
+ Object... nodeOutputs) {
+ assert (nodeOutputs.length == 1);
+ NodeInfoWrapper wrapper = (NodeInfoWrapper) nodeOutputs[0];
+ if (wrapper.state == WalkState.TRUE) {
+ ExprNodeConstantDesc falseDesc = new ExprNodeConstantDesc(
+ wrapper.outExpr.getTypeInfo(), Boolean.FALSE);
+ return new NodeInfoWrapper(WalkState.FALSE, null, falseDesc);
+ } else if (wrapper.state == WalkState.FALSE) {
+ ExprNodeConstantDesc trueDesc = new ExprNodeConstantDesc(
+ wrapper.outExpr.getTypeInfo(), Boolean.TRUE);
+ return new NodeInfoWrapper(WalkState.TRUE, null, trueDesc);
+ } else if (wrapper.state == WalkState.DIVIDED) {
+ Boolean[] results = new Boolean[ctx.getPartList().size()];
+ for (int i = 0; i < ctx.getPartList().size(); i++) {
+ results[i] = opNot(wrapper.ResultVector[i]);
+ }
+ return new NodeInfoWrapper(WalkState.DIVIDED, results,
getOutExpr(fd, nodeOutputs));
} else {
- // If any child is unknown, set unknown to true
- boolean has_part_col = false;
- for (Object child : nodeOutputs) {
- NodeInfoWrapper wrapper = (NodeInfoWrapper) child;
- if (wrapper.state == WalkState.UNKNOWN) {
- return new NodeInfoWrapper(WalkState.UNKNOWN, null, getOutExpr(fd, nodeOutputs));
- } else if (wrapper.state == WalkState.PART_COL) {
- has_part_col = true;
- }
- }
-
- if (has_part_col && fd.getTypeInfo().getCategory() == Category.PRIMITIVE) {
- // we need to evaluate result for every pruned partition
- if (fd.getTypeInfo().equals(TypeInfoFactory.booleanTypeInfo)) {
- // if the return type of the GenericUDF is boolean and all partitions agree on
- // a result, we update the state of the node to be TRUE of FALSE
- Boolean[] results = new Boolean[ctx.getPartList().size()];
- for (int i = 0; i < ctx.getPartList().size(); i++) {
- results[i] = (Boolean) evalExprWithPart(fd, ctx.getPartList().get(i),
- ctx.getVirtualColumns());
- }
- return getResultWrapFromResults(results, fd, nodeOutputs);
- }
-
- // the case that return type of the GenericUDF is not boolean, and if not all partition
- // agree on result, we make the node UNKNOWN. If they all agree, we replace the node
- // to be a CONSTANT node with value to be the agreed result.
- Object[] results = new Object[ctx.getPartList().size()];
- for (int i = 0; i < ctx.getPartList().size(); i++) {
- results[i] = evalExprWithPart(fd, ctx.getPartList().get(i), ctx.getVirtualColumns());
- }
- Object result = ifResultsAgree(results);
- if (result == null) {
- // if the result is not boolean and not all partition agree on the
- // result, we don't remove the condition. Potentially, it can miss
- // the case like "where ds % 3 == 1 or ds % 3 == 2"
- // TODO: handle this case by making result vector to handle all
- // constant values.
- return new NodeInfoWrapper(WalkState.UNKNOWN, null, getOutExpr(fd, nodeOutputs));
- }
- return new NodeInfoWrapper(WalkState.CONSTANT, null,
- new ExprNodeConstantDesc(fd.getTypeInfo(), result));
- }
-
- // Try to fold, otherwise return the expression itself
- final ExprNodeGenericFuncDesc desc = getOutExpr(fd, nodeOutputs);
- final ExprNodeDesc foldedDesc = ConstantPropagateProcFactory.foldExpr(desc);
- if (foldedDesc != null && foldedDesc instanceof ExprNodeConstantDesc) {
- ExprNodeConstantDesc constant = (ExprNodeConstantDesc) foldedDesc;
- if (Boolean.TRUE.equals(constant.getValue())) {
- return new NodeInfoWrapper(WalkState.TRUE, null, constant);
- } else if (Boolean.FALSE.equals(constant.getValue())) {
- return new NodeInfoWrapper(WalkState.FALSE, null, constant);
- } else {
- return new NodeInfoWrapper(WalkState.CONSTANT, null, constant);
- }
- }
- return new NodeInfoWrapper(WalkState.CONSTANT, null, desc);
+ return new NodeInfoWrapper(wrapper.state, null,
+ getOutExpr(fd, nodeOutputs));
}
}
};
http://git-wip-us.apache.org/repos/asf/hive/blob/b9b39504/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
index b2125ca..4353d3a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
@@ -19,6 +19,8 @@
package org.apache.hadoop.hive.ql.parse;
import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
@@ -27,6 +29,7 @@ import java.util.Map;
import java.util.Set;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.QueryProperties;
import org.apache.hadoop.hive.ql.QueryState;
@@ -620,4 +623,26 @@ public class ParseContext {
List<ColumnStatsAutoGatherContext> columnStatsAutoGatherContexts) {
this.columnStatsAutoGatherContexts = columnStatsAutoGatherContexts;
}
+
+ public Collection<Operator> getAllOps() {
+ List<Operator> ops = new ArrayList<>();
+ Set<Operator> visited = new HashSet<Operator>();
+ for (Operator<?> op : getTopOps().values()) {
+ getAllOps(ops, visited, op);
+ }
+ return ops;
+ }
+
+ private static void getAllOps(List<Operator> builder, Set<Operator> visited, Operator<?> op) {
+ boolean added = visited.add(op);
+ builder.add(op);
+ if (!added) return;
+ if (op.getNumChild() > 0) {
+ List<Operator<?>> children = op.getChildOperators();
+ for (int i = 0; i < children.size(); i++) {
+ getAllOps(builder, visited, children.get(i));
+ }
+ }
+ }
+
}
http://git-wip-us.apache.org/repos/asf/hive/blob/b9b39504/ql/src/test/queries/clientpositive/partition_condition_remover.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/partition_condition_remover.q b/ql/src/test/queries/clientpositive/partition_condition_remover.q
new file mode 100644
index 0000000..39e58b8
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/partition_condition_remover.q
@@ -0,0 +1,13 @@
+
+drop table foo;
+
+create table foo (i int) partitioned by (s string);
+
+insert overwrite table foo partition(s='foo') select cint from alltypesorc limit 10;
+insert overwrite table foo partition(s='bar') select cint from alltypesorc limit 10;
+
+explain select * from foo where s not in ('bar');
+select * from foo where s not in ('bar');
+
+
+drop table foo;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/b9b39504/ql/src/test/results/clientpositive/partition_condition_remover.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/partition_condition_remover.q.out b/ql/src/test/results/clientpositive/partition_condition_remover.q.out
new file mode 100644
index 0000000..2f8f998
--- /dev/null
+++ b/ql/src/test/results/clientpositive/partition_condition_remover.q.out
@@ -0,0 +1,79 @@
+PREHOOK: query: drop table foo
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table foo
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table foo (i int) partitioned by (s string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@foo
+POSTHOOK: query: create table foo (i int) partitioned by (s string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@foo
+PREHOOK: query: insert overwrite table foo partition(s='foo') select cint from alltypesorc limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@foo@s=foo
+POSTHOOK: query: insert overwrite table foo partition(s='foo') select cint from alltypesorc limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@foo@s=foo
+POSTHOOK: Lineage: foo PARTITION(s=foo).i SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+PREHOOK: query: insert overwrite table foo partition(s='bar') select cint from alltypesorc limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@foo@s=bar
+POSTHOOK: query: insert overwrite table foo partition(s='bar') select cint from alltypesorc limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@foo@s=bar
+POSTHOOK: Lineage: foo PARTITION(s=bar).i SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+PREHOOK: query: explain select * from foo where s not in ('bar')
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from foo where s not in ('bar')
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: foo
+ Statistics: Num rows: 10 Data size: 90 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i (type: int), s (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 10 Data size: 90 Basic stats: COMPLETE Column stats: NONE
+ ListSink
+
+PREHOOK: query: select * from foo where s not in ('bar')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@foo
+PREHOOK: Input: default@foo@s=foo
+#### A masked pattern was here ####
+POSTHOOK: query: select * from foo where s not in ('bar')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@foo
+POSTHOOK: Input: default@foo@s=foo
+#### A masked pattern was here ####
+528534767 foo
+528534767 foo
+528534767 foo
+528534767 foo
+528534767 foo
+528534767 foo
+528534767 foo
+528534767 foo
+528534767 foo
+528534767 foo
+PREHOOK: query: drop table foo
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@foo
+PREHOOK: Output: default@foo
+POSTHOOK: query: drop table foo
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@foo
+POSTHOOK: Output: default@foo
http://git-wip-us.apache.org/repos/asf/hive/blob/b9b39504/ql/src/test/results/clientpositive/pcs.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/pcs.q.out b/ql/src/test/results/clientpositive/pcs.q.out
index 8b99401..22e7fd2 100644
--- a/ql/src/test/results/clientpositive/pcs.q.out
+++ b/ql/src/test/results/clientpositive/pcs.q.out
@@ -1047,7 +1047,7 @@ STAGE PLANS:
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: ((struct(ds,key)) IN (const struct('2000-04-08',1), const struct('2000-04-09',2)) and (ds = '2008-04-08')) (type: boolean)
+ predicate: ((struct(ds,key)) IN (const struct('2000-04-08',1), const struct('2000-04-09',2)) and (struct(ds)) IN (const struct('2000-04-08'), const struct('2000-04-09')) and (ds = '2008-04-08')) (type: boolean)
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: key (type: int), value (type: string)
@@ -1072,7 +1072,7 @@ STAGE PLANS:
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: ((struct(ds,key)) IN (const struct('2000-04-08',1), const struct('2000-04-09',2)) and (ds = '2008-04-08')) (type: boolean)
+ predicate: ((struct(ds,key)) IN (const struct('2000-04-08',1), const struct('2000-04-09',2)) and (struct(ds)) IN (const struct('2000-04-08'), const struct('2000-04-09')) and (ds = '2008-04-08')) (type: boolean)
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: key (type: int), value (type: string)