You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by px...@apache.org on 2016/05/13 17:11:35 UTC
[2/2] hive git commit: HIVE-13602: TPCH q16 return wrong result when
CBO is on (Pengcheng Xiong, reviewed by Aihua Xu, Ashutosh Chauhan)
HIVE-13602: TPCH q16 return wrong result when CBO is on (Pengcheng Xiong, reviewed by Aihua Xu, Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/7d766d0b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/7d766d0b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/7d766d0b
Branch: refs/heads/master
Commit: 7d766d0bd808eb51cad043a2050787525a6655ce
Parents: 503b9e9
Author: Pengcheng Xiong <px...@apache.org>
Authored: Fri May 13 10:10:13 2016 -0700
Committer: Pengcheng Xiong <px...@apache.org>
Committed: Fri May 13 10:10:21 2016 -0700
----------------------------------------------------------------------
.../hadoop/hive/ql/metadata/VirtualColumn.java | 12 +
.../ql/optimizer/ConstantPropagateProcCtx.java | 215 ++++----
.../optimizer/ConstantPropagateProcFactory.java | 40 +-
.../optimizer/SortedDynPartitionOptimizer.java | 20 +-
.../queries/clientpositive/constant_prop_1.q | 51 ++
.../queries/clientpositive/constant_prop_2.q | 9 +
.../queries/clientpositive/constant_prop_3.q | 54 ++
ql/src/test/queries/clientpositive/cte_7.q | 14 +
.../cbo_rp_annotate_stats_groupby.q.out | 70 ++-
.../clientpositive/columnstats_partlvl.q.out | 70 +--
.../clientpositive/columnstats_partlvl_dp.q.out | 28 +-
.../clientpositive/constant_prop_1.q.out | 547 +++++++++++++++++++
.../clientpositive/constant_prop_2.q.out | 75 +++
.../clientpositive/constant_prop_3.q.out | 384 +++++++++++++
ql/src/test/results/clientpositive/cte_7.q.out | 55 ++
.../dynpart_sort_optimization.q.out | 12 +-
.../dynpart_sort_optimization_acid.q.out | 6 +-
.../clientpositive/groupby_duplicate_key.q.out | 16 +-
.../results/clientpositive/quotedid_basic.q.out | 4 +-
.../subquery_notin_having.q.java1.7.out | 12 +-
.../tez/dynpart_sort_optimization.q.out | 12 +-
21 files changed, 1499 insertions(+), 207 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/7d766d0b/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java
index ecc5d92..abcded4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java
@@ -26,9 +26,11 @@ import java.util.ListIterator;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
+
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.common.classification.InterfaceAudience;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.io.RecordIdentifier;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
@@ -175,4 +177,14 @@ public class VirtualColumn implements Serializable {
}
return ObjectInspectorFactory.getStandardStructObjectInspector(names, inspectors);
}
+
+ public static boolean isVirtualColumnBasedOnAlias(ColumnInfo column) {
+ // Not using method column.getIsVirtualCol() because partitioning columns
+ // are also treated as virtual columns in ColumnInfo.
+ if (column.getAlias() != null
+ && VirtualColumn.VIRTUAL_COLUMN_NAMES.contains(column.getAlias().toUpperCase())) {
+ return true;
+ }
+ return false;
+ }
}
http://git-wip-us.apache.org/repos/asf/hive/blob/7d766d0b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcCtx.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcCtx.java
index 1814550..bc52f7b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcCtx.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcCtx.java
@@ -20,9 +20,11 @@ package org.apache.hadoop.hive.ql.optimizer;
import java.io.Serializable;
+import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
+import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
@@ -30,10 +32,16 @@ import java.util.Set;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.hive.ql.exec.ColumnInfo;
+import org.apache.hadoop.hive.ql.exec.FilterOperator;
+import org.apache.hadoop.hive.ql.exec.JoinOperator;
+import org.apache.hadoop.hive.ql.exec.LimitOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.exec.RowSchema;
import org.apache.hadoop.hive.ql.exec.UnionOperator;
+import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
/**
@@ -73,37 +81,6 @@ public class ConstantPropagateProcCtx implements NodeProcessorCtx {
}
/**
- * Resolve a ColumnInfo based on given RowResolver.
- *
- * @param ci
- * @param rr
- * @param parentRR
- * @return
- * @throws SemanticException
- */
- private ColumnInfo resolve(ColumnInfo ci, RowSchema rs, RowSchema parentRS) {
- // Resolve new ColumnInfo from <tableAlias, alias>
- String alias = ci.getAlias();
- if (alias == null) {
- alias = ci.getInternalName();
- }
- String tblAlias = ci.getTabAlias();
- ColumnInfo rci = rs.getColumnInfo(tblAlias, alias);
- if (rci == null && rs.getTableNames().size() == 1 &&
- parentRS.getTableNames().size() == 1) {
- rci = rs.getColumnInfo(rs.getTableNames().iterator().next(),
- alias);
- }
- if (rci == null) {
- return null;
- }
- LOG.debug("Resolved "
- + ci.getTabAlias() + "." + ci.getAlias() + " as "
- + rci.getTabAlias() + "." + rci.getAlias() + " with rs: " + rs);
- return rci;
- }
-
- /**
* Get propagated constant map from parents.
*
* Traverse all parents of current operator, if there is propagated constant (determined by
@@ -115,8 +92,8 @@ public class ConstantPropagateProcCtx implements NodeProcessorCtx {
* @return map of ColumnInfo to ExprNodeDesc. The values of that map must be either
* ExprNodeConstantDesc or ExprNodeNullDesc.
*/
- public Map<ColumnInfo, ExprNodeDesc> getPropagatedConstants(
- Operator<? extends Serializable> op) {
+ public Map<ColumnInfo, ExprNodeDesc> getPropagatedConstants(Operator<? extends Serializable> op) {
+ // this map should map ColumnInfo to ExprNodeConstantDesc
Map<ColumnInfo, ExprNodeDesc> constants = new HashMap<ColumnInfo, ExprNodeDesc>();
if (op.getSchema() == null) {
return constants;
@@ -128,82 +105,134 @@ public class ConstantPropagateProcCtx implements NodeProcessorCtx {
return constants;
}
- if (op instanceof UnionOperator) {
- String alias = rs.getSignature().get(0).getTabAlias();
- // find intersection
- Map<ColumnInfo, ExprNodeDesc> intersection = null;
- for (Operator<?> parent : op.getParentOperators()) {
- Map<ColumnInfo, ExprNodeDesc> unionConst = opToConstantExprs.get(parent);
- LOG.debug("Constant of op " + parent.getOperatorId() + " " + unionConst);
- if (intersection == null) {
- intersection = new HashMap<ColumnInfo, ExprNodeDesc>();
- for (Entry<ColumnInfo, ExprNodeDesc> e : unionConst.entrySet()) {
- ColumnInfo ci = new ColumnInfo(e.getKey());
- ci.setTabAlias(alias);
- intersection.put(ci, e.getValue());
+ // The previous solution was based on tableAlias and colAlias, which is
+ // unsafe, especially when CBO generates derived table names. See HIVE-13602.
+ // For correctness purpose, we only trust colExpMap.
+ // We assume that CBO can do the constantPropagation before this function is
+ // called to help improve the performance.
+ // UnionOperator, LimitOperator and FilterOperator are special, they should already be
+ // column-position aligned.
+
+ List<Map<Integer, ExprNodeDesc>> parentsToConstant = new ArrayList<>();
+ boolean areAllParentsContainConstant = true;
+ boolean noParentsContainConstant = true;
+ for (Operator<?> parent : op.getParentOperators()) {
+ Map<ColumnInfo, ExprNodeDesc> constMap = opToConstantExprs.get(parent);
+ if (constMap == null) {
+ LOG.debug("Constant of Op " + parent.getOperatorId() + " is not found");
+ areAllParentsContainConstant = false;
+ } else {
+ noParentsContainConstant = false;
+ Map<Integer, ExprNodeDesc> map = new HashMap<>();
+ for (Entry<ColumnInfo, ExprNodeDesc> entry : constMap.entrySet()) {
+ map.put(parent.getSchema().getPosition(entry.getKey().getInternalName()),
+ entry.getValue());
+ }
+ parentsToConstant.add(map);
+ LOG.debug("Constant of Op " + parent.getOperatorId() + " " + constMap);
+ }
+ }
+ if (noParentsContainConstant) {
+ return constants;
+ }
+
+ ArrayList<ColumnInfo> signature = op.getSchema().getSignature();
+ if (op instanceof LimitOperator || op instanceof FilterOperator) {
+ // there should be only one parent.
+ if (op.getParentOperators().size() == 1) {
+ Map<Integer, ExprNodeDesc> parentToConstant = parentsToConstant.get(0);
+ for (int index = 0; index < signature.size(); index++) {
+ if (parentToConstant.containsKey(index)) {
+ constants.put(signature.get(index), parentToConstant.get(index));
}
- } else {
- Iterator<Entry<ColumnInfo, ExprNodeDesc>> itr = intersection.entrySet().iterator();
- while (itr.hasNext()) {
- Entry<ColumnInfo, ExprNodeDesc> e = itr.next();
- boolean found = false;
- for (Entry<ColumnInfo, ExprNodeDesc> f : opToConstantExprs.get(parent).entrySet()) {
- if (e.getKey().getInternalName().equals(f.getKey().getInternalName())) {
- if (e.getValue().isSame(f.getValue())) {
- found = true;
- }
+ }
+ }
+ } else if (op instanceof UnionOperator && areAllParentsContainConstant) {
+ for (int index = 0; index < signature.size(); index++) {
+ ExprNodeDesc constant = null;
+ for (Map<Integer, ExprNodeDesc> parentToConstant : parentsToConstant) {
+ if (!parentToConstant.containsKey(index)) {
+ // if this parent does not contain a constant at this position, we
+ // continue to look at other positions.
+ constant = null;
+ break;
+ } else {
+ if (constant == null) {
+ constant = parentToConstant.get(index);
+ } else {
+ // compare if they are the same constant.
+ ExprNodeDesc nextConstant = parentToConstant.get(index);
+ if (!nextConstant.isSame(constant)) {
+ // they are not the same constant. for example, union all of 1
+ // and 2.
+ constant = null;
break;
}
}
- if (!found) {
- itr.remove();
- }
}
}
- if (intersection.isEmpty()) {
- return intersection;
+ // we have checked all the parents for the "index" position.
+ if (constant != null) {
+ constants.put(signature.get(index), constant);
}
}
- LOG.debug("Propagated union constants:" + intersection);
- return intersection;
- }
-
- for (Operator<? extends Serializable> parent : op.getParentOperators()) {
- Map<ColumnInfo, ExprNodeDesc> c = opToConstantExprs.get(parent);
- for (Entry<ColumnInfo, ExprNodeDesc> e : c.entrySet()) {
- ColumnInfo ci = e.getKey();
- ExprNodeDesc constant = e.getValue();
- boolean resolved = false;
- ColumnInfo rci = resolve(ci, rs, parent.getSchema());
-
- if (rci != null) {
- constants.put(rci, constant);
- resolved = true;
+ } else if (op instanceof JoinOperator) {
+ JoinOperator joinOp = (JoinOperator) op;
+ Iterator<Entry<Byte, List<ExprNodeDesc>>> itr = joinOp.getConf().getExprs().entrySet()
+ .iterator();
+ while (itr.hasNext()) {
+ Entry<Byte, List<ExprNodeDesc>> e = itr.next();
+ int tag = e.getKey();
+ Operator<?> parent = op.getParentOperators().get(tag);
+ List<ExprNodeDesc> exprs = e.getValue();
+ if (exprs == null) {
+ continue;
}
- if (!resolved &&
- op.getColumnExprMap() != null && op.getColumnExprMap().entrySet() != null) {
- for (Entry<String, ExprNodeDesc> entry : op.getColumnExprMap().entrySet()) {
- if (entry.getValue().isSame(constant)) {
- ColumnInfo rsColumnInfo = rs.getColumnInfo(entry.getKey());
- if (rsColumnInfo == null) {
- continue;
+ for (ExprNodeDesc expr : exprs) {
+ // we are only interested in ExprNodeColumnDesc
+ if (expr instanceof ExprNodeColumnDesc) {
+ String parentColName = ((ExprNodeColumnDesc) expr).getColumn();
+ // find this parentColName in its parent's rs
+ int parentPos = parent.getSchema().getPosition(parentColName);
+ if (parentsToConstant.get(tag).containsKey(parentPos)) {
+ // this position in parent is a constant
+ // reverse look up colExprMap to find the childColName
+ if (op.getColumnExprMap() != null && op.getColumnExprMap().entrySet() != null) {
+ for (Entry<String, ExprNodeDesc> entry : op.getColumnExprMap().entrySet()) {
+ if (entry.getValue().isSame(expr)) {
+ // now propagate the constant from the parent to the child
+ constants.put(signature.get(op.getSchema().getPosition(entry.getKey())),
+ parentsToConstant.get(tag).get(parentPos));
+ }
+ }
}
- constants.put(rsColumnInfo, constant);
- resolved = true;
}
}
}
-
- if (!resolved) {
- LOG.debug("Can't resolve " + ci.getTabAlias() + "." + ci.getAlias() +
- "(" + ci.getInternalName() + ") from rs:" + rs);
+ }
+ } else {
+ // there should be only one parent.
+ if (op.getParentOperators().size() == 1) {
+ Operator<?> parent = op.getParentOperators().get(0);
+ if (op.getColumnExprMap() != null && op.getColumnExprMap().entrySet() != null) {
+ for (Entry<String, ExprNodeDesc> entry : op.getColumnExprMap().entrySet()) {
+ ExprNodeDesc expr = entry.getValue();
+ if (expr instanceof ExprNodeColumnDesc) {
+ String parentColName = ((ExprNodeColumnDesc) expr).getColumn();
+ // find this parentColName in its parent's rs
+ int parentPos = parent.getSchema().getPosition(parentColName);
+ if (parentsToConstant.get(0).containsKey(parentPos)) {
+ // this position in parent is a constant
+ // now propagate the constant from the parent to the child
+ constants.put(signature.get(op.getSchema().getPosition(entry.getKey())),
+ parentsToConstant.get(0).get(parentPos));
+ }
+ }
+ }
}
}
}
-
- LOG.debug("Offerring constants " + constants.keySet()
- + " to operator " + op.toString());
-
+ LOG.debug("Offerring constants " + constants.keySet() + " to operator " + op.toString());
return constants;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/7d766d0b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java
index 8c1f34d..6952ffb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java
@@ -48,6 +48,7 @@ import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
import org.apache.hadoop.hive.ql.optimizer.ConstantPropagateProcCtx.ConstantPropagateOption;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx;
@@ -1096,6 +1097,18 @@ public final class ConstantPropagateProcFactory {
Map<ColumnInfo, ExprNodeDesc> colToConstants = cppCtx.getPropagatedConstants(op);
cppCtx.getOpToConstantExprs().put(op, colToConstants);
+ RowSchema rs = op.getSchema();
+ if (op.getColumnExprMap() != null && rs != null) {
+ for (ColumnInfo colInfo : rs.getSignature()) {
+ if (!VirtualColumn.isVirtualColumnBasedOnAlias(colInfo)) {
+ ExprNodeDesc expr = op.getColumnExprMap().get(colInfo.getInternalName());
+ if (expr instanceof ExprNodeConstantDesc) {
+ colToConstants.put(colInfo, expr);
+ }
+ }
+ }
+ }
+
if (colToConstants.isEmpty()) {
return null;
}
@@ -1133,6 +1146,17 @@ public final class ConstantPropagateProcFactory {
Operator<? extends Serializable> op = (Operator<? extends Serializable>) nd;
Map<ColumnInfo, ExprNodeDesc> constants = cppCtx.getPropagatedConstants(op);
cppCtx.getOpToConstantExprs().put(op, constants);
+ RowSchema rs = op.getSchema();
+ if (op.getColumnExprMap() != null && rs != null) {
+ for (ColumnInfo colInfo : rs.getSignature()) {
+ if (!VirtualColumn.isVirtualColumnBasedOnAlias(colInfo)) {
+ ExprNodeDesc expr = op.getColumnExprMap().get(colInfo.getInternalName());
+ if (expr instanceof ExprNodeConstantDesc) {
+ constants.put(colInfo, expr);
+ }
+ }
+ }
+ }
if (constants.isEmpty()) {
return null;
}
@@ -1185,7 +1209,10 @@ public final class ConstantPropagateProcFactory {
}
colList.set(i, newCol);
if (newCol instanceof ExprNodeConstantDesc && op.getSchema() != null) {
- constants.put(op.getSchema().getSignature().get(i), newCol);
+ ColumnInfo colInfo = op.getSchema().getSignature().get(i);
+ if (!VirtualColumn.isVirtualColumnBasedOnAlias(colInfo)) {
+ constants.put(colInfo, newCol);
+ }
}
if (columnExprMap != null) {
columnExprMap.put(columnNames.get(i), newCol);
@@ -1296,6 +1323,17 @@ public final class ConstantPropagateProcFactory {
Map<ColumnInfo, ExprNodeDesc> constants = cppCtx.getPropagatedConstants(op);
cppCtx.getOpToConstantExprs().put(op, constants);
+ RowSchema rs = op.getSchema();
+ if (op.getColumnExprMap() != null && rs != null) {
+ for (ColumnInfo colInfo : rs.getSignature()) {
+ if (!VirtualColumn.isVirtualColumnBasedOnAlias(colInfo)) {
+ ExprNodeDesc expr = op.getColumnExprMap().get(colInfo.getInternalName());
+ if (expr instanceof ExprNodeConstantDesc) {
+ constants.put(colInfo, expr);
+ }
+ }
+ }
+ }
if (constants.isEmpty()) {
return null;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/7d766d0b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java
index adfbb67..010c89e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java
@@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.optimizer;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
@@ -413,7 +414,7 @@ public class SortedDynPartitionOptimizer extends Transform {
public ReduceSinkOperator getReduceSinkOp(List<Integer> partitionPositions,
List<Integer> sortPositions, List<Integer> sortOrder, List<Integer> sortNullOrder,
ArrayList<ExprNodeDesc> allCols, ArrayList<ExprNodeDesc> bucketColumns, int numBuckets,
- Operator<? extends OperatorDesc> parent, AcidUtils.Operation writeType) {
+ Operator<? extends OperatorDesc> parent, AcidUtils.Operation writeType) throws SemanticException {
// Order of KEY columns
// 1) Partition columns
@@ -518,17 +519,21 @@ public class SortedDynPartitionOptimizer extends Transform {
}
}
+ // map _col0 to KEY._col0, etc
+ Map<String, String> nameMapping = new HashMap<>();
ArrayList<String> keyColNames = Lists.newArrayList();
for (ExprNodeDesc keyCol : keyCols) {
String keyColName = keyCol.getExprString();
keyColNames.add(keyColName);
colExprMap.put(Utilities.ReduceField.KEY + "." +keyColName, keyCol);
+ nameMapping.put(keyColName, Utilities.ReduceField.KEY + "." + keyColName);
}
ArrayList<String> valColNames = Lists.newArrayList();
for (ExprNodeDesc valCol : valCols) {
- String colName =valCol.getExprString();
+ String colName = valCol.getExprString();
valColNames.add(colName);
- colExprMap.put(Utilities.ReduceField.VALUE + "." +colName, valCol);
+ colExprMap.put(Utilities.ReduceField.VALUE + "." + colName, valCol);
+ nameMapping.put(colName, Utilities.ReduceField.VALUE + "." + colName);
}
// Create Key/Value TableDesc. When the operator plan is split into MR tasks,
@@ -548,8 +553,15 @@ public class SortedDynPartitionOptimizer extends Transform {
valueTable, writeType);
rsConf.setBucketCols(bucketColumns);
rsConf.setNumBuckets(numBuckets);
+
+ ArrayList<ColumnInfo> signature = new ArrayList<>();
+ for (int index = 0; index < parent.getSchema().getSignature().size(); index++) {
+ ColumnInfo colInfo = new ColumnInfo(parent.getSchema().getSignature().get(index));
+ colInfo.setInternalName(nameMapping.get(colInfo.getInternalName()));
+ signature.add(colInfo);
+ }
ReduceSinkOperator op = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(
- rsConf, new RowSchema(parent.getSchema()), parent);
+ rsConf, new RowSchema(signature), parent);
op.setColumnExprMap(colExprMap);
return op;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/7d766d0b/ql/src/test/queries/clientpositive/constant_prop_1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/constant_prop_1.q b/ql/src/test/queries/clientpositive/constant_prop_1.q
new file mode 100644
index 0000000..9a0a17c
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/constant_prop_1.q
@@ -0,0 +1,51 @@
+set hive.cbo.enable=false;
+
+
+explain
+select 1 as a from src
+union all
+select 1 as a from src limit 1;
+
+explain
+select a, key, value from
+(
+select 1 as a from src
+union all
+select 1 as a from src limit 1
+)sub join src b where value='12345';
+
+
+explain
+select 1 as a from src
+union all
+select 2 as a from src limit 1;
+
+explain
+select a, key, value from
+(
+select 1 as a from src
+union all
+select 2 as a from src limit 1
+)sub join src b where value='12345';
+
+explain
+select a.key, b.value from src a join src b where a.key = '238' and b.value = '234';
+
+explain
+select a.key, b.value from src a join src b on a.key=b.key where b.value = '234';
+
+create table t (
+a int,
+b int,
+c int,
+d int,
+e int
+);
+
+explain
+select a2 as a3 from
+(select a1 as a2, c1 as c2 from
+(select a as a1, b as b1, c as c1 from t where a=1 and b=2 and c=3)sub1)sub2;
+
+
+
http://git-wip-us.apache.org/repos/asf/hive/blob/7d766d0b/ql/src/test/queries/clientpositive/constant_prop_2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/constant_prop_2.q b/ql/src/test/queries/clientpositive/constant_prop_2.q
new file mode 100644
index 0000000..4bc14a6
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/constant_prop_2.q
@@ -0,0 +1,9 @@
+set hive.mapred.mode=nonstrict;
+set hive.compute.query.using.stats=true;
+set hive.stats.autogather=true;
+
+explain select count('1') from src group by '1';
+select count('1') from src group by '1';
+
+explain
+analyze table srcpart partition (ds='2008-04-08',hr=11) compute statistics for columns key, value;
http://git-wip-us.apache.org/repos/asf/hive/blob/7d766d0b/ql/src/test/queries/clientpositive/constant_prop_3.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/constant_prop_3.q b/ql/src/test/queries/clientpositive/constant_prop_3.q
new file mode 100644
index 0000000..e1090a6
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/constant_prop_3.q
@@ -0,0 +1,54 @@
+set hive.mapred.mode=nonstrict;
+
+drop table part_hive;
+drop table partsupp_hive;
+drop table supplier_hive;
+
+create table part_hive (P_PARTKEY INT, P_NAME STRING, P_MFGR STRING, P_BRAND STRING, P_TYPE STRING,
+P_SIZE INT, P_CONTAINER STRING, P_RETAILPRICE DOUBLE, P_COMMENT STRING);
+
+create table partsupp_hive (PS_PARTKEY INT, PS_SUPPKEY INT, PS_AVAILQTY INT, PS_SUPPLYCOST DOUBLE,
+PS_COMMENT STRING);
+
+create table supplier_hive (S_SUPPKEY INT, S_NAME STRING, S_ADDRESS STRING, S_NATIONKEY INT,
+S_PHONE STRING, S_ACCTBAL DOUBLE, S_COMMENT STRING);
+
+analyze table part_hive compute statistics;
+analyze table part_hive compute statistics for columns;
+
+analyze table partsupp_hive compute statistics;
+analyze table partsupp_hive compute statistics for columns;
+
+analyze table supplier_hive compute statistics;
+analyze table supplier_hive compute statistics for columns;
+
+explain select
+ p_brand,
+ p_type,
+ p_size,
+ count(distinct ps_suppkey) as supplier_cnt
+from
+ partsupp_hive,
+ part_hive
+where
+ p_partkey = ps_partkey
+ and p_brand <> 'Brand#34'
+ and p_type not like 'ECONOMY BRUSHED%'
+ and p_size in (22, 14, 27, 49, 21, 33, 35, 28)
+ and partsupp_hive.ps_suppkey not in (
+ select
+ s_suppkey
+ from
+ supplier_hive
+ where
+ s_comment like '%Customer%Complaints%'
+ )
+group by
+ p_brand,
+ p_type,
+ p_size
+order by
+ supplier_cnt desc,
+ p_brand,
+ p_type,
+ p_size;
http://git-wip-us.apache.org/repos/asf/hive/blob/7d766d0b/ql/src/test/queries/clientpositive/cte_7.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/cte_7.q b/ql/src/test/queries/clientpositive/cte_7.q
new file mode 100644
index 0000000..02a6813
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/cte_7.q
@@ -0,0 +1,14 @@
+set hive.cbo.enable=false;
+
+create table t (i int,a string,b string);
+
+insert into t values (1,'hello','world'),(2,'bye',null);
+
+select * from t where t.b is null;
+
+with cte as (select t.a as a,t.a as b,t.a as c from t where t.b is null) select * from cte;
+
+select t.a as a,t.a as b,t.a as c from t where t.b is null;
+
+with cte as (select t.a as a,t.a as b,t.a as c from t where t.b is not null) select * from cte;
+
http://git-wip-us.apache.org/repos/asf/hive/blob/7d766d0b/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out b/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out
index e19bb9e..a1be9b9d 100644
--- a/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out
+++ b/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out
@@ -362,10 +362,11 @@ STAGE PLANS:
Group By Operator
keys: state (type: string), locid (type: int), '0' (type: string)
mode: hash
- outputColumnNames: _col0, _col1, _col2
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 32 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE
+ pruneGroupingSetId: true
Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+ key expressions: _col0 (type: string), _col1 (type: int), '0' (type: string)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
Statistics: Num rows: 32 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE
@@ -419,10 +420,11 @@ STAGE PLANS:
Group By Operator
keys: state (type: string), locid (type: int), '0' (type: string)
mode: hash
- outputColumnNames: _col0, _col1, _col2
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE
+ pruneGroupingSetId: true
Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+ key expressions: _col0 (type: string), _col1 (type: int), '0' (type: string)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE
@@ -476,10 +478,11 @@ STAGE PLANS:
Group By Operator
keys: state (type: string), locid (type: int), '0' (type: string)
mode: hash
- outputColumnNames: _col0, _col1, _col2
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE
+ pruneGroupingSetId: true
Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+ key expressions: _col0 (type: string), _col1 (type: int), '0' (type: string)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE
@@ -533,10 +536,11 @@ STAGE PLANS:
Group By Operator
keys: state (type: string), locid (type: int), '0' (type: string)
mode: hash
- outputColumnNames: _col0, _col1, _col2
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE
+ pruneGroupingSetId: true
Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+ key expressions: _col0 (type: string), _col1 (type: int), '0' (type: string)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE
@@ -590,10 +594,11 @@ STAGE PLANS:
Group By Operator
keys: state (type: string), locid (type: int), '0' (type: string)
mode: hash
- outputColumnNames: _col0, _col1, _col2
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE
+ pruneGroupingSetId: true
Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+ key expressions: _col0 (type: string), _col1 (type: int), '0' (type: string)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE
@@ -647,10 +652,11 @@ STAGE PLANS:
Group By Operator
keys: state (type: string), locid (type: int), '0' (type: string)
mode: hash
- outputColumnNames: _col0, _col1, _col2
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 32 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE
+ pruneGroupingSetId: true
Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+ key expressions: _col0 (type: string), _col1 (type: int), '0' (type: string)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
Statistics: Num rows: 32 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE
@@ -761,10 +767,11 @@ STAGE PLANS:
Group By Operator
keys: state (type: string), locid (type: int), '0' (type: string)
mode: hash
- outputColumnNames: _col0, _col1, _col2
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE
+ pruneGroupingSetId: true
Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+ key expressions: _col0 (type: string), _col1 (type: int), '0' (type: string)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE
@@ -873,10 +880,11 @@ STAGE PLANS:
Group By Operator
keys: state (type: string), locid (type: int), '0' (type: string)
mode: hash
- outputColumnNames: _col0, _col1, _col2
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE
+ pruneGroupingSetId: true
Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+ key expressions: _col0 (type: string), _col1 (type: int), '0' (type: string)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE
@@ -930,10 +938,11 @@ STAGE PLANS:
Group By Operator
keys: state (type: string), locid (type: int), '0' (type: string)
mode: hash
- outputColumnNames: _col0, _col1, _col2
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE
+ pruneGroupingSetId: true
Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+ key expressions: _col0 (type: string), _col1 (type: int), '0' (type: string)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE
@@ -987,10 +996,11 @@ STAGE PLANS:
Group By Operator
keys: state (type: string), locid (type: int), '0' (type: string)
mode: hash
- outputColumnNames: _col0, _col1, _col2
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+ pruneGroupingSetId: true
Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+ key expressions: _col0 (type: string), _col1 (type: int), '0' (type: string)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
@@ -1044,10 +1054,11 @@ STAGE PLANS:
Group By Operator
keys: state (type: string), locid (type: int), '0' (type: string)
mode: hash
- outputColumnNames: _col0, _col1, _col2
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE
+ pruneGroupingSetId: true
Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+ key expressions: _col0 (type: string), _col1 (type: int), '0' (type: string)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE
@@ -1101,10 +1112,11 @@ STAGE PLANS:
Group By Operator
keys: state (type: string), locid (type: int), '0' (type: string)
mode: hash
- outputColumnNames: _col0, _col1, _col2
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE
+ pruneGroupingSetId: true
Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+ key expressions: _col0 (type: string), _col1 (type: int), '0' (type: string)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE
@@ -1158,10 +1170,11 @@ STAGE PLANS:
Group By Operator
keys: state (type: string), locid (type: int), '0' (type: string)
mode: hash
- outputColumnNames: _col0, _col1, _col2
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE
+ pruneGroupingSetId: true
Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+ key expressions: _col0 (type: string), _col1 (type: int), '0' (type: string)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE
@@ -1268,10 +1281,11 @@ STAGE PLANS:
Group By Operator
keys: state (type: string), locid (type: int), '0' (type: string)
mode: hash
- outputColumnNames: _col0, _col1, _col2
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE
+ pruneGroupingSetId: true
Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+ key expressions: _col0 (type: string), _col1 (type: int), '0' (type: string)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE
http://git-wip-us.apache.org/repos/asf/hive/blob/7d766d0b/ql/src/test/results/clientpositive/columnstats_partlvl.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/columnstats_partlvl.q.out b/ql/src/test/results/clientpositive/columnstats_partlvl.q.out
index 8587ed3..f6f2bfa 100644
--- a/ql/src/test/results/clientpositive/columnstats_partlvl.q.out
+++ b/ql/src/test/results/clientpositive/columnstats_partlvl.q.out
@@ -47,26 +47,26 @@ STAGE PLANS:
TableScan
alias: employee_part
Select Operator
- expressions: 2000.0 (type: double), employeeid (type: int)
- outputColumnNames: employeesalary, employeeid
+ expressions: employeeid (type: int)
+ outputColumnNames: employeeid
Group By Operator
aggregations: compute_stats(employeeid, 16)
- keys: employeesalary (type: double)
+ keys: 2000.0 (type: double)
mode: hash
outputColumnNames: _col0, _col1
Reduce Output Operator
- key expressions: _col0 (type: double)
+ key expressions: 2000.0 (type: double)
sort order: +
- Map-reduce partition columns: _col0 (type: double)
+ Map-reduce partition columns: 2000.0 (type: double)
value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
Reduce Operator Tree:
Group By Operator
aggregations: compute_stats(VALUE._col0)
- keys: KEY._col0 (type: double)
+ keys: 2000.0 (type: double)
mode: mergepartial
outputColumnNames: _col0, _col1
Select Operator
- expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col0 (type: double)
+ expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), 2000.0 (type: double)
outputColumnNames: _col0, _col1
File Output Operator
compressed: false
@@ -100,18 +100,18 @@ STAGE PLANS:
alias: employee_part
GatherStats: false
Select Operator
- expressions: 2000.0 (type: double), employeeid (type: int)
- outputColumnNames: employeesalary, employeeid
+ expressions: employeeid (type: int)
+ outputColumnNames: employeeid
Group By Operator
aggregations: compute_stats(employeeid, 16)
- keys: employeesalary (type: double)
+ keys: 2000.0 (type: double)
mode: hash
outputColumnNames: _col0, _col1
Reduce Output Operator
- key expressions: _col0 (type: double)
+ key expressions: 2000.0 (type: double)
null sort order: a
sort order: +
- Map-reduce partition columns: _col0 (type: double)
+ Map-reduce partition columns: 2000.0 (type: double)
tag: -1
value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
auto parallelism: false
@@ -170,11 +170,11 @@ STAGE PLANS:
Reduce Operator Tree:
Group By Operator
aggregations: compute_stats(VALUE._col0)
- keys: KEY._col0 (type: double)
+ keys: 2000.0 (type: double)
mode: mergepartial
outputColumnNames: _col0, _col1
Select Operator
- expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col0 (type: double)
+ expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), 2000.0 (type: double)
outputColumnNames: _col0, _col1
File Output Operator
compressed: false
@@ -233,26 +233,26 @@ STAGE PLANS:
TableScan
alias: employee_part
Select Operator
- expressions: 4000.0 (type: double), employeeid (type: int)
- outputColumnNames: employeesalary, employeeid
+ expressions: employeeid (type: int)
+ outputColumnNames: employeeid
Group By Operator
aggregations: compute_stats(employeeid, 16)
- keys: employeesalary (type: double)
+ keys: 4000.0 (type: double)
mode: hash
outputColumnNames: _col0, _col1
Reduce Output Operator
- key expressions: _col0 (type: double)
+ key expressions: 4000.0 (type: double)
sort order: +
- Map-reduce partition columns: _col0 (type: double)
+ Map-reduce partition columns: 4000.0 (type: double)
value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
Reduce Operator Tree:
Group By Operator
aggregations: compute_stats(VALUE._col0)
- keys: KEY._col0 (type: double)
+ keys: 4000.0 (type: double)
mode: mergepartial
outputColumnNames: _col0, _col1
Select Operator
- expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col0 (type: double)
+ expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), 4000.0 (type: double)
outputColumnNames: _col0, _col1
File Output Operator
compressed: false
@@ -286,18 +286,18 @@ STAGE PLANS:
alias: employee_part
GatherStats: false
Select Operator
- expressions: 4000.0 (type: double), employeeid (type: int)
- outputColumnNames: employeesalary, employeeid
+ expressions: employeeid (type: int)
+ outputColumnNames: employeeid
Group By Operator
aggregations: compute_stats(employeeid, 16)
- keys: employeesalary (type: double)
+ keys: 4000.0 (type: double)
mode: hash
outputColumnNames: _col0, _col1
Reduce Output Operator
- key expressions: _col0 (type: double)
+ key expressions: 4000.0 (type: double)
null sort order: a
sort order: +
- Map-reduce partition columns: _col0 (type: double)
+ Map-reduce partition columns: 4000.0 (type: double)
tag: -1
value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
auto parallelism: false
@@ -356,11 +356,11 @@ STAGE PLANS:
Reduce Operator Tree:
Group By Operator
aggregations: compute_stats(VALUE._col0)
- keys: KEY._col0 (type: double)
+ keys: 4000.0 (type: double)
mode: mergepartial
outputColumnNames: _col0, _col1
Select Operator
- expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col0 (type: double)
+ expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), 4000.0 (type: double)
outputColumnNames: _col0, _col1
File Output Operator
compressed: false
@@ -419,26 +419,26 @@ STAGE PLANS:
TableScan
alias: employee_part
Select Operator
- expressions: 2000.0 (type: double), employeeid (type: int), employeename (type: string)
- outputColumnNames: employeesalary, employeeid, employeename
+ expressions: employeeid (type: int), employeename (type: string)
+ outputColumnNames: employeeid, employeename
Group By Operator
aggregations: compute_stats(employeeid, 16), compute_stats(employeename, 16)
- keys: employeesalary (type: double)
+ keys: 2000.0 (type: double)
mode: hash
outputColumnNames: _col0, _col1, _col2
Reduce Output Operator
- key expressions: _col0 (type: double)
+ key expressions: 2000.0 (type: double)
sort order: +
- Map-reduce partition columns: _col0 (type: double)
+ Map-reduce partition columns: 2000.0 (type: double)
value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
Reduce Operator Tree:
Group By Operator
aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
- keys: KEY._col0 (type: double)
+ keys: 2000.0 (type: double)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
Select Operator
- expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col0 (type: double)
+ expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), 2000.0 (type: double)
outputColumnNames: _col0, _col1, _col2
File Output Operator
compressed: false
http://git-wip-us.apache.org/repos/asf/hive/blob/7d766d0b/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out b/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out
index bb0ea86..21089e1 100644
--- a/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out
+++ b/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out
@@ -85,26 +85,26 @@ STAGE PLANS:
TableScan
alias: employee_part
Select Operator
- expressions: 4000.0 (type: double), country (type: string), employeename (type: string), employeeid (type: int)
- outputColumnNames: employeesalary, country, employeename, employeeid
+ expressions: country (type: string), employeename (type: string), employeeid (type: int)
+ outputColumnNames: country, employeename, employeeid
Group By Operator
aggregations: compute_stats(employeename, 16), compute_stats(employeeid, 16)
- keys: employeesalary (type: double), country (type: string)
+ keys: 4000.0 (type: double), country (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
Reduce Output Operator
- key expressions: _col0 (type: double), _col1 (type: string)
+ key expressions: 4000.0 (type: double), _col1 (type: string)
sort order: ++
- Map-reduce partition columns: _col0 (type: double), _col1 (type: string)
+ Map-reduce partition columns: 4000.0 (type: double), _col1 (type: string)
value expressions: _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>), _col3 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
Reduce Operator Tree:
Group By Operator
aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
- keys: KEY._col0 (type: double), KEY._col1 (type: string)
+ keys: 4000.0 (type: double), KEY._col1 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3
Select Operator
- expressions: _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col3 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col0 (type: double), _col1 (type: string)
+ expressions: _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col3 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), 4000.0 (type: double), _col1 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
File Output Operator
compressed: false
@@ -158,26 +158,26 @@ STAGE PLANS:
TableScan
alias: employee_part
Select Operator
- expressions: 2000.0 (type: double), country (type: string), employeeid (type: int)
- outputColumnNames: employeesalary, country, employeeid
+ expressions: country (type: string), employeeid (type: int)
+ outputColumnNames: country, employeeid
Group By Operator
aggregations: compute_stats(employeeid, 16)
- keys: employeesalary (type: double), country (type: string)
+ keys: 2000.0 (type: double), country (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2
Reduce Output Operator
- key expressions: _col0 (type: double), _col1 (type: string)
+ key expressions: 2000.0 (type: double), _col1 (type: string)
sort order: ++
- Map-reduce partition columns: _col0 (type: double), _col1 (type: string)
+ Map-reduce partition columns: 2000.0 (type: double), _col1 (type: string)
value expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
Reduce Operator Tree:
Group By Operator
aggregations: compute_stats(VALUE._col0)
- keys: KEY._col0 (type: double), KEY._col1 (type: string)
+ keys: 2000.0 (type: double), KEY._col1 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
Select Operator
- expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col0 (type: double), _col1 (type: string)
+ expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), 2000.0 (type: double), _col1 (type: string)
outputColumnNames: _col0, _col1, _col2
File Output Operator
compressed: false
http://git-wip-us.apache.org/repos/asf/hive/blob/7d766d0b/ql/src/test/results/clientpositive/constant_prop_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/constant_prop_1.q.out b/ql/src/test/results/clientpositive/constant_prop_1.q.out
new file mode 100644
index 0000000..2ba2430
--- /dev/null
+++ b/ql/src/test/results/clientpositive/constant_prop_1.q.out
@@ -0,0 +1,547 @@
+PREHOOK: query: explain
+select 1 as a from src
+union all
+select 1 as a from src limit 1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select 1 as a from src
+union all
+select 1 as a from src limit 1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+ Union
+ Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: 1 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 1
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+ Union
+ Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: 1 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 1
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 1
+ Processor Tree:
+ ListSink
+
+Warning: Shuffle Join JOIN[13][tables = [sub, b]] in Stage 'Stage-2:MAPRED' is a cross product
+PREHOOK: query: explain
+select a, key, value from
+(
+select 1 as a from src
+union all
+select 1 as a from src limit 1
+)sub join src b where value='12345'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select a, key, value from
+(
+select 1 as a from src
+union all
+select 1 as a from src limit 1
+)sub join src b where value='12345'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+ Union
+ Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 1
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ TopN Hash Memory Usage: 0.1
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+ Union
+ Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 1
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ TopN Hash Memory Usage: 0.1
+ Reduce Operator Tree:
+ Limit
+ Number of rows: 1
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ TableScan
+ alias: b
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (value = '12345') (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: key (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col1
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: 1 (type: int), _col1 (type: string), '12345' (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain
+select 1 as a from src
+union all
+select 2 as a from src limit 1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select 1 as a from src
+union all
+select 2 as a from src limit 1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: 1 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+ Union
+ Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 1
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: 2 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+ Union
+ Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 1
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 1
+ Processor Tree:
+ ListSink
+
+Warning: Shuffle Join JOIN[13][tables = [sub, b]] in Stage 'Stage-2:MAPRED' is a cross product
+PREHOOK: query: explain
+select a, key, value from
+(
+select 1 as a from src
+union all
+select 2 as a from src limit 1
+)sub join src b where value='12345'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select a, key, value from
+(
+select 1 as a from src
+union all
+select 2 as a from src limit 1
+)sub join src b where value='12345'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: 1 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+ Union
+ Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 1
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col0 (type: int)
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: 2 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+ Union
+ Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 1
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col0 (type: int)
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 1
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int)
+ TableScan
+ alias: b
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (value = '12345') (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: key (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), '12345' (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+Warning: Shuffle Join JOIN[4][tables = [a, b]] in Stage 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: explain
+select a.key, b.value from src a join src b where a.key = '238' and b.value = '234'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select a.key, b.value from src a join src b where a.key = '238' and b.value = '234'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key = '238') (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ TableScan
+ alias: b
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (value = '234') (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: '238' (type: string), '234' (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain
+select a.key, b.value from src a join src b on a.key=b.key where b.value = '234'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select a.key, b.value from src a join src b on a.key=b.key where b.value = '234'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ TableScan
+ alias: b
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key is not null and (value = '234')) (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), '234' (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: create table t (
+a int,
+b int,
+c int,
+d int,
+e int
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t
+POSTHOOK: query: create table t (
+a int,
+b int,
+c int,
+d int,
+e int
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t
+PREHOOK: query: explain
+select a2 as a3 from
+(select a1 as a2, c1 as c2 from
+(select a as a1, b as b1, c as c1 from t where a=1 and b=2 and c=3)sub1)sub2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select a2 as a3 from
+(select a1 as a2, c1 as c2 from
+(select a as a1, b as b1, c as c1 from t where a=1 and b=2 and c=3)sub1)sub2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: t
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: ((a = 1) and (b = 2) and (c = 3)) (type: boolean)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: 1 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
http://git-wip-us.apache.org/repos/asf/hive/blob/7d766d0b/ql/src/test/results/clientpositive/constant_prop_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/constant_prop_2.q.out b/ql/src/test/results/clientpositive/constant_prop_2.q.out
new file mode 100644
index 0000000..c1de559
--- /dev/null
+++ b/ql/src/test/results/clientpositive/constant_prop_2.q.out
@@ -0,0 +1,75 @@
+PREHOOK: query: explain select count('1') from src group by '1'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count('1') from src group by '1'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: 1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count('1') from src group by '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select count('1') from src group by '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+500
+PREHOOK: query: explain
+analyze table srcpart partition (ds='2008-04-08',hr=11) compute statistics for columns key, value
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+analyze table srcpart partition (ds='2008-04-08',hr=11) compute statistics for columns key, value
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+ Stage-1 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-0
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: srcpart
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: key, value
+ Group By Operator
+ aggregations: compute_stats(key, 16), compute_stats(value, 16)
+ keys: '2008-04-08' (type: string), '11' (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Reduce Output Operator
+ key expressions: '2008-04-08' (type: string), '11' (type: string)
+ sort order: ++
+ Map-reduce partition columns: '2008-04-08' (type: string), '11' (type: string)
+ value expressions: _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>), _col3 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ keys: '2008-04-08' (type: string), '11' (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Select Operator
+ expressions: _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col3 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), '2008-04-08' (type: string), '11' (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-1
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: string, string
+ Table: default.srcpart
+