You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2015/05/23 20:49:20 UTC
[47/48] hive git commit: HIVE-8769 : Physical optimizer : Incorrect
CE results in a shuffle join instead of a Map join (PK/FK pattern not
detected) (Pengcheng Xiong via Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/d823fc80/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
index 44269f0..571c050 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
@@ -337,10 +337,9 @@ public class StatsRulesProcFactory {
// can be boolean column in which case return true count
ExprNodeColumnDesc encd = (ExprNodeColumnDesc) pred;
String colName = encd.getColumn();
- String tabAlias = encd.getTabAlias();
String colType = encd.getTypeString();
if (colType.equalsIgnoreCase(serdeConstants.BOOLEAN_TYPE_NAME)) {
- ColStatistics cs = stats.getColumnStatisticsForColumn(tabAlias, colName);
+ ColStatistics cs = stats.getColumnStatisticsFromColName(colName);
if (cs != null) {
return cs.getNumTrues();
}
@@ -393,10 +392,9 @@ public class StatsRulesProcFactory {
// NOT on boolean columns is possible. in which case return false count.
ExprNodeColumnDesc encd = (ExprNodeColumnDesc) leaf;
String colName = encd.getColumn();
- String tabAlias = encd.getTabAlias();
String colType = encd.getTypeString();
if (colType.equalsIgnoreCase(serdeConstants.BOOLEAN_TYPE_NAME)) {
- ColStatistics cs = stats.getColumnStatisticsForColumn(tabAlias, colName);
+ ColStatistics cs = stats.getColumnStatisticsFromColName(colName);
if (cs != null) {
return cs.getNumFalses();
}
@@ -423,8 +421,7 @@ public class StatsRulesProcFactory {
if (leaf instanceof ExprNodeColumnDesc) {
ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) leaf;
String colName = colDesc.getColumn();
- String tabAlias = colDesc.getTabAlias();
- ColStatistics cs = stats.getColumnStatisticsForColumn(tabAlias, colName);
+ ColStatistics cs = stats.getColumnStatisticsFromColName(colName);
if (cs != null) {
return cs.getNumNulls();
}
@@ -450,7 +447,6 @@ public class StatsRulesProcFactory {
if (udf instanceof GenericUDFOPEqual ||
udf instanceof GenericUDFOPEqualNS) {
String colName = null;
- String tabAlias = null;
boolean isConst = false;
Object prevConst = null;
@@ -483,7 +479,7 @@ public class StatsRulesProcFactory {
return numRows;
}
- ColStatistics cs = stats.getColumnStatisticsForColumn(tabAlias, colName);
+ ColStatistics cs = stats.getColumnStatisticsFromColName(colName);
if (cs != null) {
long dvs = cs.getCountDistint();
numRows = dvs == 0 ? numRows / 2 : numRows / dvs;
@@ -492,7 +488,6 @@ public class StatsRulesProcFactory {
} else if (leaf instanceof ExprNodeColumnDesc) {
ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) leaf;
colName = colDesc.getColumn();
- tabAlias = colDesc.getTabAlias();
// if const is first argument then evaluate the result
if (isConst) {
@@ -504,7 +499,7 @@ public class StatsRulesProcFactory {
return numRows;
}
- ColStatistics cs = stats.getColumnStatisticsForColumn(tabAlias, colName);
+ ColStatistics cs = stats.getColumnStatisticsFromColName(colName);
if (cs != null) {
long dvs = cs.getCountDistint();
numRows = dvs == 0 ? numRows / 2 : numRows / dvs;
@@ -753,11 +748,11 @@ public class StatsRulesProcFactory {
GroupByOperator mGop = OperatorUtils.findSingleOperatorUpstream(parent, GroupByOperator.class);
if (mGop != null) {
containsGroupingSet = mGop.getConf().isGroupingSetsPresent();
- sizeOfGroupingSet = mGop.getConf().getListGroupingSets().size();
}
if (containsGroupingSet) {
// Case 8: column stats, grouping sets
+ sizeOfGroupingSet = mGop.getConf().getListGroupingSets().size();
cardinality = Math.min(parentNumRows, StatsUtils.safeMult(ndvProduct, sizeOfGroupingSet));
if (isDebugEnabled) {
@@ -826,9 +821,8 @@ public class StatsRulesProcFactory {
// for those newly added columns
if (!colExprMap.containsKey(ci.getInternalName())) {
String colName = ci.getInternalName();
- String tabAlias = ci.getTabAlias();
String colType = ci.getTypeName();
- ColStatistics cs = new ColStatistics(tabAlias, colName, colType);
+ ColStatistics cs = new ColStatistics(colName, colType);
cs.setCountDistint(stats.getNumRows());
cs.setNumNulls(0);
cs.setAvgColLen(StatsUtils.getAvgColLenOfFixedLengthTypes(colType));
@@ -1053,54 +1047,37 @@ public class StatsRulesProcFactory {
// statistics object that is combination of statistics from all
// relations involved in JOIN
Statistics stats = new Statistics();
- Map<String, Long> rowCountParents = new HashMap<String, Long>();
List<Long> distinctVals = Lists.newArrayList();
int numParent = parents.size();
- Map<String, ColStatistics> joinedColStats = Maps.newHashMap();
+ Map<Integer, Long> rowCountParents = Maps.newHashMap();
+ Map<Integer, Statistics> joinStats = Maps.newHashMap();
Map<Integer, List<String>> joinKeys = Maps.newHashMap();
List<Long> rowCounts = Lists.newArrayList();
// detect if there are multiple attributes in join key
ReduceSinkOperator rsOp = (ReduceSinkOperator) jop.getParentOperators().get(0);
- List<String> keyExprs = rsOp.getConf().getOutputKeyColumnNames();
+ List<String> keyExprs = StatsUtils.getQualifedReducerKeyNames(rsOp.getConf()
+ .getOutputKeyColumnNames());
numAttr = keyExprs.size();
// infer PK-FK relationship in single attribute join case
pkfkInferred = false;
inferPKFKRelationship();
-
// get the join keys from parent ReduceSink operators
for (int pos = 0; pos < parents.size(); pos++) {
ReduceSinkOperator parent = (ReduceSinkOperator) jop.getParentOperators().get(pos);
-
Statistics parentStats = parent.getStatistics();
- keyExprs = parent.getConf().getOutputKeyColumnNames();
-
- // Parent RS may have column statistics from multiple parents.
- // Populate table alias to row count map, this will be used later to
- // scale down/up column statistics based on new row count
- // NOTE: JOIN with UNION as parent of RS will not have table alias
- // propagated properly. UNION operator does not propagate the table
- // alias of subqueries properly to expression nodes. Hence union20.q
- // will have wrong number of rows.
- Set<String> tableAliases = StatsUtils.getAllTableAlias(parent.getColumnExprMap());
- for (String tabAlias : tableAliases) {
- rowCountParents.put(tabAlias, parentStats.getNumRows());
- }
+ keyExprs = StatsUtils.getQualifedReducerKeyNames(parent.getConf()
+ .getOutputKeyColumnNames());
+
+ rowCountParents.put(pos, parentStats.getNumRows());
rowCounts.add(parentStats.getNumRows());
- // compute fully qualified join key column names. this name will be
- // used to quickly look-up for column statistics of join key.
- // TODO: expressions in join condition will be ignored. assign
// internal name for expressions and estimate column statistics for expression.
- List<String> fqCols = StatsUtils.getFullyQualifedReducerKeyNames(keyExprs,
- parent.getColumnExprMap());
- joinKeys.put(pos, fqCols);
+ joinKeys.put(pos, keyExprs);
// get column statistics for all output columns
- for (ColStatistics cs : parentStats.getColumnStats()) {
- joinedColStats.put(cs.getFullyQualifiedColName(), cs);
- }
+ joinStats.put(pos, parentStats);
// since new statistics is derived from all relations involved in
// JOIN, we need to update the state information accordingly
@@ -1116,12 +1093,11 @@ public class StatsRulesProcFactory {
for (int idx = 0; idx < numAttr; idx++) {
for (Integer i : joinKeys.keySet()) {
String col = joinKeys.get(i).get(idx);
- ColStatistics cs = joinedColStats.get(col);
+ ColStatistics cs = joinStats.get(i).getColumnStatisticsFromColName(col);
if (cs != null) {
perAttrDVs.add(cs.getCountDistint());
}
}
-
distinctVals.add(getDenominator(perAttrDVs));
perAttrDVs.clear();
}
@@ -1136,9 +1112,10 @@ public class StatsRulesProcFactory {
}
}
} else {
- for (List<String> jkeys : joinKeys.values()) {
- for (String jk : jkeys) {
- ColStatistics cs = joinedColStats.get(jk);
+ if (numAttr == 1) {
+ for (Integer i : joinKeys.keySet()) {
+ String col = joinKeys.get(i).get(0);
+ ColStatistics cs = joinStats.get(i).getColumnStatisticsFromColName(col);
if (cs != null) {
distinctVals.add(cs.getCountDistint());
}
@@ -1148,28 +1125,23 @@ public class StatsRulesProcFactory {
}
// Update NDV of joined columns to be min(V(R,y), V(S,y))
- updateJoinColumnsNDV(joinKeys, joinedColStats, numAttr);
+ updateJoinColumnsNDV(joinKeys, joinStats, numAttr);
- // column statistics from different sources are put together and rename
- // fully qualified column names based on output schema of join operator
+ // column statistics from different sources are put together and
+ // rename based on output schema of join operator
Map<String, ExprNodeDesc> colExprMap = jop.getColumnExprMap();
RowSchema rs = jop.getSchema();
List<ColStatistics> outColStats = Lists.newArrayList();
- Map<String, String> outInTabAlias = new HashMap<String, String>();
for (ColumnInfo ci : rs.getSignature()) {
String key = ci.getInternalName();
ExprNodeDesc end = colExprMap.get(key);
if (end instanceof ExprNodeColumnDesc) {
String colName = ((ExprNodeColumnDesc) end).getColumn();
- String tabAlias = ((ExprNodeColumnDesc) end).getTabAlias();
- String fqColName = StatsUtils.getFullyQualifiedColumnName(tabAlias, colName);
- ColStatistics cs = joinedColStats.get(fqColName);
+ int pos = jop.getConf().getReversedExprs().get(key);
+ ColStatistics cs = joinStats.get(pos).getColumnStatisticsFromColName(colName);
String outColName = key;
- String outTabAlias = ci.getTabAlias();
- outInTabAlias.put(outTabAlias, tabAlias);
if (cs != null) {
cs.setColumnName(outColName);
- cs.setTableAlias(outTabAlias);
}
outColStats.add(cs);
}
@@ -1178,7 +1150,7 @@ public class StatsRulesProcFactory {
// update join statistics
stats.setColumnStats(outColStats);
long newRowCount = pkfkInferred ? newNumRows : computeNewRowCount(rowCounts, denom);
- updateStatsForJoinType(stats, newRowCount, jop, rowCountParents,outInTabAlias);
+ updateStatsForJoinType(stats, newRowCount, jop, rowCountParents);
jop.setStatistics(stats);
if (isDebugEnabled) {
@@ -1364,13 +1336,11 @@ public class StatsRulesProcFactory {
Operator<? extends OperatorDesc> op = ops.get(i);
if (op != null && op instanceof ReduceSinkOperator) {
ReduceSinkOperator rsOp = (ReduceSinkOperator) op;
- List<String> keys = rsOp.getConf().getOutputKeyColumnNames();
- List<String> fqCols = StatsUtils.getFullyQualifedReducerKeyNames(keys,
- rsOp.getColumnExprMap());
- if (fqCols.size() == 1) {
- String joinCol = fqCols.get(0);
+ List<String> keys = StatsUtils.getQualifedReducerKeyNames(rsOp.getConf().getOutputKeyColumnNames());
+ if (keys.size() == 1) {
+ String joinCol = keys.get(0);
if (rsOp.getStatistics() != null) {
- ColStatistics cs = rsOp.getStatistics().getColumnStatisticsFromFQColName(joinCol);
+ ColStatistics cs = rsOp.getStatistics().getColumnStatisticsFromColName(joinCol);
if (cs != null && !cs.isPrimaryKey()) {
if (StatsUtils.inferForeignKey(csPK, cs)) {
result.add(i);
@@ -1395,13 +1365,11 @@ public class StatsRulesProcFactory {
Operator<? extends OperatorDesc> op = ops.get(i);
if (op instanceof ReduceSinkOperator) {
ReduceSinkOperator rsOp = (ReduceSinkOperator) op;
- List<String> keys = rsOp.getConf().getOutputKeyColumnNames();
- List<String> fqCols = StatsUtils.getFullyQualifedReducerKeyNames(keys,
- rsOp.getColumnExprMap());
- if (fqCols.size() == 1) {
- String joinCol = fqCols.get(0);
+ List<String> keys = StatsUtils.getQualifedReducerKeyNames(rsOp.getConf().getOutputKeyColumnNames());
+ if (keys.size() == 1) {
+ String joinCol = keys.get(0);
if (rsOp.getStatistics() != null) {
- ColStatistics cs = rsOp.getStatistics().getColumnStatisticsFromFQColName(joinCol);
+ ColStatistics cs = rsOp.getStatistics().getColumnStatisticsFromColName(joinCol);
if (cs != null && cs.isPrimaryKey()) {
result.add(i);
}
@@ -1429,13 +1397,17 @@ public class StatsRulesProcFactory {
private void updateStatsForJoinType(Statistics stats, long newNumRows,
CommonJoinOperator<? extends JoinDesc> jop,
- Map<String, Long> rowCountParents,
- Map<String, String> outInTabAlias) {
+ Map<Integer, Long> rowCountParents) {
if (newNumRows < 0) {
LOG.info("STATS-" + jop.toString() + ": Overflow in number of rows."
+ newNumRows + " rows will be set to Long.MAX_VALUE");
}
+ if (newNumRows == 0) {
+ LOG.info("STATS-" + jop.toString() + ": Equals 0 in number of rows."
+ + newNumRows + " rows will be set to 1");
+ newNumRows = 1;
+ }
newNumRows = StatsUtils.getMaxIfOverflow(newNumRows);
stats.setNumRows(newNumRows);
@@ -1447,7 +1419,8 @@ public class StatsRulesProcFactory {
// and stats for columns from 2nd parent should be scaled down by 200x
List<ColStatistics> colStats = stats.getColumnStats();
for (ColStatistics cs : colStats) {
- long oldRowCount = rowCountParents.get(outInTabAlias.get(cs.getTableAlias()));
+ int pos = jop.getConf().getReversedExprs().get(cs.getColumnName());
+ long oldRowCount = rowCountParents.get(pos);
double ratio = (double) newNumRows / (double) oldRowCount;
long oldDV = cs.getCountDistint();
long newDV = oldDV;
@@ -1499,15 +1472,16 @@ public class StatsRulesProcFactory {
}
private void updateJoinColumnsNDV(Map<Integer, List<String>> joinKeys,
- Map<String, ColStatistics> joinedColStats, int numAttr) {
+ Map<Integer, Statistics> joinStats, int numAttr) {
int joinColIdx = 0;
while (numAttr > 0) {
long minNDV = Long.MAX_VALUE;
// find min NDV for joining columns
for (Map.Entry<Integer, List<String>> entry : joinKeys.entrySet()) {
+ int pos = entry.getKey();
String key = entry.getValue().get(joinColIdx);
- ColStatistics cs = joinedColStats.get(key);
+ ColStatistics cs = joinStats.get(pos).getColumnStatisticsFromColName(key);
if (cs != null && cs.getCountDistint() < minNDV) {
minNDV = cs.getCountDistint();
}
@@ -1516,8 +1490,9 @@ public class StatsRulesProcFactory {
// set min NDV value to both columns involved in join
if (minNDV != Long.MAX_VALUE) {
for (Map.Entry<Integer, List<String>> entry : joinKeys.entrySet()) {
+ int pos = entry.getKey();
String key = entry.getValue().get(joinColIdx);
- ColStatistics cs = joinedColStats.get(key);
+ ColStatistics cs = joinStats.get(pos).getColumnStatisticsFromColName(key);
if (cs != null) {
cs.setCountDistint(minNDV);
}
@@ -1810,9 +1785,14 @@ public class StatsRulesProcFactory {
if (newNumRows < 0) {
LOG.info("STATS-" + op.toString() + ": Overflow in number of rows."
+ newNumRows + " rows will be set to Long.MAX_VALUE");
+ newNumRows = StatsUtils.getMaxIfOverflow(newNumRows);
+ }
+ if (newNumRows == 0) {
+ LOG.info("STATS-" + op.toString() + ": Equals 0 in number of rows."
+ + newNumRows + " rows will be set to 1");
+ newNumRows = 1;
}
- newNumRows = StatsUtils.getMaxIfOverflow(newNumRows);
long oldRowCount = stats.getNumRows();
double ratio = (double) newNumRows / (double) oldRowCount;
stats.setNumRows(newNumRows);
http://git-wip-us.apache.org/repos/asf/hive/blob/d823fc80/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java
index 0a83440..bc34710 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java
@@ -27,7 +27,7 @@ import org.apache.hadoop.hive.ql.plan.Explain.Level;
public class AbstractOperatorDesc implements OperatorDesc {
protected boolean vectorMode = false;
- protected transient Statistics statistics;
+ protected Statistics statistics;
protected transient OpTraits opTraits;
protected transient Map<String, String> opProps;
http://git-wip-us.apache.org/repos/asf/hive/blob/d823fc80/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java
index c420190..41a1c7a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java
@@ -23,10 +23,8 @@ import org.apache.hadoop.hive.ql.stats.StatsUtils;
public class ColStatistics {
- private String tabAlias;
private String colName;
private String colType;
- private String fqColName;
private long countDistint;
private long numNulls;
private double avgColLen;
@@ -35,16 +33,14 @@ public class ColStatistics {
private Range range;
private boolean isPrimaryKey;
- public ColStatistics(String tabAlias, String colName, String colType) {
- this.setTableAlias(tabAlias);
+ public ColStatistics(String colName, String colType) {
this.setColumnName(colName);
this.setColumnType(colType);
- this.setFullyQualifiedColName(StatsUtils.getFullyQualifiedColumnName(tabAlias, colName));
this.setPrimaryKey(false);
}
public ColStatistics() {
- this(null, null, null);
+ this(null, null);
}
public String getColumnName() {
@@ -53,7 +49,6 @@ public class ColStatistics {
public void setColumnName(String colName) {
this.colName = colName;
- this.fqColName = StatsUtils.getFullyQualifiedColumnName(tabAlias, colName);
}
public String getColumnType() {
@@ -88,23 +83,6 @@ public class ColStatistics {
this.avgColLen = avgColLen;
}
- public String getFullyQualifiedColName() {
- return fqColName;
- }
-
- public void setFullyQualifiedColName(String fqColName) {
- this.fqColName = fqColName;
- }
-
- public String getTableAlias() {
- return tabAlias;
- }
-
- public void setTableAlias(String tabName) {
- this.tabAlias = tabName;
- this.fqColName = StatsUtils.getFullyQualifiedColumnName(tabName, colName);
- }
-
public long getNumTrues() {
return numTrues;
}
@@ -136,8 +114,6 @@ public class ColStatistics {
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
- sb.append(" fqColName: ");
- sb.append(fqColName);
sb.append(" colName: ");
sb.append(colName);
sb.append(" colType: ");
@@ -163,8 +139,7 @@ public class ColStatistics {
@Override
public ColStatistics clone() throws CloneNotSupportedException {
- ColStatistics clone = new ColStatistics(tabAlias, colName, colType);
- clone.setFullyQualifiedColName(fqColName);
+ ColStatistics clone = new ColStatistics(colName, colType);
clone.setAvgColLen(avgColLen);
clone.setCountDistint(countDistint);
clone.setNumNulls(numNulls);
@@ -189,7 +164,7 @@ public class ColStatistics {
public final Number minValue;
public final Number maxValue;
- Range(Number minValue, Number maxValue) {
+ public Range(Number minValue, Number maxValue) {
super();
this.minValue = minValue;
this.maxValue = maxValue;
http://git-wip-us.apache.org/repos/asf/hive/blob/d823fc80/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java
index f66279f..4e52bac 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java
@@ -176,7 +176,7 @@ public class Statistics implements Serializable {
ColStatistics updatedCS = null;
if (cs != null) {
- String key = cs.getFullyQualifiedColName();
+ String key = cs.getColumnName();
// if column statistics for a column is already found then merge the statistics
if (columnStats.containsKey(key) && columnStats.get(key) != null) {
updatedCS = columnStats.get(key);
@@ -230,13 +230,6 @@ public class Statistics implements Serializable {
return dataSize;
}
- public ColStatistics getColumnStatisticsFromFQColName(String fqColName) {
- if (columnStats != null) {
- return columnStats.get(fqColName);
- }
- return null;
- }
-
public ColStatistics getColumnStatisticsFromColName(String colName) {
if (columnStats == null) {
return null;
@@ -249,16 +242,10 @@ public class Statistics implements Serializable {
return null;
}
- public ColStatistics getColumnStatisticsForColumn(String tabAlias, String colName) {
- String fqColName = StatsUtils.getFullyQualifiedColumnName(tabAlias, colName);
- return getColumnStatisticsFromFQColName(fqColName);
- }
-
public List<ColStatistics> getColumnStats() {
if (columnStats != null) {
return Lists.newArrayList(columnStats.values());
}
return null;
}
-
}
http://git-wip-us.apache.org/repos/asf/hive/blob/d823fc80/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
index 10871e4..4cd9120 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
@@ -44,6 +44,7 @@ import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
import org.apache.hadoop.hive.ql.plan.ColStatistics;
+import org.apache.hadoop.hive.ql.plan.ColStatistics.Range;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnListDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
@@ -170,6 +171,9 @@ public class StatsUtils {
nr = ds / avgRowSize;
}
}
+ if (nr == 0) {
+ nr = 1;
+ }
stats.setNumRows(nr);
stats.setDataSize(ds);
@@ -226,6 +230,9 @@ public class StatsUtils {
nr = ds / avgRowSize;
}
}
+ if (nr == 0) {
+ nr = 1;
+ }
stats.addToNumRows(nr);
stats.addToDataSize(ds);
@@ -239,8 +246,7 @@ public class StatsUtils {
for (Partition part : partList.getNotDeniedPartns()) {
partNames.add(part.getName());
}
- Map<String, String> colToTabAlias = new HashMap<String, String>();
- neededColumns = processNeededColumns(schema, neededColumns, colToTabAlias);
+ neededColumns = processNeededColumns(schema, neededColumns);
AggrStats aggrStats = Hive.get().getAggrColStatsFor(table.getDbName(), table.getTableName(),
neededColumns, partNames);
if (null == aggrStats) {
@@ -261,8 +267,7 @@ public class StatsUtils {
LOG.debug("Column stats requested for : " + neededColumns.size() + " columns. Able to" +
" retrieve for " + colStats.size() + " columns");
}
- List<ColStatistics> columnStats = convertColStats(colStats, table.getTableName(),
- colToTabAlias);
+ List<ColStatistics> columnStats = convertColStats(colStats, table.getTableName());
addParitionColumnStats(conf, neededColumns, referencedColumns, schema, table, partList,
columnStats);
@@ -354,13 +359,15 @@ public class StatsUtils {
// currently metastore does not store column stats for
// partition column, so we calculate the NDV from pruned
// partition list
- ColStatistics partCS = new ColStatistics(table.getTableName(),
- ci.getInternalName(), ci.getType().getTypeName());
+ ColStatistics partCS = new ColStatistics(ci.getInternalName(), ci.getType()
+ .getTypeName());
long numPartitions = getNDVPartitionColumn(partList.getPartitions(),
ci.getInternalName());
partCS.setCountDistint(numPartitions);
partCS.setAvgColLen(StatsUtils.getAvgColLenOfVariableLengthTypes(conf,
ci.getObjectInspector(), partCS.getColumnType()));
+ partCS.setRange(getRangePartitionColumn(partList.getPartitions(), ci.getInternalName(),
+ ci.getType().getTypeName()));
colStats.add(partCS);
}
}
@@ -376,6 +383,47 @@ public class StatsUtils {
return distinctVals.size();
}
+  /**
+   * Computes the [min, max] range of a numeric partition column by parsing the
+   * column's value out of each partition's spec.
+   *
+   * @param partitions partitions whose spec supplies the column values
+   * @param partColName name of the partition column to look up in each spec
+   * @param colType type name of the partition column
+   * @return the observed range; null when there are no partitions to inspect or
+   *         when the type is not an integral, floating point or decimal type
+   */
+  public static Range getRangePartitionColumn(Set<Partition> partitions, String partColName,
+      String colType) {
+    // Without any partition there is no observed value; returning a range built
+    // from the initial sentinels would yield min > max.
+    if (partitions == null || partitions.isEmpty()) {
+      return null;
+    }
+    // NOTE(review): the parse calls below throw NumberFormatException when a
+    // partition value is not numeric -- confirm callers never pass such values.
+    Range range = null;
+    if (colType.equalsIgnoreCase(serdeConstants.TINYINT_TYPE_NAME)
+        || colType.equalsIgnoreCase(serdeConstants.SMALLINT_TYPE_NAME)
+        || colType.equalsIgnoreCase(serdeConstants.INT_TYPE_NAME)
+        || colType.equalsIgnoreCase(serdeConstants.BIGINT_TYPE_NAME)) {
+      long min = Long.MAX_VALUE;
+      long max = Long.MIN_VALUE;
+      for (Partition partition : partitions) {
+        long value = Long.parseLong(partition.getSpec().get(partColName));
+        min = Math.min(min, value);
+        max = Math.max(max, value);
+      }
+      range = new Range(min, max);
+    } else if (colType.equalsIgnoreCase(serdeConstants.FLOAT_TYPE_NAME)
+        || colType.equalsIgnoreCase(serdeConstants.DOUBLE_TYPE_NAME)) {
+      double min = Double.MAX_VALUE;
+      // Double.MIN_VALUE is the smallest positive double, so it cannot seed a
+      // running maximum; start from the most negative finite value instead.
+      double max = -Double.MAX_VALUE;
+      for (Partition partition : partitions) {
+        double value = Double.parseDouble(partition.getSpec().get(partColName));
+        min = Math.min(min, value);
+        max = Math.max(max, value);
+      }
+      range = new Range(min, max);
+    } else if (colType.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {
+      double min = Double.MAX_VALUE;
+      // Same seeding rule as the float/double branch above.
+      double max = -Double.MAX_VALUE;
+      for (Partition partition : partitions) {
+        double value = new BigDecimal(partition.getSpec().get(partColName)).doubleValue();
+        min = Math.min(min, value);
+        max = Math.max(max, value);
+      }
+      range = new Range(min, max);
+    } else {
+      // Columns statistics for complex datatypes are not supported yet
+      return null;
+    }
+    return range;
+  }
+
private static void setUnknownRcDsToAverage(
List<Long> rowCounts, List<Long> dataSizes, int avgRowSize) {
if (LOG.isDebugEnabled()) {
@@ -531,7 +579,7 @@ public class StatsUtils {
*/
public static ColStatistics getColStatistics(ColumnStatisticsObj cso, String tabName,
String colName) {
- ColStatistics cs = new ColStatistics(tabName, colName, cso.getColType());
+ ColStatistics cs = new ColStatistics(colName, cso.getColType());
String colType = cso.getColType();
ColumnStatisticsData csd = cso.getStatsData();
if (colType.equalsIgnoreCase(serdeConstants.TINYINT_TYPE_NAME)
@@ -612,13 +660,12 @@ public class StatsUtils {
Table table, List<ColumnInfo> schema, List<String> neededColumns) {
String dbName = table.getDbName();
String tabName = table.getTableName();
- Map<String, String> colToTabAlias = new HashMap<String, String>(schema.size());
- List<String> neededColsInTable = processNeededColumns(schema, neededColumns, colToTabAlias);
+ List<String> neededColsInTable = processNeededColumns(schema, neededColumns);
List<ColStatistics> stats = null;
try {
List<ColumnStatisticsObj> colStat = Hive.get().getTableColumnStatistics(
dbName, tabName, neededColsInTable);
- stats = convertColStats(colStat, tabName, colToTabAlias);
+ stats = convertColStats(colStat, tabName);
} catch (HiveException e) {
LOG.error("Failed to retrieve table statistics: ", e);
stats = null;
@@ -626,35 +673,29 @@ public class StatsUtils {
return stats;
}
- private static List<ColStatistics> convertColStats(List<ColumnStatisticsObj> colStats, String tabName,
- Map<String,String> colToTabAlias) {
+ private static List<ColStatistics> convertColStats(List<ColumnStatisticsObj> colStats, String tabName) {
List<ColStatistics> stats = new ArrayList<ColStatistics>(colStats.size());
for (ColumnStatisticsObj statObj : colStats) {
ColStatistics cs = getColStatistics(statObj, tabName, statObj.getColName());
- cs.setTableAlias(colToTabAlias.get(cs.getColumnName()));
stats.add(cs);
}
return stats;
}
private static List<String> processNeededColumns(List<ColumnInfo> schema,
- List<String> neededColumns, Map<String, String> colToTabAlias) {
- for (ColumnInfo col : schema) {
- if (col.isHiddenVirtualCol()) continue;
- colToTabAlias.put(col.getInternalName(), col.getTabAlias());
- }
+ List<String> neededColumns) {
// Remove hidden virtual columns, as well as needed columns that are not
// part of the table. TODO: the latter case should not really happen...
List<String> neededColsInTable = null;
int limit = neededColumns.size();
for (int i = 0; i < limit; ++i) {
- if (colToTabAlias.containsKey(neededColumns.get(i))) continue;
if (neededColsInTable == null) {
neededColsInTable = Lists.newArrayList(neededColumns);
}
neededColsInTable.remove(i--);
--limit;
}
- return (neededColsInTable == null) ? neededColumns : neededColsInTable;
+ return (neededColsInTable == null || neededColsInTable.size() == 0) ? neededColumns
+ : neededColsInTable;
}
/**
@@ -1012,12 +1053,10 @@ public class StatsUtils {
if (colExprMap != null && rowSchema != null) {
for (ColumnInfo ci : rowSchema.getSignature()) {
String outColName = ci.getInternalName();
- String outTabAlias = ci.getTabAlias();
ExprNodeDesc end = colExprMap.get(outColName);
ColStatistics colStat = getColStatisticsFromExpression(conf, parentStats, end);
if (colStat != null) {
colStat.setColumnName(outColName);
- colStat.setTableAlias(outTabAlias);
cs.add(colStat);
}
}
@@ -1058,10 +1097,6 @@ public class StatsUtils {
colStat = null;
}
if (colStat != null) {
- ColumnInfo ci = rowSchema.getColumnInfo(colStat.getColumnName());
- if (ci != null) {
- colStat.setTableAlias(ci.getTabAlias());
- }
cs.add(colStat);
}
}
@@ -1093,13 +1128,11 @@ public class StatsUtils {
long numNulls = 0;
ObjectInspector oi = null;
long numRows = parentStats.getNumRows();
- String tabAlias = null;
if (end instanceof ExprNodeColumnDesc) {
// column projection
ExprNodeColumnDesc encd = (ExprNodeColumnDesc) end;
colName = encd.getColumn();
- tabAlias = encd.getTabAlias();
if (encd.getIsPartitionColOrVirtualCol()) {
@@ -1116,7 +1149,7 @@ public class StatsUtils {
} else {
// clone the column stats and return
- ColStatistics result = parentStats.getColumnStatisticsForColumn(tabAlias, colName);
+ ColStatistics result = parentStats.getColumnStatisticsFromColName(colName);
if (result != null) {
try {
return result.clone();
@@ -1181,7 +1214,7 @@ public class StatsUtils {
avgColSize = getAvgColLenOfFixedLengthTypes(colType);
}
- ColStatistics colStats = new ColStatistics(tabAlias, colName, colType);
+ ColStatistics colStats = new ColStatistics(colName, colType);
colStats.setAvgColLen(avgColSize);
colStats.setCountDistint(countDistincts);
colStats.setNumNulls(numNulls);
@@ -1316,40 +1349,6 @@ public class StatsUtils {
return result;
}
- /**
- * Returns fully qualified name of column
- * @param tabName
- * @param colName
- * @return
- */
- public static String getFullyQualifiedColumnName(String tabName, String colName) {
- return getFullyQualifiedName(null, tabName, colName);
- }
-
- /**
- * Returns fully qualified name of column
- * @param dbName
- * @param tabName
- * @param colName
- * @return
- */
- public static String getFullyQualifiedColumnName(String dbName, String tabName, String colName) {
- return getFullyQualifiedName(dbName, tabName, colName);
- }
-
- /**
- * Returns fully qualified name of column
- * @param dbName
- * @param tabName
- * @param partName
- * @param colName
- * @return
- */
- public static String getFullyQualifiedColumnName(String dbName, String tabName, String partName,
- String colName) {
- return getFullyQualifiedName(dbName, tabName, partName, colName);
- }
-
public static String getFullyQualifiedTableName(String dbName, String tabName) {
return getFullyQualifiedName(dbName, tabName);
}
@@ -1365,80 +1364,21 @@ public class StatsUtils {
}
/**
- * Get fully qualified column name from output key column names and column expression map
+ * Get qualified column name from output key column names
* @param keyExprs
* - output key names
- * @param map
- * - column expression map
- * @return list of fully qualified names
+ * @return list of qualified names
*/
- public static List<String> getFullyQualifedReducerKeyNames(List<String> keyExprs,
- Map<String, ExprNodeDesc> map) {
+ public static List<String> getQualifedReducerKeyNames(List<String> keyExprs) {
List<String> result = Lists.newArrayList();
if (keyExprs != null) {
for (String key : keyExprs) {
- String colName = key;
- ExprNodeDesc end = map.get(colName);
- // if we couldn't get expression try prepending "KEY." prefix to reducer key column names
- if (end == null) {
- colName = Utilities.ReduceField.KEY.toString() + "." + key;
- end = map.get(colName);
- if (end == null) {
- continue;
- }
- }
- if (end instanceof ExprNodeColumnDesc) {
- ExprNodeColumnDesc encd = (ExprNodeColumnDesc) end;
- String tabAlias = encd.getTabAlias();
- result.add(getFullyQualifiedColumnName(tabAlias, colName));
- } else if (end instanceof ExprNodeGenericFuncDesc) {
- ExprNodeGenericFuncDesc enf = (ExprNodeGenericFuncDesc) end;
- String tabAlias = "";
- for (ExprNodeDesc childEnd : enf.getChildren()) {
- if (childEnd instanceof ExprNodeColumnDesc) {
- tabAlias = ((ExprNodeColumnDesc) childEnd).getTabAlias();
- break;
- }
- }
- result.add(getFullyQualifiedColumnName(tabAlias, colName));
- } else if (end instanceof ExprNodeConstantDesc) {
- ExprNodeConstantDesc encd = (ExprNodeConstantDesc) end;
- result.add(encd.getValue().toString());
- }
+ result.add(Utilities.ReduceField.KEY.toString() + "." + key);
}
}
return result;
}
- /**
- * Returns all table aliases from expression nodes
- * @param columnExprMap - column expression map
- * @return
- */
- public static Set<String> getAllTableAlias(
- Map<String, ExprNodeDesc> columnExprMap) {
- Set<String> result = new HashSet<String>();
- if (columnExprMap != null) {
- for (ExprNodeDesc end : columnExprMap.values()) {
- getTableAliasFromExprNode(end, result);
- }
- }
- return result;
- }
-
- private static void getTableAliasFromExprNode(ExprNodeDesc end,
- Set<String> output) {
-
- if (end instanceof ExprNodeColumnDesc) {
- output.add(((ExprNodeColumnDesc) end).getTabAlias());
- } else if (end instanceof ExprNodeGenericFuncDesc) {
- for (ExprNodeDesc child : end.getChildren()) {
- getTableAliasFromExprNode(child, output);
- }
- }
-
- }
-
public static long getAvailableMemory(Configuration conf) {
int memory = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVETEZCONTAINERSIZE);
if (memory <= 0) {
http://git-wip-us.apache.org/repos/asf/hive/blob/d823fc80/ql/src/test/results/clientpositive/annotate_stats_filter.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/annotate_stats_filter.q.out b/ql/src/test/results/clientpositive/annotate_stats_filter.q.out
index aa66bc6..492e302 100644
--- a/ql/src/test/results/clientpositive/annotate_stats_filter.q.out
+++ b/ql/src/test/results/clientpositive/annotate_stats_filter.q.out
@@ -439,14 +439,14 @@ STAGE PLANS:
Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: false (type: boolean)
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -563,14 +563,14 @@ STAGE PLANS:
Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: false (type: boolean)
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -601,14 +601,14 @@ STAGE PLANS:
Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: false (type: boolean)
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -639,14 +639,14 @@ STAGE PLANS:
Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: false (type: boolean)
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/d823fc80/ql/src/test/results/clientpositive/annotate_stats_limit.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/annotate_stats_limit.q.out b/ql/src/test/results/clientpositive/annotate_stats_limit.q.out
index 5f8b6f8..7300ea0 100644
--- a/ql/src/test/results/clientpositive/annotate_stats_limit.q.out
+++ b/ql/src/test/results/clientpositive/annotate_stats_limit.q.out
@@ -160,6 +160,6 @@ STAGE PLANS:
Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 0
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
ListSink
http://git-wip-us.apache.org/repos/asf/hive/blob/d823fc80/ql/src/test/results/clientpositive/annotate_stats_part.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/annotate_stats_part.q.out b/ql/src/test/results/clientpositive/annotate_stats_part.q.out
index 241192b..fc65ef7 100644
--- a/ql/src/test/results/clientpositive/annotate_stats_part.q.out
+++ b/ql/src/test/results/clientpositive/annotate_stats_part.q.out
@@ -56,11 +56,11 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: PARTIAL
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: PARTIAL
Select Operator
expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: PARTIAL
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: PARTIAL
ListSink
PREHOOK: query: insert overwrite table loc_orc partition(year) select * from loc_staging
@@ -287,14 +287,14 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: PARTIAL
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: PARTIAL
Filter Operator
predicate: ((year = '2001') and (year = '__HIVE_DEFAULT_PARTITION__')) (type: boolean)
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: PARTIAL
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: PARTIAL
Select Operator
expressions: state (type: string), locid (type: int), zip (type: bigint), '__HIVE_DEFAULT_PARTITION__' (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: PARTIAL
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: PARTIAL
ListSink
PREHOOK: query: -- partition level partial column statistics
http://git-wip-us.apache.org/repos/asf/hive/blob/d823fc80/ql/src/test/results/clientpositive/annotate_stats_select.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/annotate_stats_select.q.out b/ql/src/test/results/clientpositive/annotate_stats_select.q.out
index 753ab4e..306b870 100644
--- a/ql/src/test/results/clientpositive/annotate_stats_select.q.out
+++ b/ql/src/test/results/clientpositive/annotate_stats_select.q.out
@@ -1143,14 +1143,14 @@ STAGE PLANS:
Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (not bo1) (type: boolean)
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: bo1 (type: boolean)
outputColumnNames: _col0
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/d823fc80/ql/src/test/results/clientpositive/annotate_stats_table.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/annotate_stats_table.q.out b/ql/src/test/results/clientpositive/annotate_stats_table.q.out
index 9bf82ac..9221ba8 100644
--- a/ql/src/test/results/clientpositive/annotate_stats_table.q.out
+++ b/ql/src/test/results/clientpositive/annotate_stats_table.q.out
@@ -44,11 +44,11 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: emp_orc
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: lastname (type: string), deptid (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
ListSink
PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/emp.txt' OVERWRITE INTO TABLE emp_staging
http://git-wip-us.apache.org/repos/asf/hive/blob/d823fc80/ql/src/test/results/clientpositive/auto_join30.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/auto_join30.q.out b/ql/src/test/results/clientpositive/auto_join30.q.out
index b068493..5437b7f 100644
--- a/ql/src/test/results/clientpositive/auto_join30.q.out
+++ b/ql/src/test/results/clientpositive/auto_join30.q.out
@@ -84,10 +84,12 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col2, _col3
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(hash(_col2,_col3))
mode: hash
outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -144,10 +146,12 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col2, _col3
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(hash(_col2,_col3))
mode: hash
outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -330,10 +334,12 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col2, _col3
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(hash(_col2,_col3))
mode: hash
outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -535,10 +541,12 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col2, _col3
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(hash(_col2,_col3))
mode: hash
outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -767,10 +775,12 @@ STAGE PLANS:
1 _col0 (type: string)
2 _col0 (type: string)
outputColumnNames: _col2, _col3
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(hash(_col2,_col3))
mode: hash
outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -840,10 +850,12 @@ STAGE PLANS:
1 _col0 (type: string)
2 _col0 (type: string)
outputColumnNames: _col2, _col3
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(hash(_col2,_col3))
mode: hash
outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -891,10 +903,12 @@ STAGE PLANS:
1 _col0 (type: string)
2 _col0 (type: string)
outputColumnNames: _col2, _col3
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(hash(_col2,_col3))
mode: hash
outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -1143,10 +1157,12 @@ STAGE PLANS:
1 _col0 (type: string)
2 _col0 (type: string)
outputColumnNames: _col2, _col3
+ Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(hash(_col2,_col3))
mode: hash
outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -1216,10 +1232,12 @@ STAGE PLANS:
1 _col0 (type: string)
2 _col0 (type: string)
outputColumnNames: _col2, _col3
+ Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(hash(_col2,_col3))
mode: hash
outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -1460,10 +1478,12 @@ STAGE PLANS:
1 _col0 (type: string)
2 _col0 (type: string)
outputColumnNames: _col2, _col3
+ Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(hash(_col2,_col3))
mode: hash
outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -1726,10 +1746,12 @@ STAGE PLANS:
1 _col0 (type: string)
2 _col0 (type: string)
outputColumnNames: _col2, _col3
+ Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(hash(_col2,_col3))
mode: hash
outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -1992,10 +2014,12 @@ STAGE PLANS:
1 _col0 (type: string)
2 _col0 (type: string)
outputColumnNames: _col2, _col3
+ Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(hash(_col2,_col3))
mode: hash
outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
http://git-wip-us.apache.org/repos/asf/hive/blob/d823fc80/ql/src/test/results/clientpositive/auto_join31.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/auto_join31.q.out b/ql/src/test/results/clientpositive/auto_join31.q.out
index 1e19dd0..0b25134 100644
--- a/ql/src/test/results/clientpositive/auto_join31.q.out
+++ b/ql/src/test/results/clientpositive/auto_join31.q.out
@@ -101,10 +101,12 @@ STAGE PLANS:
1 _col0 (type: string)
2 _col0 (type: string)
outputColumnNames: _col2, _col3
+ Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(hash(_col2,_col3))
mode: hash
outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -174,10 +176,12 @@ STAGE PLANS:
1 _col0 (type: string)
2 _col0 (type: string)
outputColumnNames: _col2, _col3
+ Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(hash(_col2,_col3))
mode: hash
outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
http://git-wip-us.apache.org/repos/asf/hive/blob/d823fc80/ql/src/test/results/clientpositive/auto_join32.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/auto_join32.q.out b/ql/src/test/results/clientpositive/auto_join32.q.out
index bfc8be8..f862870 100644
--- a/ql/src/test/results/clientpositive/auto_join32.q.out
+++ b/ql/src/test/results/clientpositive/auto_join32.q.out
@@ -42,10 +42,10 @@ STAGE PLANS:
s
TableScan
alias: s
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Filter Operator
predicate: name is not null (type: boolean)
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
HashTable Sink Operator
keys:
0 name (type: string)
@@ -56,10 +56,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: v
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Filter Operator
predicate: name is not null (type: boolean)
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -67,18 +67,18 @@ STAGE PLANS:
0 name (type: string)
1 name (type: string)
outputColumnNames: _col0, _col8
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Group By Operator
aggregations: count(DISTINCT _col8)
keys: _col0 (type: string), _col8 (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Local Work:
Map Reduce Local Work
Reduce Operator Tree:
@@ -87,10 +87,10 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -156,10 +156,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: s
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Filter Operator
predicate: name is not null (type: boolean)
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Sorted Merge Bucket Map Join Operator
condition map:
Inner Join 0 to 1
@@ -263,10 +263,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: s
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Filter Operator
predicate: name is not null (type: boolean)
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Sorted Merge Bucket Map Join Operator
condition map:
Inner Join 0 to 1
@@ -394,14 +394,14 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: v
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Filter Operator
predicate: ((p = 'bar') and name is not null) (type: boolean)
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: name (type: string), registration (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Sorted Merge Bucket Map Join Operator
condition map:
Inner Join 0 to 1
http://git-wip-us.apache.org/repos/asf/hive/blob/d823fc80/ql/src/test/results/clientpositive/auto_join_stats.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/auto_join_stats.q.out b/ql/src/test/results/clientpositive/auto_join_stats.q.out
index 9100762..d75d6c4 100644
--- a/ql/src/test/results/clientpositive/auto_join_stats.q.out
+++ b/ql/src/test/results/clientpositive/auto_join_stats.q.out
@@ -57,8 +57,10 @@ STAGE PLANS:
src2
TableScan
alias: src2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
HashTable Sink Operator
keys:
0 key (type: string)
@@ -69,8 +71,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: src1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -78,8 +82,10 @@ STAGE PLANS:
0 key (type: string)
1 key (type: string)
outputColumnNames: _col0, _col5
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (_col0 + _col5) is not null (type: boolean)
+ Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -144,8 +150,10 @@ STAGE PLANS:
src1
TableScan
alias: src1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
HashTable Sink Operator
keys:
0 key (type: string)
@@ -156,8 +164,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: src2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -165,8 +175,10 @@ STAGE PLANS:
0 key (type: string)
1 key (type: string)
outputColumnNames: _col0, _col5
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (_col0 + _col5) is not null (type: boolean)
+ Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -291,8 +303,10 @@ STAGE PLANS:
src2
TableScan
alias: src2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
HashTable Sink Operator
keys:
0 key (type: string)
@@ -303,8 +317,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: src1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -312,8 +328,10 @@ STAGE PLANS:
0 key (type: string)
1 key (type: string)
outputColumnNames: _col0, _col5
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (_col0 + _col5) is not null (type: boolean)
+ Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -347,10 +365,10 @@ STAGE PLANS:
smalltable2
TableScan
alias: smalltable2
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: UDFToDouble(key) is not null (type: boolean)
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
HashTable Sink Operator
keys:
0 (_col0 + _col5) (type: double)
@@ -403,8 +421,10 @@ STAGE PLANS:
src1
TableScan
alias: src1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
HashTable Sink Operator
keys:
0 key (type: string)
@@ -415,8 +435,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: src2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -424,8 +446,10 @@ STAGE PLANS:
0 key (type: string)
1 key (type: string)
outputColumnNames: _col0, _col5
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (_col0 + _col5) is not null (type: boolean)
+ Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
http://git-wip-us.apache.org/repos/asf/hive/blob/d823fc80/ql/src/test/results/clientpositive/auto_join_stats2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/auto_join_stats2.q.out b/ql/src/test/results/clientpositive/auto_join_stats2.q.out
index ed09875..a0aefa3 100644
--- a/ql/src/test/results/clientpositive/auto_join_stats2.q.out
+++ b/ql/src/test/results/clientpositive/auto_join_stats2.q.out
@@ -43,10 +43,10 @@ STAGE PLANS:
smalltable
TableScan
alias: smalltable
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: UDFToDouble(key) is not null (type: boolean)
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
HashTable Sink Operator
keys:
0 (_col0 + _col5) (type: double)
@@ -184,10 +184,10 @@ STAGE PLANS:
smalltable2
TableScan
alias: smalltable2
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: UDFToDouble(key) is not null (type: boolean)
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
HashTable Sink Operator
keys:
0 (_col0 + _col5) (type: double)