You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2015/05/27 06:20:50 UTC
[2/2] hive git commit: HIVE-10812 : Scaling PK/FK's selectivity for
stats annotation (Pengcheng Xiong via John Pullokkaran)
HIVE-10812 : Scaling PK/FK's selectivity for stats annotation (Pengcheng Xiong via John Pullokkaran)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
Conflicts:
ql/src/test/results/clientpositive/udf_crc32.q.out
ql/src/test/results/clientpositive/udf_sha1.q.out
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/5f8b7016
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/5f8b7016
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/5f8b7016
Branch: refs/heads/branch-1.2
Commit: 5f8b7016b7d48343e34a4c9baaf971d2279625db
Parents: 5bb039c
Author: Pengcheng Xiong <px...@hortonworks.com>
Authored: Tue May 26 11:45:00 2015 -0700
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Tue May 26 21:20:11 2015 -0700
----------------------------------------------------------------------
.../apache/hadoop/hive/ql/exec/Utilities.java | 2 +
.../stats/annotation/StatsRulesProcFactory.java | 155 +++++++++++--------
.../apache/hadoop/hive/ql/stats/StatsUtils.java | 35 ++++-
.../clientpositive/annotate_stats_join.q.out | 8 +-
.../annotate_stats_join_pkfk.q.out | 42 ++---
.../spark/annotate_stats_join.q.out | 10 +-
.../tez/vector_null_projection.q.out | 10 ++
.../results/clientpositive/vector_join30.q.out | 24 +++
8 files changed, 194 insertions(+), 92 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/5f8b7016/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
index ad5c8f8..9582e3a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
@@ -140,6 +140,7 @@ import org.apache.hadoop.hive.ql.metadata.InputEstimator;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.AbstractOperatorDesc;
import org.apache.hadoop.hive.ql.plan.BaseWork;
import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
@@ -1095,6 +1096,7 @@ public final class Utilities {
kryo.setInstantiatorStrategy(new StdInstantiatorStrategy());
removeField(kryo, Operator.class, "colExprMap");
removeField(kryo, ColumnInfo.class, "objectInspector");
+ removeField(kryo, AbstractOperatorDesc.class, "statistics");
return kryo;
};
};
http://git-wip-us.apache.org/repos/asf/hive/blob/5f8b7016/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
index 571c050..0982059 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
@@ -77,6 +77,7 @@ import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import java.util.Map.Entry;
import java.util.Set;
import java.util.Stack;
@@ -1061,7 +1062,6 @@ public class StatsRulesProcFactory {
numAttr = keyExprs.size();
// infer PK-FK relationship in single attribute join case
- pkfkInferred = false;
inferPKFKRelationship();
// get the join keys from parent ReduceSink operators
for (int pos = 0; pos < parents.size(); pos++) {
@@ -1197,53 +1197,42 @@ public class StatsRulesProcFactory {
private void inferPKFKRelationship() {
if (numAttr == 1) {
- List<Integer> parentsWithPK = getPrimaryKeyCandidates(parents);
-
- // in case of fact to many dimensional tables join, the join key in fact table will be
- // mostly foreign key which will have corresponding primary key in dimension table.
- // The selectivity of fact table in that case will be product of all selectivities of
- // dimension tables (assumes conjunctivity)
- for (Integer id : parentsWithPK) {
- ColStatistics csPK = null;
- Operator<? extends OperatorDesc> parent = parents.get(id);
- for (ColStatistics cs : parent.getStatistics().getColumnStats()) {
- if (cs.isPrimaryKey()) {
- csPK = cs;
- break;
- }
- }
+ // If numAttr is 1, this means we join on one single key column.
+ Map<Integer, ColStatistics> parentsWithPK = getPrimaryKeyCandidates(parents);
- // infer foreign key candidates positions
- List<Integer> parentsWithFK = getForeignKeyCandidates(parents, csPK);
- if (parentsWithFK.size() == 1 &&
- parentsWithFK.size() + parentsWithPK.size() == parents.size()) {
- Operator<? extends OperatorDesc> parentWithFK = parents.get(parentsWithFK.get(0));
- List<Float> parentsSel = getSelectivity(parents, parentsWithPK);
- Float prodSelectivity = 1.0f;
- for (Float selectivity : parentsSel) {
- prodSelectivity *= selectivity;
- }
- newNumRows = (long) Math.ceil(
- parentWithFK.getStatistics().getNumRows() * prodSelectivity);
- pkfkInferred = true;
+ // We only allow one single PK.
+ if (parentsWithPK.size() != 1) {
+ LOG.debug("STATS-" + jop.toString() + ": detects multiple PK parents.");
+ return;
+ }
+ Integer pkPos = parentsWithPK.keySet().iterator().next();
+ ColStatistics csPK = parentsWithPK.values().iterator().next();
- // some debug information
- if (isDebugEnabled) {
- List<String> parentIds = Lists.newArrayList();
+ // infer foreign key candidates positions
+ Map<Integer, ColStatistics> csFKs = getForeignKeyCandidates(parents, csPK);
- // print primary key containing parents
- for (Integer i : parentsWithPK) {
- parentIds.add(parents.get(i).toString());
- }
- LOG.debug("STATS-" + jop.toString() + ": PK parent id(s) - " + parentIds);
- parentIds.clear();
+ // we allow multiple foreign keys (snowflake schema)
+ // csFKs.size() + 1 == parents.size() means we have a single PK and all
+ // the rest ops are FKs.
+ if (csFKs.size() + 1 == parents.size()) {
+ getSelectivity(parents, pkPos, csPK, csFKs);
- // print foreign key containing parents
- for (Integer i : parentsWithFK) {
- parentIds.add(parents.get(i).toString());
- }
- LOG.debug("STATS-" + jop.toString() + ": FK parent id(s) - " + parentIds);
+ // some debug information
+ if (isDebugEnabled) {
+ List<String> parentIds = Lists.newArrayList();
+
+ // print primary key containing parents
+ for (Integer i : parentsWithPK.keySet()) {
+ parentIds.add(parents.get(i).toString());
}
+ LOG.debug("STATS-" + jop.toString() + ": PK parent id(s) - " + parentIds);
+ parentIds.clear();
+
+ // print foreign key containing parents
+ for (Integer i : csFKs.keySet()) {
+ parentIds.add(parents.get(i).toString());
+ }
+ LOG.debug("STATS-" + jop.toString() + ": FK parent id(s) - " + parentIds);
}
}
}
@@ -1251,19 +1240,63 @@ public class StatsRulesProcFactory {
/**
* Get selectivity of reduce sink operators.
- * @param ops - reduce sink operators
- * @param opsWithPK - reduce sink operators with primary keys
- * @return - list of selectivity for primary key containing operators
+ * @param csPK - ColStatistics for a single primary key
+ * @param csFKs - ColStatistics for multiple foreign keys
*/
- private List<Float> getSelectivity(List<Operator<? extends OperatorDesc>> ops,
- List<Integer> opsWithPK) {
- List<Float> result = Lists.newArrayList();
- for (Integer idx : opsWithPK) {
- Operator<? extends OperatorDesc> op = ops.get(idx);
- float selectivity = getSelectivitySimpleTree(op);
- result.add(selectivity);
+ private void getSelectivity(List<Operator<? extends OperatorDesc>> ops, Integer pkPos, ColStatistics csPK,
+ Map<Integer, ColStatistics> csFKs) {
+ this.pkfkInferred = true;
+ double pkfkSelectivity = Double.MAX_VALUE;
+ int fkInd = -1;
+ // 1. We iterate through all the operators that have candidate FKs and
+ // choose the FK that has the minimum selectivity. We assume that PK and this FK
+ // have the PK-FK relationship. This is heuristic and can be
+ // improved later.
+ for (Entry<Integer, ColStatistics> entry : csFKs.entrySet()) {
+ int pos = entry.getKey();
+ Operator<? extends OperatorDesc> opWithPK = ops.get(pkPos);
+ double selectivity = getSelectivitySimpleTree(opWithPK);
+ double selectivityAdjustment = StatsUtils.getScaledSelectivity(csPK, entry.getValue());
+ selectivity = selectivityAdjustment * selectivity > 1 ? selectivity : selectivityAdjustment
+ * selectivity;
+ if (selectivity < pkfkSelectivity) {
+ pkfkSelectivity = selectivity;
+ fkInd = pos;
+ }
+ }
+ long newrows = 1;
+ List<Long> rowCounts = Lists.newArrayList();
+ List<Long> distinctVals = Lists.newArrayList();
+ // 2. We then iterate through all the operators that have candidate FKs again.
+ // We assume the PK is first joining with the FK that we just selected.
+ // And we apply the PK-FK relationship when we compute the newrows and ndv.
+ // After that, we join the result with all the other FKs.
+ // We do not assume the PK-FK relationship anymore and just compute the
+ // row count using the classic formula.
+ for (Entry<Integer, ColStatistics> entry : csFKs.entrySet()) {
+ int pos = entry.getKey();
+ ColStatistics csFK = entry.getValue();
+ ReduceSinkOperator parent = (ReduceSinkOperator) jop.getParentOperators().get(pos);
+ Statistics parentStats = parent.getStatistics();
+ if (fkInd == pos) {
+ // 2.1 This is the new number of rows after PK is joining with FK
+ newrows = (long) Math.ceil(parentStats.getNumRows() * pkfkSelectivity);
+ rowCounts.add(newrows);
+ // 2.1 The ndv is the minimum of the PK and the FK.
+ distinctVals.add(Math.min(csFK.getCountDistint(), csPK.getCountDistint()));
+ } else {
+ // 2.2 All the other FKs.
+ rowCounts.add(parentStats.getNumRows());
+ distinctVals.add(csFK.getCountDistint());
+ }
+ }
+ if (csFKs.size() == 1) {
+ // there is only one FK
+ this.newNumRows = newrows;
+ } else {
+ // there is more than one FK
+ this.newNumRows = this.computeNewRowCount(rowCounts, getDenominator(distinctVals));
}
- return result;
}
private float getSelectivitySimpleTree(Operator<? extends OperatorDesc> op) {
@@ -1323,11 +1356,11 @@ public class StatsRulesProcFactory {
* primary key range (inferred as foreign keys).
* @param ops - operators
* @param csPK - column statistics of primary key
- * @return - list of foreign key containing parent ids
+ * @return - a map which contains position ids and the corresponding column statistics
*/
- private List<Integer> getForeignKeyCandidates(List<Operator<? extends OperatorDesc>> ops,
+ private Map<Integer, ColStatistics> getForeignKeyCandidates(List<Operator<? extends OperatorDesc>> ops,
ColStatistics csPK) {
- List<Integer> result = Lists.newArrayList();
+ Map<Integer, ColStatistics> result = new HashMap<Integer, ColStatistics>();
if (csPK == null || ops == null) {
return result;
}
@@ -1343,7 +1376,7 @@ public class StatsRulesProcFactory {
ColStatistics cs = rsOp.getStatistics().getColumnStatisticsFromColName(joinCol);
if (cs != null && !cs.isPrimaryKey()) {
if (StatsUtils.inferForeignKey(csPK, cs)) {
- result.add(i);
+ result.put(i,cs);
}
}
}
@@ -1358,8 +1391,8 @@ public class StatsRulesProcFactory {
* @param ops - operators
* @return - list of primary key containing parent ids
*/
- private List<Integer> getPrimaryKeyCandidates(List<Operator<? extends OperatorDesc>> ops) {
- List<Integer> result = Lists.newArrayList();
+ private Map<Integer, ColStatistics> getPrimaryKeyCandidates(List<Operator<? extends OperatorDesc>> ops) {
+ Map<Integer, ColStatistics> result = new HashMap<Integer, ColStatistics>();
if (ops != null && !ops.isEmpty()) {
for (int i = 0; i < ops.size(); i++) {
Operator<? extends OperatorDesc> op = ops.get(i);
@@ -1371,7 +1404,7 @@ public class StatsRulesProcFactory {
if (rsOp.getStatistics() != null) {
ColStatistics cs = rsOp.getStatistics().getColumnStatisticsFromColName(joinCol);
if (cs != null && cs.isPrimaryKey()) {
- result.add(i);
+ result.put(i, cs);
}
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/5f8b7016/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
index 4cd9120..ad481bc 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
@@ -300,7 +300,10 @@ public class StatsUtils {
public static void inferAndSetPrimaryKey(long numRows, List<ColStatistics> colStats) {
if (colStats != null) {
for (ColStatistics cs : colStats) {
- if (cs != null && cs.getRange() != null && cs.getRange().minValue != null &&
+ if (cs != null && cs.getCountDistint() >= numRows) {
+ cs.setPrimaryKey(true);
+ }
+ else if (cs != null && cs.getRange() != null && cs.getRange().minValue != null &&
cs.getRange().maxValue != null) {
if (numRows ==
((cs.getRange().maxValue.longValue() - cs.getRange().minValue.longValue()) + 1)) {
@@ -330,6 +333,36 @@ public class StatsUtils {
return false;
}
+ /**
+ * Scale selectivity based on key range ratio.
+ * @param csPK - column statistics of primary key
+ * @param csFK - column statistics of potential foreign key
+ * @return - pkRange / fkRange when both ranges are positive and the FK range is narrower, else 1.0
+ */
+ public static float getScaledSelectivity(ColStatistics csPK, ColStatistics csFK) {
+ float scaledSelectivity = 1.0f;
+ if (csPK != null && csFK != null) {
+ if (csPK.isPrimaryKey()) {
+ // Use Max-Min Range as NDV gets scaled by selectivity.
+ if (csPK.getRange() != null && csFK.getRange() != null) {
+ long pkRangeDelta = getRangeDelta(csPK.getRange());
+ long fkRangeDelta = getRangeDelta(csFK.getRange());
+ if (fkRangeDelta > 0 && pkRangeDelta > 0 && fkRangeDelta < pkRangeDelta) {
+ scaledSelectivity = (float) pkRangeDelta / (float) fkRangeDelta;
+ }
+ }
+ }
+ }
+ return scaledSelectivity;
+ }
+
+ private static long getRangeDelta(ColStatistics.Range range) {
+ if (range.minValue != null && range.maxValue != null) {
+ return (range.maxValue.longValue() - range.minValue.longValue());
+ }
+ return 0;
+ }
+
private static boolean isWithin(ColStatistics.Range range1, ColStatistics.Range range2) {
if (range1.minValue != null && range2.minValue != null && range1.maxValue != null &&
range2.maxValue != null) {
http://git-wip-us.apache.org/repos/asf/hive/blob/5f8b7016/ql/src/test/results/clientpositive/annotate_stats_join.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/annotate_stats_join.q.out b/ql/src/test/results/clientpositive/annotate_stats_join.q.out
index 66e944b..bc44cc3 100644
--- a/ql/src/test/results/clientpositive/annotate_stats_join.q.out
+++ b/ql/src/test/results/clientpositive/annotate_stats_join.q.out
@@ -202,10 +202,10 @@ STAGE PLANS:
0 _col1 (type: int)
1 _col0 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 41 Data size: 7954 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 41 Data size: 7954 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -497,10 +497,10 @@ STAGE PLANS:
1 _col0 (type: int)
2 _col1 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
- Statistics: Num rows: 658 Data size: 192794 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 768 Data size: 225024 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 658 Data size: 192794 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 768 Data size: 225024 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/5f8b7016/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out b/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out
index 66e0e9f..dd70708 100644
--- a/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out
+++ b/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out
@@ -379,14 +379,14 @@ STAGE PLANS:
0 _col0 (type: int)
1 _col0 (type: int)
outputColumnNames: _col1
- Statistics: Num rows: 111 Data size: 444 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 136 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col1 (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 111 Data size: 444 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 136 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 111 Data size: 444 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 136 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -448,14 +448,14 @@ STAGE PLANS:
0 _col0 (type: int)
1 _col0 (type: int)
outputColumnNames: _col2
- Statistics: Num rows: 107 Data size: 428 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 131 Data size: 524 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: _col2 (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 107 Data size: 428 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 131 Data size: 524 Basic stats: COMPLETE Column stats: PARTIAL
File Output Operator
compressed: false
- Statistics: Num rows: 107 Data size: 428 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 131 Data size: 524 Basic stats: COMPLETE Column stats: PARTIAL
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -517,14 +517,14 @@ STAGE PLANS:
0 _col0 (type: int)
1 _col0 (type: int)
outputColumnNames: _col1
- Statistics: Num rows: 322 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 393 Data size: 1572 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col1 (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 322 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 393 Data size: 1572 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 322 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 393 Data size: 1572 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -672,14 +672,14 @@ STAGE PLANS:
1 _col0 (type: int)
2 _col0 (type: int)
outputColumnNames: _col1
- Statistics: Num rows: 964 Data size: 3856 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 821 Data size: 3284 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col1 (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 964 Data size: 3856 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 821 Data size: 3284 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 964 Data size: 3856 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 821 Data size: 3284 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -758,14 +758,14 @@ STAGE PLANS:
1 _col0 (type: int)
2 _col0 (type: int)
outputColumnNames: _col1
- Statistics: Num rows: 38 Data size: 152 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 213 Data size: 852 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col1 (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 38 Data size: 152 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 213 Data size: 852 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 38 Data size: 152 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 213 Data size: 852 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -844,14 +844,14 @@ STAGE PLANS:
1 _col0 (type: int)
2 _col0 (type: int)
outputColumnNames: _col1
- Statistics: Num rows: 322 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 508 Data size: 2032 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col1 (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 322 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 508 Data size: 2032 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 322 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 508 Data size: 2032 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -930,14 +930,14 @@ STAGE PLANS:
1 _col0 (type: int)
2 _col0 (type: int)
outputColumnNames: _col2
- Statistics: Num rows: 321 Data size: 1284 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 273 Data size: 1092 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col2 (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 321 Data size: 1284 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 273 Data size: 1092 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 321 Data size: 1284 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 273 Data size: 1092 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/5f8b7016/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out b/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out
index 75ad4e7..032926d 100644
--- a/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out
+++ b/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out
@@ -202,14 +202,14 @@ STAGE PLANS:
0 deptid (type: int)
1 deptid (type: int)
outputColumnNames: _col0, _col1, _col2, _col6, _col7
- Statistics: Num rows: 41 Data size: 7954 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col6 (type: int), _col7 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 41 Data size: 7954 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 41 Data size: 7954 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -529,10 +529,10 @@ STAGE PLANS:
1 _col0 (type: int)
2 _col1 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
- Statistics: Num rows: 658 Data size: 192794 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 768 Data size: 225024 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 658 Data size: 192794 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 768 Data size: 225024 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/5f8b7016/ql/src/test/results/clientpositive/tez/vector_null_projection.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_null_projection.q.out b/ql/src/test/results/clientpositive/tez/vector_null_projection.q.out
index 9b7b698..6af333d 100644
--- a/ql/src/test/results/clientpositive/tez/vector_null_projection.q.out
+++ b/ql/src/test/results/clientpositive/tez/vector_null_projection.q.out
@@ -104,30 +104,40 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: a
+ Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
Select Operator
+ Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
Group By Operator
keys: null (type: void)
mode: hash
outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: void)
sort order: +
Map-reduce partition columns: _col0 (type: void)
+ Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
Map 4
Map Operator Tree:
TableScan
alias: b
+ Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
Select Operator
+ Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
Group By Operator
keys: null (type: void)
mode: hash
outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: void)
sort order: +
Map-reduce partition columns: _col0 (type: void)
+ Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
Reducer 3
Reduce Operator Tree:
Group By Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/5f8b7016/ql/src/test/results/clientpositive/vector_join30.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_join30.q.out b/ql/src/test/results/clientpositive/vector_join30.q.out
index 57f9aeb..cfe047d 100644
--- a/ql/src/test/results/clientpositive/vector_join30.q.out
+++ b/ql/src/test/results/clientpositive/vector_join30.q.out
@@ -99,10 +99,12 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col2, _col3
+ Statistics: Num rows: 275 Data size: 48400 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(hash(_col2,_col3))
mode: hash
outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -159,10 +161,12 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col2, _col3
+ Statistics: Num rows: 275 Data size: 48400 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(hash(_col2,_col3))
mode: hash
outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -347,10 +351,12 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col2, _col3
+ Statistics: Num rows: 550 Data size: 96800 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(hash(_col2,_col3))
mode: hash
outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -554,10 +560,12 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col2, _col3
+ Statistics: Num rows: 550 Data size: 96800 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(hash(_col2,_col3))
mode: hash
outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -788,10 +796,12 @@ STAGE PLANS:
1 _col0 (type: string)
2 _col0 (type: string)
outputColumnNames: _col2, _col3
+ Statistics: Num rows: 550 Data size: 96800 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(hash(_col2,_col3))
mode: hash
outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -861,10 +871,12 @@ STAGE PLANS:
1 _col0 (type: string)
2 _col0 (type: string)
outputColumnNames: _col2, _col3
+ Statistics: Num rows: 550 Data size: 96800 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(hash(_col2,_col3))
mode: hash
outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -912,10 +924,12 @@ STAGE PLANS:
1 _col0 (type: string)
2 _col0 (type: string)
outputColumnNames: _col2, _col3
+ Statistics: Num rows: 550 Data size: 96800 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(hash(_col2,_col3))
mode: hash
outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -1167,10 +1181,12 @@ STAGE PLANS:
1 _col0 (type: string)
2 _col0 (type: string)
outputColumnNames: _col2, _col3
+ Statistics: Num rows: 1100 Data size: 193600 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(hash(_col2,_col3))
mode: hash
outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -1240,10 +1256,12 @@ STAGE PLANS:
1 _col0 (type: string)
2 _col0 (type: string)
outputColumnNames: _col2, _col3
+ Statistics: Num rows: 1100 Data size: 193600 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(hash(_col2,_col3))
mode: hash
outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -1487,10 +1505,12 @@ STAGE PLANS:
1 _col0 (type: string)
2 _col0 (type: string)
outputColumnNames: _col2, _col3
+ Statistics: Num rows: 1100 Data size: 193600 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(hash(_col2,_col3))
mode: hash
outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -1756,10 +1776,12 @@ STAGE PLANS:
1 _col0 (type: string)
2 _col0 (type: string)
outputColumnNames: _col2, _col3
+ Statistics: Num rows: 1100 Data size: 193600 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(hash(_col2,_col3))
mode: hash
outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -2025,10 +2047,12 @@ STAGE PLANS:
1 _col0 (type: string)
2 _col0 (type: string)
outputColumnNames: _col2, _col3
+ Statistics: Num rows: 1100 Data size: 193600 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(hash(_col2,_col3))
mode: hash
outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table: