You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2014/08/06 19:58:02 UTC
svn commit: r1616292 [1/2] - in /hive/trunk: data/files/
ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/
ql/src/java/org/apache/hadoop/hive/ql/stats/
ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/
ql/src/test/...
Author: hashutosh
Date: Wed Aug 6 17:58:01 2014
New Revision: 1616292
URL: http://svn.apache.org/r1616292
Log:
HIVE-7589 : Some fixes and improvements to statistics annotation rules (Prasanth J via Ashutosh Chauhan)
Modified:
hive/trunk/data/files/dept.txt
hive/trunk/data/files/emp.txt
hive/trunk/data/files/loc.txt
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/AnnotateWithStatistics.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_filter.q
hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_groupby.q
hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_join.q
hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_limit.q
hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_part.q
hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_select.q
hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_table.q
hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_union.q
hive/trunk/ql/src/test/results/clientpositive/annotate_stats_filter.q.out
hive/trunk/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out
hive/trunk/ql/src/test/results/clientpositive/annotate_stats_join.q.out
hive/trunk/ql/src/test/results/clientpositive/annotate_stats_limit.q.out
hive/trunk/ql/src/test/results/clientpositive/annotate_stats_part.q.out
hive/trunk/ql/src/test/results/clientpositive/annotate_stats_select.q.out
hive/trunk/ql/src/test/results/clientpositive/annotate_stats_table.q.out
hive/trunk/ql/src/test/results/clientpositive/annotate_stats_union.q.out
hive/trunk/ql/src/test/results/clientpositive/combine2.q.out
hive/trunk/ql/src/test/results/clientpositive/groupby_sort_11.q.out
hive/trunk/ql/src/test/results/clientpositive/input24.q.out
hive/trunk/ql/src/test/results/clientpositive/input25.q.out
hive/trunk/ql/src/test/results/clientpositive/metadataonly1.q.out
hive/trunk/ql/src/test/results/clientpositive/nullgroup3.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/metadataonly1.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/union5.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/union7.q.out
hive/trunk/ql/src/test/results/clientpositive/udf_explode.q.out
hive/trunk/ql/src/test/results/clientpositive/udtf_explode.q.out
hive/trunk/ql/src/test/results/clientpositive/union11.q.out
hive/trunk/ql/src/test/results/clientpositive/union14.q.out
hive/trunk/ql/src/test/results/clientpositive/union15.q.out
hive/trunk/ql/src/test/results/clientpositive/union17.q.out
hive/trunk/ql/src/test/results/clientpositive/union19.q.out
hive/trunk/ql/src/test/results/clientpositive/union20.q.out
hive/trunk/ql/src/test/results/clientpositive/union21.q.out
hive/trunk/ql/src/test/results/clientpositive/union5.q.out
hive/trunk/ql/src/test/results/clientpositive/union7.q.out
Modified: hive/trunk/data/files/dept.txt
URL: http://svn.apache.org/viewvc/hive/trunk/data/files/dept.txt?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
--- hive/trunk/data/files/dept.txt (original)
+++ hive/trunk/data/files/dept.txt Wed Aug 6 17:58:01 2014
@@ -2,3 +2,5 @@
33|engineering
34|clerical
35|marketing
+36|transport
+37|hr
Modified: hive/trunk/data/files/emp.txt
URL: http://svn.apache.org/viewvc/hive/trunk/data/files/emp.txt?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
--- hive/trunk/data/files/emp.txt (original)
+++ hive/trunk/data/files/emp.txt Wed Aug 6 17:58:01 2014
@@ -1,6 +1,48 @@
-Rafferty|31
-Jones|33
-Steinberg|33
-Robinson|34
-Smith|34
-John|
+Rafferty|31|1
+Jones|33|2
+Steinberg|33|3
+Robinson|34|4
+Smith|34|5
+John|31|6
+Rafferty|31|1
+Jones|33|2
+Steinberg|33|3
+Robinson|34|4
+Smith|34|5
+John|31|6
+Rafferty|31|1
+Jones|33|2
+Steinberg|33|3
+Robinson|34|4
+Smith|34|5
+John|31|6
+Rafferty|31|1
+Jones|33|2
+Steinberg|33|3
+Robinson|34|4
+Smith|34|5
+John|31|6
+Rafferty|31|1
+Jones|33|2
+Steinberg|33|3
+Robinson|34|4
+Smith|34|5
+John|31|6
+Rafferty|31|1
+Jones|33|2
+Steinberg|33|3
+Robinson|34|4
+Smith|34|5
+John|31|6
+Rafferty|31|1
+Jones|33|2
+Steinberg|33|3
+Robinson|34|4
+Smith|34|5
+John|31|6
+Rafferty|31|1
+Jones|33|2
+Steinberg|33|3
+Robinson|34|4
+Smith|34|5
+John|31|6
Modified: hive/trunk/data/files/loc.txt
URL: http://svn.apache.org/viewvc/hive/trunk/data/files/loc.txt?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
--- hive/trunk/data/files/loc.txt (original)
+++ hive/trunk/data/files/loc.txt Wed Aug 6 17:58:01 2014
@@ -1,8 +1,8 @@
-OH|31|43201|2001
-IO|32|43202|2001
-CA|35|43809|2001
-FL|33|54342|2001
-UT|35||2001
-CA|35|43809|2001
-|34|40000|
-FL|33|54342|2001
+OH|1|43201|2001
+IO|2|43202|2001
+CA|5|43809|2001
+FL|3|54342|2001
+UT|5||2001
+CA|5|43809|2001
+|4|40000|
+FL|6|54342|2001
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/AnnotateWithStatistics.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/AnnotateWithStatistics.java?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/AnnotateWithStatistics.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/AnnotateWithStatistics.java Wed Aug 6 17:58:01 2014
@@ -27,6 +27,7 @@ import org.apache.hadoop.hive.ql.exec.Fi
import org.apache.hadoop.hive.ql.exec.GroupByOperator;
import org.apache.hadoop.hive.ql.exec.LimitOperator;
import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.exec.SelectOperator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
@@ -62,6 +63,8 @@ public class AnnotateWithStatistics impl
+ MapJoinOperator.getOperatorName() + "%"), StatsRulesProcFactory.getJoinRule());
opRules.put(new RuleRegExp("LIM", LimitOperator.getOperatorName() + "%"),
StatsRulesProcFactory.getLimitRule());
+ opRules.put(new RuleRegExp("RS", ReduceSinkOperator.getOperatorName() + "%"),
+ StatsRulesProcFactory.getReduceSinkRule());
// The dispatcher fires the processor corresponding to the closest matching
// rule and passes the context along
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java Wed Aug 6 17:58:01 2014
@@ -601,12 +601,18 @@ public class StatsRulesProcFactory {
}
dvProd *= dv;
} else {
-
- // partial column statistics on grouping attributes case.
- // if column statistics on grouping attribute is missing, then
- // assume worst case.
- // GBY rule will emit half the number of rows if dvProd is 0
- dvProd = 0;
+ if (parentStats.getColumnStatsState().equals(Statistics.State.COMPLETE)) {
+ // the column must be an aggregate column inserted by GBY. We
+ // don't have to account for this column when computing product
+ // of NDVs
+ continue;
+ } else {
+ // partial column statistics on grouping attributes case.
+ // if column statistics on grouping attribute is missing, then
+ // assume worst case.
+ // GBY rule will emit half the number of rows if dvProd is 0
+ dvProd = 0;
+ }
break;
}
}
@@ -687,7 +693,17 @@ public class StatsRulesProcFactory {
aggColStats.add(cs);
}
}
- stats.addToColumnStats(aggColStats);
+
+ // add the new aggregate column and recompute data size
+ if (aggColStats.size() > 0) {
+ stats.addToColumnStats(aggColStats);
+
+ // only if the column stats is available, update the data size from
+ // the column stats
+ if (!stats.getColumnStatsState().equals(Statistics.State.NONE)) {
+ updateStats(stats, stats.getNumRows(), true);
+ }
+ }
// if UDAF present and if column expression map is empty then it must
// be full aggregation query like count(*) in which case number of
@@ -734,15 +750,24 @@ public class StatsRulesProcFactory {
* <p>
* In the absence of histograms, we can use the following general case
* <p>
- * <b>Single attribute</b>
+ * <b>2 Relations, 1 attribute</b>
* <p>
* T(RXS) = (T(R)*T(S))/max(V(R,Y), V(S,Y)) where Y is the join attribute
* <p>
- * <b>Multiple attributes</b>
+ * <b>2 Relations, 2 attributes</b>
* <p>
* T(RXS) = T(R)*T(S)/max(V(R,y1), V(S,y1)) * max(V(R,y2), V(S,y2)), where y1 and y2 are the join
* attributes
* <p>
+ * <b>3 Relations, 1 attribute</b>
+ * <p>
+ * T(RXSXQ) = T(R)*T(S)*T(Q)/top2largest(V(R,y), V(S,y), V(Q,y)), where y is the join attribute
+ * <p>
+ * <b>3 Relations, 2 attributes</b>
+ * <p>
+ * T(RXSXQ) = T(R)*T(S)*T(Q)/top2largest(V(R,y1), V(S,y1), V(Q,y1)) * top2largest(V(R,y2), V(S,y2), V(Q,y2)),
+ * where y1 and y2 are the join attributes
+ * <p>
* <i>Worst case:</i> If no column statistics are available, then T(RXS) = joinFactor * max(T(R),
* T(S)) * (numParents - 1) will be used as heuristics. joinFactor is from hive.stats.join.factor
* hive config. In the worst case, since we do not know any information about join keys (and hence
@@ -783,9 +808,12 @@ public class StatsRulesProcFactory {
// statistics object that is combination of statistics from all
// relations involved in JOIN
Statistics stats = new Statistics();
- long prodRows = 1;
+ List<Long> rowCountParents = Lists.newArrayList();
List<Long> distinctVals = Lists.newArrayList();
+
+ // 2 relations, multiple attributes
boolean multiAttr = false;
+ int numAttr = 1;
Map<String, ColStatistics> joinedColStats = Maps.newHashMap();
Map<Integer, List<String>> joinKeys = Maps.newHashMap();
@@ -795,12 +823,13 @@ public class StatsRulesProcFactory {
ReduceSinkOperator parent = (ReduceSinkOperator) jop.getParentOperators().get(pos);
Statistics parentStats = parent.getStatistics();
- prodRows *= parentStats.getNumRows();
+ rowCountParents.add(parentStats.getNumRows());
List<ExprNodeDesc> keyExprs = parent.getConf().getKeyCols();
// multi-attribute join key
if (keyExprs.size() > 1) {
multiAttr = true;
+ numAttr = keyExprs.size();
}
// compute fully qualified join key column names. this name will be
@@ -811,16 +840,9 @@ public class StatsRulesProcFactory {
StatsUtils.getFullQualifedColNameFromExprs(keyExprs, parent.getColumnExprMap());
joinKeys.put(pos, fqCols);
- Map<String, ExprNodeDesc> colExprMap = parent.getColumnExprMap();
- RowSchema rs = parent.getSchema();
-
// get column statistics for all output columns
- List<ColStatistics> cs =
- StatsUtils.getColStatisticsFromExprMap(conf, parentStats, colExprMap, rs);
- for (ColStatistics c : cs) {
- if (c != null) {
- joinedColStats.put(c.getFullyQualifiedColName(), c);
- }
+ for (ColStatistics cs : parentStats.getColumnStats()) {
+ joinedColStats.put(cs.getFullyQualifiedColName(), cs);
}
// since new statistics is derived from all relations involved in
@@ -834,10 +856,10 @@ public class StatsRulesProcFactory {
long denom = 1;
if (multiAttr) {
List<Long> perAttrDVs = Lists.newArrayList();
- int numAttr = joinKeys.get(0).size();
for (int idx = 0; idx < numAttr; idx++) {
for (Integer i : joinKeys.keySet()) {
String col = joinKeys.get(i).get(idx);
+ col = StatsUtils.stripPrefixFromColumnName(col);
ColStatistics cs = joinedColStats.get(col);
if (cs != null) {
perAttrDVs.add(cs.getCountDistint());
@@ -853,6 +875,7 @@ public class StatsRulesProcFactory {
} else {
for (List<String> jkeys : joinKeys.values()) {
for (String jk : jkeys) {
+ jk = StatsUtils.stripPrefixFromColumnName(jk);
ColStatistics cs = joinedColStats.get(jk);
if (cs != null) {
distinctVals.add(cs.getCountDistint());
@@ -862,6 +885,11 @@ public class StatsRulesProcFactory {
denom = getDenominator(distinctVals);
}
+ // Update NDV of joined columns to be min(V(R,y), V(S,y))
+ if (multiAttr) {
+ updateJoinColumnsNDV(joinKeys, joinedColStats, numAttr);
+ }
+
// column statistics from different sources are put together and rename
// fully qualified column names based on output schema of join operator
Map<String, ExprNodeDesc> colExprMap = jop.getColumnExprMap();
@@ -878,7 +906,6 @@ public class StatsRulesProcFactory {
ColStatistics cs = joinedColStats.get(fqColName);
String outColName = key;
String outTabAlias = ci.getTabAlias();
- outColName = StatsUtils.stripPrefixFromColumnName(outColName);
if (cs != null) {
cs.setColumnName(outColName);
cs.setTableAlias(outTabAlias);
@@ -889,13 +916,21 @@ public class StatsRulesProcFactory {
// update join statistics
stats.setColumnStats(outColStats);
- long newRowCount = prodRows / denom;
+ long newRowCount = computeNewRowCount(rowCountParents, denom);
+
+ if (newRowCount <= 0 && LOG.isDebugEnabled()) {
+ newRowCount = 0;
+ LOG.debug("[0] STATS-" + jop.toString() + ": Product of #rows might be greater than"
+ + " denominator or overflow might have occurred. Resetting row count to 0."
+ + " #Rows of parents: " + rowCountParents.toString() + ". Denominator: " + denom);
+ }
+
stats.setNumRows(newRowCount);
stats.setDataSize(StatsUtils.getDataSizeFromColumnStats(newRowCount, outColStats));
jop.setStatistics(stats);
if (LOG.isDebugEnabled()) {
- LOG.debug("[0] STATS-" + jop.toString() + ": " + stats.extendedToString());
+ LOG.debug("[1] STATS-" + jop.toString() + ": " + stats.extendedToString());
}
} else {
@@ -930,13 +965,72 @@ public class StatsRulesProcFactory {
jop.setStatistics(wcStats);
if (LOG.isDebugEnabled()) {
- LOG.debug("[1] STATS-" + jop.toString() + ": " + wcStats.extendedToString());
+ LOG.debug("[2] STATS-" + jop.toString() + ": " + wcStats.extendedToString());
}
}
}
return null;
}
+    /**
+     * Estimates the join output row count as the product of the parents' row
+     * counts divided by {@code denom}.
+     * <p>
+     * The largest row count is divided by the denominator first and the
+     * resulting factor is then applied to the product of the remaining row
+     * counts, which keeps the intermediate values smaller. The product of the
+     * remaining row counts saturates at {@code Long.MAX_VALUE} instead of
+     * silently wrapping around.
+     *
+     * @param rowCountParents row counts of the relations being joined
+     * @param denom denominator derived from the NDVs of the join columns
+     * @return estimated row count, or 0 when no row counts are supplied
+     */
+    private long computeNewRowCount(List<Long> rowCountParents, long denom) {
+      if (rowCountParents.isEmpty()) {
+        return 0;
+      }
+
+      // locate the largest row count; it is the one that gets divided by denom
+      int maxIdx = 0;
+      long max = rowCountParents.get(0);
+      for (int i = 1; i < rowCountParents.size(); i++) {
+        long rowCount = rowCountParents.get(i);
+        if (rowCount > max) {
+          max = rowCount;
+          maxIdx = i;
+        }
+      }
+      double factor = (double) max / (double) denom;
+
+      // multiply the remaining row counts, clamping instead of overflowing
+      long result = 1;
+      for (int i = 0; i < rowCountParents.size(); i++) {
+        if (i == maxIdx) {
+          continue;
+        }
+        long rowCount = rowCountParents.get(i);
+        if (rowCount > 0 && result > Long.MAX_VALUE / rowCount) {
+          result = Long.MAX_VALUE;
+        } else {
+          result *= rowCount;
+        }
+      }
+
+      // the narrowing cast saturates at Long.MAX_VALUE for out-of-range values
+      return (long) (result * factor);
+    }
+
+    /**
+     * Sets the NDV of the columns joined at each key position to the smallest
+     * NDV found among them, i.e. min(V(R,y), V(S,y)).
+     *
+     * @param joinKeys join key column names of each parent, keyed by parent
+     *          position
+     * @param joinedColStats column statistics of all parents, looked up by the
+     *          join key name with its KEY./VALUE. prefix stripped
+     * @param numAttr number of join key positions to process
+     */
+    private void updateJoinColumnsNDV(Map<Integer, List<String>> joinKeys,
+        Map<String, ColStatistics> joinedColStats, int numAttr) {
+      for (int joinColIdx = 0; joinColIdx < numAttr; joinColIdx++) {
+        // find min NDV among the columns joined at this key position
+        long minNDV = Long.MAX_VALUE;
+        for (List<String> keys : joinKeys.values()) {
+          String key = StatsUtils.stripPrefixFromColumnName(keys.get(joinColIdx));
+          ColStatistics cs = joinedColStats.get(key);
+          if (cs != null && cs.getCountDistint() < minNDV) {
+            minNDV = cs.getCountDistint();
+          }
+        }
+
+        // no usable NDV was found for this key position
+        if (minNDV == Long.MAX_VALUE) {
+          continue;
+        }
+
+        // set min NDV on every joined column that has statistics; a column
+        // without statistics is skipped, since dereferencing it would throw a
+        // NullPointerException
+        for (List<String> keys : joinKeys.values()) {
+          String key = StatsUtils.stripPrefixFromColumnName(keys.get(joinColIdx));
+          ColStatistics cs = joinedColStats.get(key);
+          if (cs != null) {
+            cs.setCountDistint(minNDV);
+          }
+        }
+      }
+    }
+
private long getDenominator(List<Long> distinctVals) {
if (distinctVals.isEmpty()) {
@@ -954,16 +1048,23 @@ public class StatsRulesProcFactory {
return Collections.max(distinctVals);
} else {
+ // remember min value and ignore it from the denominator
+ long minNDV = distinctVals.get(0);
+ int minIdx = 0;
+
+ for (int i = 1; i < distinctVals.size(); i++) {
+ if (distinctVals.get(i) < minNDV) {
+ minNDV = distinctVals.get(i);
+ minIdx = i;
+ }
+ }
+
// join from multiple relations:
- // denom = max(v1, v2) * max(v2, v3) * max(v3, v4)
+ // denom = Product of all NDVs except the least of all
long denom = 1;
- for (int i = 0; i < distinctVals.size() - 1; i++) {
- long v1 = distinctVals.get(i);
- long v2 = distinctVals.get(i + 1);
- if (v1 >= v2) {
- denom *= v1;
- } else {
- denom *= v2;
+ for (int i = 0; i < distinctVals.size(); i++) {
+ if (i != minIdx) {
+ denom *= distinctVals.get(i);
}
}
return denom;
@@ -983,8 +1084,6 @@ public class StatsRulesProcFactory {
LimitOperator lop = (LimitOperator) nd;
Operator<? extends OperatorDesc> parent = lop.getParentOperators().get(0);
Statistics parentStats = parent.getStatistics();
- AnnotateStatsProcCtx aspCtx = (AnnotateStatsProcCtx) procCtx;
- HiveConf conf = aspCtx.getConf();
try {
long limit = -1;
@@ -1032,6 +1131,73 @@ public class StatsRulesProcFactory {
}
/**
+ * ReduceSink operator does not change any of the statistics. But it renames
+ * the column statistics from its parent based on the output key and value
+ * column names to make it easy for the downstream operators. This is different
+ * from the default stats which just aggregates and passes along the statistics
+ * without actually renaming based on output schema of the operator.
+ */
+  public static class ReduceSinkStatsRule extends DefaultStatsRule implements NodeProcessor {
+
+    /**
+     * Copies the parent's statistics onto the ReduceSink operator. When
+     * {@code satisfyPrecondition} accepts the parent statistics, the column
+     * statistics are replaced by ones looked up through the operator's column
+     * expression map and renamed to the output key/value column names.
+     *
+     * @return always null
+     * @throws SemanticException if the parent statistics cannot be cloned
+     */
+    @Override
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+      ReduceSinkOperator rop = (ReduceSinkOperator) nd;
+      Operator<? extends OperatorDesc> parent = rop.getParentOperators().get(0);
+      Statistics parentStats = parent.getStatistics();
+      if (parentStats == null) {
+        // nothing to propagate
+        return null;
+      }
+
+      AnnotateStatsProcCtx aspCtx = (AnnotateStatsProcCtx) procCtx;
+      HiveConf conf = aspCtx.getConf();
+
+      List<String> outKeyColNames = rop.getConf().getOutputKeyColumnNames();
+      List<String> outValueColNames = rop.getConf().getOutputValueColumnNames();
+      Map<String, ExprNodeDesc> colExprMap = rop.getColumnExprMap();
+      try {
+        Statistics outStats = parentStats.clone();
+        if (satisfyPrecondition(parentStats)) {
+          List<ColStatistics> colStats = Lists.newArrayList();
+
+          // key columns are registered in the expression map as "KEY.<name>"
+          for (String key : outKeyColNames) {
+            ExprNodeDesc end = colExprMap.get("KEY." + key);
+            if (end == null) {
+              continue;
+            }
+            ColStatistics cs = StatsUtils.getColStatisticsFromExpression(conf, parentStats, end);
+            if (cs != null) {
+              cs.setColumnName(key);
+              colStats.add(cs);
+            }
+          }
+
+          // value columns are registered in the expression map as "VALUE.<name>"
+          for (String val : outValueColNames) {
+            ExprNodeDesc end = colExprMap.get("VALUE." + val);
+            if (end == null) {
+              continue;
+            }
+            ColStatistics cs = StatsUtils.getColStatisticsFromExpression(conf, parentStats, end);
+            if (cs != null) {
+              cs.setColumnName(val);
+              colStats.add(cs);
+            }
+          }
+
+          outStats.setColumnStats(colStats);
+        }
+        rop.setStatistics(outStats);
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("[0] STATS-" + rop.toString() + ": " + outStats.extendedToString());
+        }
+      } catch (CloneNotSupportedException e) {
+        // keep the original exception as the cause so the failure can be diagnosed
+        throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg(), e);
+      }
+      return null;
+    }
+
+  }
+
+ /**
* Default rule is to aggregate the statistics from all its parent operators.
*/
public static class DefaultStatsRule implements NodeProcessor {
@@ -1108,6 +1274,10 @@ public class StatsRulesProcFactory {
return new LimitStatsRule();
}
+  /**
+   * Creates the processor registered for ReduceSink operators.
+   *
+   * @return a new {@link ReduceSinkStatsRule}
+   */
+  public static NodeProcessor getReduceSinkRule() {
+    final NodeProcessor reduceSinkRule = new ReduceSinkStatsRule();
+    return reduceSinkRule;
+  }
+
public static NodeProcessor getDefaultRule() {
return new DefaultStatsRule();
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java Wed Aug 6 17:58:01 2014
@@ -196,7 +196,8 @@ public class StatsUtils {
stats.addToDataSize(ds);
// if at least a partition does not contain row count then mark basic stats state as PARTIAL
- if (containsNonPositives(rowCounts)) {
+ if (containsNonPositives(rowCounts) &&
+ stats.getBasicStatsState().equals(State.COMPLETE)) {
stats.setBasicStatsState(State.PARTIAL);
}
boolean haveFullStats = fetchColStats;
@@ -860,12 +861,9 @@ public class StatsUtils {
if (colExprMap != null) {
for (ColumnInfo ci : rowSchema.getSignature()) {
String outColName = ci.getInternalName();
+ outColName = StatsUtils.stripPrefixFromColumnName(outColName);
String outTabAlias = ci.getTabAlias();
ExprNodeDesc end = colExprMap.get(outColName);
- if (end == null) {
- outColName = StatsUtils.stripPrefixFromColumnName(outColName);
- end = colExprMap.get(outColName);
- }
ColStatistics colStat = getColStatisticsFromExpression(conf, parentStats, end);
if (colStat != null) {
outColName = StatsUtils.stripPrefixFromColumnName(outColName);
@@ -1126,7 +1124,7 @@ public class StatsUtils {
*/
public static String stripPrefixFromColumnName(String colName) {
String stripedName = colName;
- if (colName.startsWith("KEY._") || colName.startsWith("VALUE._")) {
+ if (colName.startsWith("KEY") || colName.startsWith("VALUE")) {
// strip off KEY./VALUE. from column name
stripedName = colName.split("\\.")[1];
}
@@ -1194,15 +1192,16 @@ public class StatsUtils {
for (Map.Entry<String, ExprNodeDesc> entry : map.entrySet()) {
if (entry.getValue().isSame(end)) {
outColName = entry.getKey();
+ outColName = stripPrefixFromColumnName(outColName);
}
}
if (end instanceof ExprNodeColumnDesc) {
ExprNodeColumnDesc encd = (ExprNodeColumnDesc) end;
if (outColName == null) {
outColName = encd.getColumn();
+ outColName = stripPrefixFromColumnName(outColName);
}
String tabAlias = encd.getTabAlias();
- outColName = stripPrefixFromColumnName(outColName);
result.add(getFullyQualifiedColumnName(tabAlias, outColName));
} else if (end instanceof ExprNodeGenericFuncDesc) {
ExprNodeGenericFuncDesc enf = (ExprNodeGenericFuncDesc) end;
Modified: hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_filter.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_filter.q?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_filter.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_filter.q Wed Aug 6 17:58:01 2014
@@ -15,76 +15,76 @@ load data local inpath '../../data/files
insert overwrite table loc_orc select * from loc_staging;
-- numRows: 8 rawDataSize: 796
-explain extended select * from loc_orc;
+explain select * from loc_orc;
-- column stats are not COMPLETE, so stats are not updated
-- numRows: 8 rawDataSize: 796
-explain extended select * from loc_orc where state='OH';
+explain select * from loc_orc where state='OH';
analyze table loc_orc compute statistics for columns state,locid,zip,year;
-- state column has 5 distincts. numRows/countDistincts
-- numRows: 1 rawDataSize: 102
-explain extended select * from loc_orc where state='OH';
+explain select * from loc_orc where state='OH';
-- not equals comparison shouldn't affect number of rows
-- numRows: 8 rawDataSize: 804
-explain extended select * from loc_orc where state!='OH';
-explain extended select * from loc_orc where state<>'OH';
+explain select * from loc_orc where state!='OH';
+explain select * from loc_orc where state<>'OH';
-- nulls are treated as constant equality comparison
-- numRows: 1 rawDataSize: 102
-explain extended select * from loc_orc where zip is null;
+explain select * from loc_orc where zip is null;
-- numRows: 1 rawDataSize: 102
-explain extended select * from loc_orc where !(zip is not null);
+explain select * from loc_orc where !(zip is not null);
-- not nulls are treated as inverse of nulls
-- numRows: 7 rawDataSize: 702
-explain extended select * from loc_orc where zip is not null;
+explain select * from loc_orc where zip is not null;
-- numRows: 7 rawDataSize: 702
-explain extended select * from loc_orc where !(zip is null);
+explain select * from loc_orc where !(zip is null);
-- NOT evaluation. true will pass all rows, false will not pass any rows
-- numRows: 8 rawDataSize: 804
-explain extended select * from loc_orc where !false;
+explain select * from loc_orc where !false;
-- numRows: 0 rawDataSize: 0
-explain extended select * from loc_orc where !true;
+explain select * from loc_orc where !true;
-- Constant evaluation. true will pass all rows, false will not pass any rows
-- numRows: 8 rawDataSize: 804
-explain extended select * from loc_orc where true;
+explain select * from loc_orc where true;
-- numRows: 8 rawDataSize: 804
-explain extended select * from loc_orc where 'foo';
+explain select * from loc_orc where 'foo';
-- numRows: 8 rawDataSize: 804
-explain extended select * from loc_orc where true = true;
+explain select * from loc_orc where true = true;
-- numRows: 0 rawDataSize: 0
-explain extended select * from loc_orc where false = true;
+explain select * from loc_orc where false = true;
-- numRows: 0 rawDataSize: 0
-explain extended select * from loc_orc where 'foo' = 'bar';
+explain select * from loc_orc where 'foo' = 'bar';
-- numRows: 0 rawDataSize: 0
-explain extended select * from loc_orc where false;
+explain select * from loc_orc where false;
-- OR evaluation. 1 row for OH and 1 row for CA
-- numRows: 2 rawDataSize: 204
-explain extended select * from loc_orc where state='OH' or state='CA';
+explain select * from loc_orc where state='OH' or state='CA';
-- AND evaluation. cascadingly apply rules. 8/2 = 4/2 = 2
-- numRows: 2 rawDataSize: 204
-explain extended select * from loc_orc where year=2001 and year is null;
+explain select * from loc_orc where year=2001 and year is null;
-- numRows: 1 rawDataSize: 102
-explain extended select * from loc_orc where year=2001 and state='OH' and state='FL';
+explain select * from loc_orc where year=2001 and state='OH' and state='FL';
-- AND and OR together. left expr will yield 1 row and right will yield 1 row
-- numRows: 3 rawDataSize: 306
-explain extended select * from loc_orc where (year=2001 and year is null) or (state='CA');
+explain select * from loc_orc where (year=2001 and year is null) or (state='CA');
-- AND and OR together. left expr will yield 8 rows and right will yield 1 row
-- numRows: 1 rawDataSize: 102
-explain extended select * from loc_orc where (year=2001 or year is null) and (state='CA');
+explain select * from loc_orc where (year=2001 or year is null) and (state='CA');
-- all inequality conditions rows/3 is the rules
-- numRows: 2 rawDataSize: 204
-explain extended select * from loc_orc where locid < 30;
-explain extended select * from loc_orc where locid > 30;
-explain extended select * from loc_orc where locid <= 30;
-explain extended select * from loc_orc where locid >= 30;
+explain select * from loc_orc where locid < 30;
+explain select * from loc_orc where locid > 30;
+explain select * from loc_orc where locid <= 30;
+explain select * from loc_orc where locid >= 30;
Modified: hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_groupby.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_groupby.q?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_groupby.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_groupby.q Wed Aug 6 17:58:01 2014
@@ -15,14 +15,14 @@ load data local inpath '../../data/files
insert overwrite table loc_orc select * from loc_staging;
-- numRows: 8 rawDataSize: 796
-explain extended select * from loc_orc;
+explain select * from loc_orc;
-- partial column stats
analyze table loc_orc compute statistics for columns state;
-- inner group by: map - numRows: 8 reduce - numRows: 4
-- outer group by: map - numRows: 4 reduce numRows: 2
-explain extended select a, c, min(b)
+explain select a, c, min(b)
from ( select state as a, locid as b, count(*) as c
from loc_orc
group by state,locid
@@ -34,36 +34,36 @@ analyze table loc_orc compute statistics
-- only one distinct value in year column + 1 NULL value
-- map-side GBY: numRows: 8 (map-side will not do any reduction)
-- reduce-side GBY: numRows: 2
-explain extended select year from loc_orc group by year;
+explain select year from loc_orc group by year;
-- map-side GBY: numRows: 8
-- reduce-side GBY: numRows: 4
-explain extended select state,locid from loc_orc group by state,locid;
+explain select state,locid from loc_orc group by state,locid;
-- map-side GBY numRows: 32 reduce-side GBY numRows: 16
-explain extended select state,locid from loc_orc group by state,locid with cube;
+explain select state,locid from loc_orc group by state,locid with cube;
-- map-side GBY numRows: 24 reduce-side GBY numRows: 12
-explain extended select state,locid from loc_orc group by state,locid with rollup;
+explain select state,locid from loc_orc group by state,locid with rollup;
-- map-side GBY numRows: 8 reduce-side GBY numRows: 4
-explain extended select state,locid from loc_orc group by state,locid grouping sets((state));
+explain select state,locid from loc_orc group by state,locid grouping sets((state));
-- map-side GBY numRows: 16 reduce-side GBY numRows: 8
-explain extended select state,locid from loc_orc group by state,locid grouping sets((state),(locid));
+explain select state,locid from loc_orc group by state,locid grouping sets((state),(locid));
-- map-side GBY numRows: 24 reduce-side GBY numRows: 12
-explain extended select state,locid from loc_orc group by state,locid grouping sets((state),(locid),());
+explain select state,locid from loc_orc group by state,locid grouping sets((state),(locid),());
-- map-side GBY numRows: 32 reduce-side GBY numRows: 16
-explain extended select state,locid from loc_orc group by state,locid grouping sets((state,locid),(state),(locid),());
+explain select state,locid from loc_orc group by state,locid grouping sets((state,locid),(state),(locid),());
set hive.stats.map.parallelism=10;
-- map-side GBY: numRows: 80 (map-side will not do any reduction)
-- reduce-side GBY: numRows: 2 Reason: numDistinct of year is 2. numRows = min(80/2, 2)
-explain extended select year from loc_orc group by year;
+explain select year from loc_orc group by year;
-- map-side GBY numRows: 320 reduce-side GBY numRows: 42 Reason: numDistinct of state and locid are 6,7 resp. numRows = min(320/2, 6*7)
-explain extended select state,locid from loc_orc group by state,locid with cube;
+explain select state,locid from loc_orc group by state,locid with cube;
Modified: hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_join.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_join.q?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_join.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_join.q Wed Aug 6 17:58:01 2014
@@ -1,81 +1,70 @@
set hive.stats.fetch.column.stats=true;
+set hive.stats.ndv.error=0.0;
-create table if not exists emp_staging (
+create table if not exists emp (
lastname string,
- deptid int
+ deptid int,
+ locid int
) row format delimited fields terminated by '|' stored as textfile;
-create table if not exists dept_staging (
+create table if not exists dept (
deptid int,
deptname string
) row format delimited fields terminated by '|' stored as textfile;
-create table if not exists loc_staging (
+create table if not exists loc (
state string,
locid int,
zip bigint,
year int
) row format delimited fields terminated by '|' stored as textfile;
-create table if not exists emp_orc like emp_staging;
-alter table emp_orc set fileformat orc;
-
-create table if not exists dept_orc like dept_staging;
-alter table dept_orc set fileformat orc;
-
-create table loc_orc like loc_staging;
-alter table loc_orc set fileformat orc;
-
-LOAD DATA LOCAL INPATH '../../data/files/emp.txt' OVERWRITE INTO TABLE emp_staging;
-LOAD DATA LOCAL INPATH '../../data/files/dept.txt' OVERWRITE INTO TABLE dept_staging;
-LOAD DATA LOCAL INPATH '../../data/files/loc.txt' OVERWRITE INTO TABLE loc_staging;
-
-insert overwrite table emp_orc select * from emp_staging;
-insert overwrite table dept_orc select * from dept_staging;
-insert overwrite table loc_orc select * from loc_staging;
-
-analyze table emp_orc compute statistics for columns lastname,deptid;
-analyze table dept_orc compute statistics for columns deptname,deptid;
-analyze table loc_orc compute statistics for columns state,locid,zip,year;
+LOAD DATA LOCAL INPATH '../../data/files/emp.txt' OVERWRITE INTO TABLE emp;
+LOAD DATA LOCAL INPATH '../../data/files/dept.txt' OVERWRITE INTO TABLE dept;
+LOAD DATA LOCAL INPATH '../../data/files/loc.txt' OVERWRITE INTO TABLE loc;
+
+analyze table emp compute statistics;
+analyze table dept compute statistics;
+analyze table loc compute statistics;
+analyze table emp compute statistics for columns lastname,deptid,locid;
+analyze table dept compute statistics for columns deptname,deptid;
+analyze table loc compute statistics for columns state,locid,zip,year;
-- number of rows
--- emp_orc - 6
--- dept_orc - 4
--- loc_orc - 8
+-- emp - 48
+-- dept - 6
+-- loc - 8
-- count distincts for relevant columns (since count distinct values are approximate in some cases count distinct values will be greater than number of rows)
--- emp_orc.deptid - 3
--- emp_orc.lastname - 7
--- dept_orc.deptid - 6
--- dept_orc.deptname - 5
--- loc_orc.locid - 6
--- loc_orc.state - 7
-
--- Expected output rows: 4
--- Reason: #rows = (6*4)/max(3,6)
-explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid);
-
--- 3 way join
--- Expected output rows: 4
--- Reason: #rows = (6*4*6)/max(3,6)*max(6,3)
-explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) join emp_orc e1 on (e.deptid = e1.deptid);
-
--- Expected output rows: 5
--- Reason: #rows = (6*4*8)/max(3,6)*max(6,6)
-explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) join loc_orc l on (e.deptid = l.locid);
-
--- join keys of different types
--- Expected output rows: 4
--- Reason: #rows = (6*4*8)/max(3,6)*max(6,7)
-explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) join loc_orc l on (e.deptid = l.state);
-
--- multi-attribute join
--- Expected output rows: 0
--- Reason: #rows = (6*4)/max(3,6)*max(7,5)
-explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid and e.lastname = d.deptname);
-
--- 3 way and multi-attribute join
--- Expected output rows: 0
--- Reason: #rows = (6*4*8)/max(3,6)*max(7,5)*max(3,6)*max(7,7)
-explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid and e.lastname = d.deptname) join loc_orc l on (e.deptid = l.locid and e.lastname = l.state);
+-- emp.deptid - 3
+-- emp.lastname - 6
+-- emp.locid - 7
+-- dept.deptid - 7
+-- dept.deptname - 6
+-- loc.locid - 7
+-- loc.state - 6
+
+-- 2 relations, 1 attribute
+-- Expected output rows: (48*6)/max(3,7) = 41
+explain select * from emp e join dept d on (e.deptid = d.deptid);
+
+-- 2 relations, 2 attributes
+-- Expected output rows: (48*6)/(max(3,7) * max(6,6)) = 6
+explain select * from emp,dept where emp.deptid = dept.deptid and emp.lastname = dept.deptname;
+explain select * from emp e join dept d on (e.deptid = d.deptid and e.lastname = d.deptname);
+
+-- 2 relations, 3 attributes
+-- Expected output rows: (48*6)/(max(3,7) * max(6,6) * max(6,6)) = 1
+explain select * from emp,dept where emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname;
+
+-- 3 relations, 1 attribute
+-- Expected output rows: (48*6*48)/top2largest(3,7,3) = 658
+explain select * from emp e join dept d on (e.deptid = d.deptid) join emp e1 on (e.deptid = e1.deptid);
+
+-- Expected output rows: (48*6*8)/top2largest(3,7,7) = 47
+explain select * from emp e join dept d on (e.deptid = d.deptid) join loc l on (e.deptid = l.locid);
+
+-- 3 relations, 2 attributes
+-- Expected output rows: (48*6*8)/(top2largest(3,7,7) * top2largest(6,6,6)) = 1
+explain select * from emp e join dept d on (e.deptid = d.deptid and e.lastname = d.deptname) join loc l on (e.deptid = l.locid and e.lastname = l.state);
Modified: hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_limit.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_limit.q?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_limit.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_limit.q Wed Aug 6 17:58:01 2014
@@ -17,14 +17,14 @@ insert overwrite table loc_orc select *
analyze table loc_orc compute statistics for columns state, locid, zip, year;
-- numRows: 8 rawDataSize: 796
-explain extended select * from loc_orc;
+explain select * from loc_orc;
-- numRows: 4 rawDataSize: 396
-explain extended select * from loc_orc limit 4;
+explain select * from loc_orc limit 4;
-- greater than the available number of rows
-- numRows: 8 rawDataSize: 796
-explain extended select * from loc_orc limit 16;
+explain select * from loc_orc limit 16;
-- numRows: 0 rawDataSize: 0
-explain extended select * from loc_orc limit 0;
+explain select * from loc_orc limit 0;
Modified: hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_part.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_part.q?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_part.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_part.q Wed Aug 6 17:58:01 2014
@@ -19,67 +19,67 @@ create table if not exists loc_orc (
) partitioned by(year string) stored as orc;
-- basicStatState: NONE colStatState: NONE
-explain extended select * from loc_orc;
+explain select * from loc_orc;
insert overwrite table loc_orc partition(year) select * from loc_staging;
-- stats are disabled. basic stats will report the file size but not raw data size. so initial statistics will be PARTIAL
-- basicStatState: PARTIAL colStatState: NONE
-explain extended select * from loc_orc;
+explain select * from loc_orc;
-- partition level analyze statistics for specific partition
analyze table loc_orc partition(year='2001') compute statistics;
-- basicStatState: PARTIAL colStatState: NONE
-explain extended select * from loc_orc where year='__HIVE_DEFAULT_PARTITION__';
+explain select * from loc_orc where year='__HIVE_DEFAULT_PARTITION__';
-- basicStatState: PARTIAL colStatState: NONE
-explain extended select * from loc_orc;
+explain select * from loc_orc;
-- basicStatState: COMPLETE colStatState: NONE
-explain extended select * from loc_orc where year='2001';
+explain select * from loc_orc where year='2001';
-- partition level analyze statistics for all partitions
analyze table loc_orc partition(year) compute statistics;
-- basicStatState: COMPLETE colStatState: NONE
-explain extended select * from loc_orc where year='__HIVE_DEFAULT_PARTITION__';
+explain select * from loc_orc where year='__HIVE_DEFAULT_PARTITION__';
-- basicStatState: COMPLETE colStatState: NONE
-explain extended select * from loc_orc;
+explain select * from loc_orc;
-- basicStatState: COMPLETE colStatState: NONE
-explain extended select * from loc_orc where year='2001' or year='__HIVE_DEFAULT_PARTITION__';
+explain select * from loc_orc where year='2001' or year='__HIVE_DEFAULT_PARTITION__';
-- both partitions will be pruned
-- basicStatState: NONE colStatState: NONE
-explain extended select * from loc_orc where year='2001' and year='__HIVE_DEFAULT_PARTITION__';
+explain select * from loc_orc where year='2001' and year='__HIVE_DEFAULT_PARTITION__';
-- partition level partial column statistics
analyze table loc_orc partition(year='2001') compute statistics for columns state,locid;
-- basicStatState: COMPLETE colStatState: NONE
-explain extended select zip from loc_orc;
+explain select zip from loc_orc;
-- basicStatState: COMPLETE colStatState: PARTIAL
-explain extended select state from loc_orc;
+explain select state from loc_orc;
-- column statistics for __HIVE_DEFAULT_PARTITION__ is not supported yet. Hence colStatState reports PARTIAL
-- basicStatState: COMPLETE colStatState: PARTIAL
-explain extended select state,locid from loc_orc;
+explain select state,locid from loc_orc;
-- basicStatState: COMPLETE colStatState: COMPLETE
-explain extended select state,locid from loc_orc where year='2001';
+explain select state,locid from loc_orc where year='2001';
-- basicStatState: COMPLETE colStatState: NONE
-explain extended select state,locid from loc_orc where year!='2001';
+explain select state,locid from loc_orc where year!='2001';
-- basicStatState: COMPLETE colStatState: PARTIAL
-explain extended select * from loc_orc;
+explain select * from loc_orc;
-- This is to test filter expression evaluation on partition column
-- numRows: 2 dataSize: 8 basicStatState: COMPLETE colStatState: COMPLETE
-explain extended select locid from loc_orc where locid>0 and year='2001';
-explain extended select locid,year from loc_orc where locid>0 and year='2001';
-explain extended select * from (select locid,year from loc_orc) test where locid>0 and year='2001';
+explain select locid from loc_orc where locid>0 and year='2001';
+explain select locid,year from loc_orc where locid>0 and year='2001';
+explain select * from (select locid,year from loc_orc) test where locid>0 and year='2001';
Modified: hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_select.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_select.q?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_select.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_select.q Wed Aug 6 17:58:01 2014
@@ -28,116 +28,116 @@ load data local inpath '../../data/files
insert overwrite table alltypes_orc select * from alltypes;
-- basicStatState: COMPLETE colStatState: NONE numRows: 2 rawDataSize: 1514
-explain extended select * from alltypes_orc;
+explain select * from alltypes_orc;
-- statistics for complex types are not supported yet
analyze table alltypes_orc compute statistics for columns bo1, ti1, si1, i1, bi1, f1, d1, s1, vc1;
-- numRows: 2 rawDataSize: 1514
-explain extended select * from alltypes_orc;
+explain select * from alltypes_orc;
-- numRows: 2 rawDataSize: 8
-explain extended select bo1 from alltypes_orc;
+explain select bo1 from alltypes_orc;
-- col alias renaming
-- numRows: 2 rawDataSize: 8
-explain extended select i1 as int1 from alltypes_orc;
+explain select i1 as int1 from alltypes_orc;
-- numRows: 2 rawDataSize: 174
-explain extended select s1 from alltypes_orc;
+explain select s1 from alltypes_orc;
-- column statistics for complex types unsupported and so statistics will not be updated
-- numRows: 2 rawDataSize: 1514
-explain extended select m1 from alltypes_orc;
+explain select m1 from alltypes_orc;
-- numRows: 2 rawDataSize: 246
-explain extended select bo1, ti1, si1, i1, bi1, f1, d1,s1 from alltypes_orc;
+explain select bo1, ti1, si1, i1, bi1, f1, d1,s1 from alltypes_orc;
-- numRows: 2 rawDataSize: 0
-explain extended select null from alltypes_orc;
+explain select null from alltypes_orc;
-- numRows: 2 rawDataSize: 8
-explain extended select 11 from alltypes_orc;
+explain select 11 from alltypes_orc;
-- numRows: 2 rawDataSize: 16
-explain extended select 11L from alltypes_orc;
+explain select 11L from alltypes_orc;
-- numRows: 2 rawDataSize: 16
-explain extended select 11.0 from alltypes_orc;
+explain select 11.0 from alltypes_orc;
-- numRows: 2 rawDataSize: 178
-explain extended select "hello" from alltypes_orc;
-explain extended select cast("hello" as char(5)) from alltypes_orc;
-explain extended select cast("hello" as varchar(5)) from alltypes_orc;
+explain select "hello" from alltypes_orc;
+explain select cast("hello" as char(5)) from alltypes_orc;
+explain select cast("hello" as varchar(5)) from alltypes_orc;
-- numRows: 2 rawDataSize: 96
-explain extended select unbase64("0xe23") from alltypes_orc;
+explain select unbase64("0xe23") from alltypes_orc;
-- numRows: 2 rawDataSize: 16
-explain extended select cast("1" as TINYINT), cast("20" as SMALLINT) from alltypes_orc;
+explain select cast("1" as TINYINT), cast("20" as SMALLINT) from alltypes_orc;
-- numRows: 2 rawDataSize: 80
-explain extended select cast("1970-12-31 15:59:58.174" as TIMESTAMP) from alltypes_orc;
+explain select cast("1970-12-31 15:59:58.174" as TIMESTAMP) from alltypes_orc;
-- numRows: 2 rawDataSize: 112
-explain extended select cast("1970-12-31 15:59:58.174" as DATE) from alltypes_orc;
+explain select cast("1970-12-31 15:59:58.174" as DATE) from alltypes_orc;
-- numRows: 2 rawDataSize: 224
-explain extended select cast("58.174" as DECIMAL) from alltypes_orc;
+explain select cast("58.174" as DECIMAL) from alltypes_orc;
-- numRows: 2 rawDataSize: 112
-explain extended select array(1,2,3) from alltypes_orc;
+explain select array(1,2,3) from alltypes_orc;
-- numRows: 2 rawDataSize: 1508
-explain extended select str_to_map("a=1 b=2 c=3", " ", "=") from alltypes_orc;
+explain select str_to_map("a=1 b=2 c=3", " ", "=") from alltypes_orc;
-- numRows: 2 rawDataSize: 112
-explain extended select NAMED_STRUCT("a", 11, "b", 11) from alltypes_orc;
+explain select NAMED_STRUCT("a", 11, "b", 11) from alltypes_orc;
-- numRows: 2 rawDataSize: 250
-explain extended select CREATE_UNION(0, "hello") from alltypes_orc;
+explain select CREATE_UNION(0, "hello") from alltypes_orc;
-- COUNT(*) is projected as new column. It is not projected as GenericUDF and so datasize estimate will be based on number of rows
-- numRows: 1 rawDataSize: 8
-explain extended select count(*) from alltypes_orc;
+explain select count(*) from alltypes_orc;
-- COUNT(1) is projected as new column. It is not projected as GenericUDF and so datasize estimate will be based on number of rows
-- numRows: 1 rawDataSize: 8
-explain extended select count(1) from alltypes_orc;
+explain select count(1) from alltypes_orc;
-- column statistics for complex column types will be missing. data size will be calculated from available column statistics
-- numRows: 2 rawDataSize: 254
-explain extended select *,11 from alltypes_orc;
+explain select *,11 from alltypes_orc;
-- subquery selects
-- inner select - numRows: 2 rawDataSize: 8
-- outer select - numRows: 2 rawDataSize: 8
-explain extended select i1 from (select i1 from alltypes_orc limit 10) temp;
+explain select i1 from (select i1 from alltypes_orc limit 10) temp;
-- inner select - numRows: 2 rawDataSize: 16
-- outer select - numRows: 2 rawDataSize: 8
-explain extended select i1 from (select i1,11 from alltypes_orc limit 10) temp;
+explain select i1 from (select i1,11 from alltypes_orc limit 10) temp;
-- inner select - numRows: 2 rawDataSize: 16
-- outer select - numRows: 2 rawDataSize: 186
-explain extended select i1,"hello" from (select i1,11 from alltypes_orc limit 10) temp;
+explain select i1,"hello" from (select i1,11 from alltypes_orc limit 10) temp;
-- inner select - numRows: 2 rawDataSize: 24
-- outer select - numRows: 2 rawDataSize: 16
-explain extended select x from (select i1,11.0 as x from alltypes_orc limit 10) temp;
+explain select x from (select i1,11.0 as x from alltypes_orc limit 10) temp;
-- inner select - numRows: 2 rawDataSize: 104
-- outer select - numRows: 2 rawDataSize: 186
-explain extended select x,"hello" from (select i1 as x, unbase64("0xe23") as ub from alltypes_orc limit 10) temp;
+explain select x,"hello" from (select i1 as x, unbase64("0xe23") as ub from alltypes_orc limit 10) temp;
-- inner select - numRows: 2 rawDataSize: 186
-- middle select - numRows: 2 rawDataSize: 178
-- outer select - numRows: 2 rawDataSize: 194
-explain extended select h, 11.0 from (select hell as h from (select i1, "hello" as hell from alltypes_orc limit 10) in1 limit 10) in2;
+explain select h, 11.0 from (select hell as h from (select i1, "hello" as hell from alltypes_orc limit 10) in1 limit 10) in2;
-- This test is for FILTER operator where filter expression is a boolean column
-- numRows: 2 rawDataSize: 8
-explain extended select bo1 from alltypes_orc where bo1;
+explain select bo1 from alltypes_orc where bo1;
-- numRows: 0 rawDataSize: 0
-explain extended select bo1 from alltypes_orc where !bo1;
+explain select bo1 from alltypes_orc where !bo1;
Modified: hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_table.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_table.q?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_table.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_table.q Wed Aug 6 17:58:01 2014
@@ -10,7 +10,7 @@ create table if not exists emp_orc like
alter table emp_orc set fileformat orc;
-- basicStatState: NONE colStatState: NONE
-explain extended select * from emp_orc;
+explain select * from emp_orc;
LOAD DATA LOCAL INPATH '../../data/files/emp.txt' OVERWRITE INTO TABLE emp_staging;
@@ -19,35 +19,35 @@ insert overwrite table emp_orc select *
-- stats are disabled. basic stats will report the file size but not raw data size. so initial statistics will be PARTIAL
-- basicStatState: PARTIAL colStatState: NONE
-explain extended select * from emp_orc;
+explain select * from emp_orc;
-- table level analyze statistics
analyze table emp_orc compute statistics;
-- basicStatState: COMPLETE colStatState: NONE
-explain extended select * from emp_orc;
+explain select * from emp_orc;
-- column level partial statistics
analyze table emp_orc compute statistics for columns deptid;
-- basicStatState: COMPLETE colStatState: PARTIAL
-explain extended select * from emp_orc;
+explain select * from emp_orc;
-- all selected columns have statistics
-- basicStatState: COMPLETE colStatState: COMPLETE
-explain extended select deptid from emp_orc;
+explain select deptid from emp_orc;
-- column level complete statistics
analyze table emp_orc compute statistics for columns lastname,deptid;
-- basicStatState: COMPLETE colStatState: COMPLETE
-explain extended select * from emp_orc;
+explain select * from emp_orc;
-- basicStatState: COMPLETE colStatState: COMPLETE
-explain extended select lastname from emp_orc;
+explain select lastname from emp_orc;
-- basicStatState: COMPLETE colStatState: COMPLETE
-explain extended select deptid from emp_orc;
+explain select deptid from emp_orc;
-- basicStatState: COMPLETE colStatState: COMPLETE
-explain extended select lastname,deptid from emp_orc;
+explain select lastname,deptid from emp_orc;
Modified: hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_union.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_union.q?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_union.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_union.q Wed Aug 6 17:58:01 2014
@@ -17,16 +17,16 @@ insert overwrite table loc_orc select *
analyze table loc_orc compute statistics for columns state,locid,zip,year;
-- numRows: 8 rawDataSize: 688
-explain extended select state from loc_orc;
+explain select state from loc_orc;
-- numRows: 16 rawDataSize: 1376
-explain extended select * from (select state from loc_orc union all select state from loc_orc) tmp;
+explain select * from (select state from loc_orc union all select state from loc_orc) tmp;
-- numRows: 8 rawDataSize: 796
-explain extended select * from loc_orc;
+explain select * from loc_orc;
-- numRows: 16 rawDataSize: 1592
-explain extended select * from (select * from loc_orc union all select * from loc_orc) tmp;
+explain select * from (select * from loc_orc union all select * from loc_orc) tmp;
create database test;
use test;
@@ -49,7 +49,7 @@ analyze table loc_staging compute statis
analyze table loc_orc compute statistics for columns state,locid,zip,year;
-- numRows: 16 rawDataSize: 1376
-explain extended select * from (select state from default.loc_orc union all select state from test.loc_orc) temp;
+explain select * from (select state from default.loc_orc union all select state from test.loc_orc) temp;
-- numRows: 16 rawDataSize: 1376
-explain extended select * from (select state from test.loc_staging union all select state from test.loc_orc) temp;
+explain select * from (select state from test.loc_staging union all select state from test.loc_orc) temp;
Modified: hive/trunk/ql/src/test/results/clientpositive/annotate_stats_filter.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/annotate_stats_filter.q.out?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
Files hive/trunk/ql/src/test/results/clientpositive/annotate_stats_filter.q.out (original) and hive/trunk/ql/src/test/results/clientpositive/annotate_stats_filter.q.out Wed Aug 6 17:58:01 2014 differ
Modified: hive/trunk/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
Files hive/trunk/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out (original) and hive/trunk/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out Wed Aug 6 17:58:01 2014 differ
Modified: hive/trunk/ql/src/test/results/clientpositive/annotate_stats_join.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/annotate_stats_join.q.out?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
Files hive/trunk/ql/src/test/results/clientpositive/annotate_stats_join.q.out (original) and hive/trunk/ql/src/test/results/clientpositive/annotate_stats_join.q.out Wed Aug 6 17:58:01 2014 differ
Modified: hive/trunk/ql/src/test/results/clientpositive/annotate_stats_limit.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/annotate_stats_limit.q.out?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/annotate_stats_limit.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/annotate_stats_limit.q.out Wed Aug 6 17:58:01 2014
@@ -59,27 +59,11 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@loc_orc
#### A masked pattern was here ####
PREHOOK: query: -- numRows: 8 rawDataSize: 796
-explain extended select * from loc_orc
+explain select * from loc_orc
PREHOOK: type: QUERY
POSTHOOK: query: -- numRows: 8 rawDataSize: 796
-explain extended select * from loc_orc
+explain select * from loc_orc
POSTHOOK: type: QUERY
-ABSTRACT SYNTAX TREE:
-
-TOK_QUERY
- TOK_FROM
- TOK_TABREF
- TOK_TABNAME
- loc_orc
- TOK_INSERT
- TOK_DESTINATION
- TOK_DIR
- TOK_TMP_FILE
- TOK_SELECT
- TOK_SELEXPR
- TOK_ALLCOLREF
-
-
STAGE DEPENDENCIES:
Stage-0 is a root stage
@@ -91,7 +75,6 @@ STAGE PLANS:
TableScan
alias: loc_orc
Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
- GatherStats: false
Select Operator
expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
@@ -99,29 +82,11 @@ STAGE PLANS:
ListSink
PREHOOK: query: -- numRows: 4 rawDataSize: 396
-explain extended select * from loc_orc limit 4
+explain select * from loc_orc limit 4
PREHOOK: type: QUERY
POSTHOOK: query: -- numRows: 4 rawDataSize: 396
-explain extended select * from loc_orc limit 4
+explain select * from loc_orc limit 4
POSTHOOK: type: QUERY
-ABSTRACT SYNTAX TREE:
-
-TOK_QUERY
- TOK_FROM
- TOK_TABREF
- TOK_TABNAME
- loc_orc
- TOK_INSERT
- TOK_DESTINATION
- TOK_DIR
- TOK_TMP_FILE
- TOK_SELECT
- TOK_SELEXPR
- TOK_ALLCOLREF
- TOK_LIMIT
- 4
-
-
STAGE DEPENDENCIES:
Stage-0 is a root stage
@@ -133,7 +98,6 @@ STAGE PLANS:
TableScan
alias: loc_orc
Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
- GatherStats: false
Select Operator
expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
@@ -145,30 +109,12 @@ STAGE PLANS:
PREHOOK: query: -- greater than the available number of rows
-- numRows: 8 rawDataSize: 796
-explain extended select * from loc_orc limit 16
+explain select * from loc_orc limit 16
PREHOOK: type: QUERY
POSTHOOK: query: -- greater than the available number of rows
-- numRows: 8 rawDataSize: 796
-explain extended select * from loc_orc limit 16
+explain select * from loc_orc limit 16
POSTHOOK: type: QUERY
-ABSTRACT SYNTAX TREE:
-
-TOK_QUERY
- TOK_FROM
- TOK_TABREF
- TOK_TABNAME
- loc_orc
- TOK_INSERT
- TOK_DESTINATION
- TOK_DIR
- TOK_TMP_FILE
- TOK_SELECT
- TOK_SELEXPR
- TOK_ALLCOLREF
- TOK_LIMIT
- 16
-
-
STAGE DEPENDENCIES:
Stage-0 is a root stage
@@ -180,7 +126,6 @@ STAGE PLANS:
TableScan
alias: loc_orc
Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
- GatherStats: false
Select Operator
expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
@@ -191,29 +136,11 @@ STAGE PLANS:
ListSink
PREHOOK: query: -- numRows: 0 rawDataSize: 0
-explain extended select * from loc_orc limit 0
+explain select * from loc_orc limit 0
PREHOOK: type: QUERY
POSTHOOK: query: -- numRows: 0 rawDataSize: 0
-explain extended select * from loc_orc limit 0
+explain select * from loc_orc limit 0
POSTHOOK: type: QUERY
-ABSTRACT SYNTAX TREE:
-
-TOK_QUERY
- TOK_FROM
- TOK_TABREF
- TOK_TABNAME
- loc_orc
- TOK_INSERT
- TOK_DESTINATION
- TOK_DIR
- TOK_TMP_FILE
- TOK_SELECT
- TOK_SELEXPR
- TOK_ALLCOLREF
- TOK_LIMIT
- 0
-
-
STAGE DEPENDENCIES:
Stage-0 is a root stage
@@ -225,7 +152,6 @@ STAGE PLANS:
TableScan
alias: loc_orc
Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
- GatherStats: false
Select Operator
expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
Modified: hive/trunk/ql/src/test/results/clientpositive/annotate_stats_part.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/annotate_stats_part.q.out?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
Files hive/trunk/ql/src/test/results/clientpositive/annotate_stats_part.q.out (original) and hive/trunk/ql/src/test/results/clientpositive/annotate_stats_part.q.out Wed Aug 6 17:58:01 2014 differ
Modified: hive/trunk/ql/src/test/results/clientpositive/annotate_stats_select.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/annotate_stats_select.q.out?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
Files hive/trunk/ql/src/test/results/clientpositive/annotate_stats_select.q.out (original) and hive/trunk/ql/src/test/results/clientpositive/annotate_stats_select.q.out Wed Aug 6 17:58:01 2014 differ
Modified: hive/trunk/ql/src/test/results/clientpositive/annotate_stats_table.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/annotate_stats_table.q.out?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
Files hive/trunk/ql/src/test/results/clientpositive/annotate_stats_table.q.out (original) and hive/trunk/ql/src/test/results/clientpositive/annotate_stats_table.q.out Wed Aug 6 17:58:01 2014 differ
Modified: hive/trunk/ql/src/test/results/clientpositive/annotate_stats_union.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/annotate_stats_union.q.out?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
Files hive/trunk/ql/src/test/results/clientpositive/annotate_stats_union.q.out (original) and hive/trunk/ql/src/test/results/clientpositive/annotate_stats_union.q.out Wed Aug 6 17:58:01 2014 differ
Modified: hive/trunk/ql/src/test/results/clientpositive/combine2.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/combine2.q.out?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/combine2.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/combine2.q.out Wed Aug 6 17:58:01 2014
@@ -671,12 +671,12 @@ STAGE PLANS:
keys: ds (type: string)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 23248 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 0 Data size: 23248 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
value expressions: _col1 (type: bigint)
Reduce Operator Tree:
Group By Operator
Modified: hive/trunk/ql/src/test/results/clientpositive/groupby_sort_11.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/groupby_sort_11.q.out?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/groupby_sort_11.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/groupby_sort_11.q.out Wed Aug 6 17:58:01 2014
@@ -395,11 +395,11 @@ STAGE PLANS:
keys: 1 (type: int)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Operator Tree:
Group By Operator
aggregations: count(DISTINCT KEY._col0:0._col0)
Modified: hive/trunk/ql/src/test/results/clientpositive/input24.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/input24.q.out?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/input24.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/input24.q.out Wed Aug 6 17:58:01 2014
@@ -28,9 +28,9 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: x
- Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
Select Operator
- Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
Group By Operator
aggregations: count(1)
mode: hash
Modified: hive/trunk/ql/src/test/results/clientpositive/input25.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/input25.q.out?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/input25.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/input25.q.out Wed Aug 6 17:58:01 2014
@@ -45,14 +45,14 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: x
- Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Select Operator
expressions: a (type: int), b (type: int), d (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Limit
Number of rows: 10
- Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Reduce Output Operator
sort order:
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
@@ -109,14 +109,14 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: x
- Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Select Operator
expressions: a (type: int), b (type: int), d (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Limit
Number of rows: 10
- Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Reduce Output Operator
sort order:
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Modified: hive/trunk/ql/src/test/results/clientpositive/metadataonly1.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/metadataonly1.q.out?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
Files hive/trunk/ql/src/test/results/clientpositive/metadataonly1.q.out (original) and hive/trunk/ql/src/test/results/clientpositive/metadataonly1.q.out Wed Aug 6 17:58:01 2014 differ
Modified: hive/trunk/ql/src/test/results/clientpositive/nullgroup3.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/nullgroup3.q.out?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/nullgroup3.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/nullgroup3.q.out Wed Aug 6 17:58:01 2014
@@ -129,9 +129,9 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: tstparttbl2
- Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
Select Operator
- Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
Group By Operator
aggregations: count(1)
mode: hash
@@ -325,9 +325,9 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: tstparttbl2
- Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
Select Operator
- Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
Group By Operator
aggregations: count(1)
mode: hash
Modified: hive/trunk/ql/src/test/results/clientpositive/tez/metadataonly1.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/metadataonly1.q.out?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
Files hive/trunk/ql/src/test/results/clientpositive/tez/metadataonly1.q.out (original) and hive/trunk/ql/src/test/results/clientpositive/tez/metadataonly1.q.out Wed Aug 6 17:58:01 2014 differ
Modified: hive/trunk/ql/src/test/results/clientpositive/tez/union5.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/union5.q.out?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/tez/union5.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/tez/union5.q.out Wed Aug 6 17:58:01 2014
@@ -86,14 +86,14 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), _col1 (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Modified: hive/trunk/ql/src/test/results/clientpositive/tez/union7.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/union7.q.out?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/tez/union7.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/tez/union7.q.out Wed Aug 6 17:58:01 2014
@@ -90,14 +90,14 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: _col0 (type: string), _col1 (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: PARTIAL
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: PARTIAL
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Modified: hive/trunk/ql/src/test/results/clientpositive/udf_explode.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/udf_explode.q.out?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
Files hive/trunk/ql/src/test/results/clientpositive/udf_explode.q.out (original) and hive/trunk/ql/src/test/results/clientpositive/udf_explode.q.out Wed Aug 6 17:58:01 2014 differ
Modified: hive/trunk/ql/src/test/results/clientpositive/udtf_explode.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/udtf_explode.q.out?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
Files hive/trunk/ql/src/test/results/clientpositive/udtf_explode.q.out (original) and hive/trunk/ql/src/test/results/clientpositive/udtf_explode.q.out Wed Aug 6 17:58:01 2014 differ
Modified: hive/trunk/ql/src/test/results/clientpositive/union11.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/union11.q.out?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/union11.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/union11.q.out Wed Aug 6 17:58:01 2014
@@ -73,12 +73,12 @@ STAGE PLANS:
keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 3 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 3 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: bigint)
TableScan
Union
@@ -92,12 +92,12 @@ STAGE PLANS:
keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 3 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 3 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: bigint)
TableScan
Union
@@ -111,12 +111,12 @@ STAGE PLANS:
keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 3 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 3 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: bigint)
Reduce Operator Tree:
Group By Operator
@@ -124,14 +124,14 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), _col1 (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Modified: hive/trunk/ql/src/test/results/clientpositive/union14.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/union14.q.out?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/union14.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/union14.q.out Wed Aug 6 17:58:01 2014
@@ -75,12 +75,12 @@ STAGE PLANS:
keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 3 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 3 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: bigint)
TableScan
Union
@@ -94,12 +94,12 @@ STAGE PLANS:
keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 3 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 3 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: bigint)
Reduce Operator Tree:
Group By Operator
@@ -107,14 +107,14 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), _col1 (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat