You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by pr...@apache.org on 2014/09/07 22:13:12 UTC
svn commit: r1623185 - in /hive/trunk/ql/src:
java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/
test/queries/clientpositive/ test/results/clientpositive/
Author: prasanthj
Date: Sun Sep 7 20:13:11 2014
New Revision: 1623185
URL: http://svn.apache.org/r1623185
Log:
HIVE-7990: With fetch column stats disabled number of elements in grouping set is not taken into account (Prasanth J reviewed by Gunther Hagleitner)
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_groupby.q
hive/trunk/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out
hive/trunk/ql/src/test/results/clientpositive/groupby_cube1.q.out
hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_sets2.q.out
hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out
hive/trunk/ql/src/test/results/clientpositive/groupby_rollup1.q.out
hive/trunk/ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java?rev=1623185&r1=1623184&r2=1623185&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java Sun Sep 7 20:13:11 2014
@@ -587,6 +587,23 @@ public class StatsRulesProcFactory {
Map<String, ExprNodeDesc> colExprMap = gop.getColumnExprMap();
RowSchema rs = gop.getSchema();
Statistics stats = null;
+ boolean mapSide = false;
+ int multiplier = mapSideParallelism;
+ long newNumRows;
+ long newDataSize;
+
+ // map side
+ if (gop.getChildOperators().get(0) instanceof ReduceSinkOperator ||
+ gop.getChildOperators().get(0) instanceof AppMasterEventOperator) {
+
+ mapSide = true;
+
+ // map-side grouping set present. if grouping set is present then
+ // multiply the number of rows by number of elements in grouping set
+ if (gop.getConf().isGroupingSetsPresent()) {
+ multiplier *= gop.getConf().getListGroupingSets().size();
+ }
+ }
try {
if (satisfyPrecondition(parentStats)) {
@@ -596,7 +613,6 @@ public class StatsRulesProcFactory {
StatsUtils.getColStatisticsFromExprMap(conf, parentStats, colExprMap, rs);
stats.setColumnStats(colStats);
long dvProd = 1;
- long newNumRows = 0;
// compute product of distinct values of grouping columns
for (ColStatistics cs : colStats) {
@@ -624,8 +640,7 @@ public class StatsRulesProcFactory {
}
// map side
- if (gop.getChildOperators().get(0) instanceof ReduceSinkOperator ||
- gop.getChildOperators().get(0) instanceof AppMasterEventOperator) {
+ if (mapSide) {
// since we do not know if hash-aggregation will be enabled or disabled
// at runtime we will assume that map-side group by does not do any
@@ -634,14 +649,10 @@ public class StatsRulesProcFactory {
// map-side grouping set present. if grouping set is present then
// multiply the number of rows by number of elements in grouping set
if (gop.getConf().isGroupingSetsPresent()) {
- int multiplier = gop.getConf().getListGroupingSets().size();
-
- // take into account the map-side parallelism as well, default is 1
- multiplier *= mapSideParallelism;
newNumRows = setMaxIfInvalid(multiplier * stats.getNumRows());
- long dataSize = setMaxIfInvalid(multiplier * stats.getDataSize());
+ newDataSize = setMaxIfInvalid(multiplier * stats.getDataSize());
stats.setNumRows(newNumRows);
- stats.setDataSize(dataSize);
+ stats.setDataSize(newDataSize);
for (ColStatistics cs : colStats) {
if (cs != null) {
long oldNumNulls = cs.getNumNulls();
@@ -652,7 +663,7 @@ public class StatsRulesProcFactory {
} else {
// map side no grouping set
- newNumRows = stats.getNumRows() * mapSideParallelism;
+ newNumRows = stats.getNumRows() * multiplier;
updateStats(stats, newNumRows, true, gop);
}
} else {
@@ -664,17 +675,20 @@ public class StatsRulesProcFactory {
} else {
if (parentStats != null) {
+ stats = parentStats.clone();
+
// worst case, in the absence of column statistics assume half the rows are emitted
- if (gop.getChildOperators().get(0) instanceof ReduceSinkOperator
- || gop.getChildOperators().get(0) instanceof AppMasterEventOperator) {
+ if (mapSide) {
// map side
- stats = parentStats.clone();
+ newNumRows = multiplier * stats.getNumRows();
+ newDataSize = multiplier * stats.getDataSize();
+ stats.setNumRows(newNumRows);
+ stats.setDataSize(newDataSize);
} else {
// reduce side
- stats = parentStats.clone();
- long newNumRows = parentStats.getNumRows() / 2;
+ newNumRows = parentStats.getNumRows() / 2;
updateStats(stats, newNumRows, false, gop);
}
}
Modified: hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_groupby.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_groupby.q?rev=1623185&r1=1623184&r2=1623185&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_groupby.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_groupby.q Sun Sep 7 20:13:11 2014
@@ -67,3 +67,33 @@ explain select year from loc_orc group b
-- map-side GBY numRows: 320 reduce-side GBY numRows: 42 Reason: numDistinct of state and locid are 6,7 resp. numRows = min(320/2, 6*7)
explain select state,locid from loc_orc group by state,locid with cube;
+set hive.stats.fetch.column.stats=false;
+set hive.stats.map.parallelism=1;
+
+-- map-side GBY numRows: 32 reduce-side GBY numRows: 16
+explain select state,locid from loc_orc group by state,locid with cube;
+
+-- map-side GBY numRows: 24 reduce-side GBY numRows: 12
+explain select state,locid from loc_orc group by state,locid with rollup;
+
+-- map-side GBY numRows: 8 reduce-side GBY numRows: 4
+explain select state,locid from loc_orc group by state,locid grouping sets((state));
+
+-- map-side GBY numRows: 16 reduce-side GBY numRows: 8
+explain select state,locid from loc_orc group by state,locid grouping sets((state),(locid));
+
+-- map-side GBY numRows: 24 reduce-side GBY numRows: 12
+explain select state,locid from loc_orc group by state,locid grouping sets((state),(locid),());
+
+-- map-side GBY numRows: 32 reduce-side GBY numRows: 16
+explain select state,locid from loc_orc group by state,locid grouping sets((state,locid),(state),(locid),());
+
+set hive.stats.map.parallelism=10;
+
+-- map-side GBY: numRows: 80 (map-side will not do any reduction)
+-- reduce-side GBY: numRows: 40 Reason: column stats are disabled, so numDistinct of year is unavailable. numRows = 80/2
+explain select year from loc_orc group by year;
+
+-- map-side GBY numRows: 320 reduce-side GBY numRows: 160 Reason: column stats are disabled, so numDistinct of state and locid is unavailable. numRows = 320/2
+explain select state,locid from loc_orc group by state,locid with cube;
+
Modified: hive/trunk/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out?rev=1623185&r1=1623184&r2=1623185&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out Sun Sep 7 20:13:11 2014
@@ -756,3 +756,445 @@ STAGE PLANS:
Processor Tree:
ListSink
+PREHOOK: query: -- map-side GBY numRows: 32 reduce-side GBY numRows: 16
+explain select state,locid from loc_orc group by state,locid with cube
+PREHOOK: type: QUERY
+POSTHOOK: query: -- map-side GBY numRows: 32 reduce-side GBY numRows: 16
+explain select state,locid from loc_orc group by state,locid with cube
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: loc_orc
+ Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: state (type: string), locid (type: int)
+ outputColumnNames: state, locid
+ Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: state (type: string), locid (type: int), '0' (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+ Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: -- map-side GBY numRows: 24 reduce-side GBY numRows: 12
+explain select state,locid from loc_orc group by state,locid with rollup
+PREHOOK: type: QUERY
+POSTHOOK: query: -- map-side GBY numRows: 24 reduce-side GBY numRows: 12
+explain select state,locid from loc_orc group by state,locid with rollup
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: loc_orc
+ Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: state (type: string), locid (type: int)
+ outputColumnNames: state, locid
+ Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: state (type: string), locid (type: int), '0' (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+ Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: -- map-side GBY numRows: 8 reduce-side GBY numRows: 4
+explain select state,locid from loc_orc group by state,locid grouping sets((state))
+PREHOOK: type: QUERY
+POSTHOOK: query: -- map-side GBY numRows: 8 reduce-side GBY numRows: 4
+explain select state,locid from loc_orc group by state,locid grouping sets((state))
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: loc_orc
+ Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: state (type: string), locid (type: int)
+ outputColumnNames: state, locid
+ Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: state (type: string), locid (type: int), '0' (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+ Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: -- map-side GBY numRows: 16 reduce-side GBY numRows: 8
+explain select state,locid from loc_orc group by state,locid grouping sets((state),(locid))
+PREHOOK: type: QUERY
+POSTHOOK: query: -- map-side GBY numRows: 16 reduce-side GBY numRows: 8
+explain select state,locid from loc_orc group by state,locid grouping sets((state),(locid))
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: loc_orc
+ Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: state (type: string), locid (type: int)
+ outputColumnNames: state, locid
+ Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: state (type: string), locid (type: int), '0' (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+ Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: -- map-side GBY numRows: 24 reduce-side GBY numRows: 12
+explain select state,locid from loc_orc group by state,locid grouping sets((state),(locid),())
+PREHOOK: type: QUERY
+POSTHOOK: query: -- map-side GBY numRows: 24 reduce-side GBY numRows: 12
+explain select state,locid from loc_orc group by state,locid grouping sets((state),(locid),())
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: loc_orc
+ Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: state (type: string), locid (type: int)
+ outputColumnNames: state, locid
+ Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: state (type: string), locid (type: int), '0' (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+ Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: -- map-side GBY numRows: 32 reduce-side GBY numRows: 16
+explain select state,locid from loc_orc group by state,locid grouping sets((state,locid),(state),(locid),())
+PREHOOK: type: QUERY
+POSTHOOK: query: -- map-side GBY numRows: 32 reduce-side GBY numRows: 16
+explain select state,locid from loc_orc group by state,locid grouping sets((state,locid),(state),(locid),())
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: loc_orc
+ Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: state (type: string), locid (type: int)
+ outputColumnNames: state, locid
+ Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: state (type: string), locid (type: int), '0' (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+ Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: -- map-side GBY: numRows: 80 (map-side will not do any reduction)
+-- reduce-side GBY: numRows: 2 Reason: numDistinct of year is 2. numRows = min(80/2, 2)
+explain select year from loc_orc group by year
+PREHOOK: type: QUERY
+POSTHOOK: query: -- map-side GBY: numRows: 80 (map-side will not do any reduction)
+-- reduce-side GBY: numRows: 2 Reason: numDistinct of year is 2. numRows = min(80/2, 2)
+explain select year from loc_orc group by year
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: loc_orc
+ Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: year (type: int)
+ outputColumnNames: year
+ Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: year (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 80 Data size: 7960 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 80 Data size: 7960 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 40 Data size: 3980 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 40 Data size: 3980 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 40 Data size: 3980 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: -- map-side GBY numRows: 320 reduce-side GBY numRows: 42 Reason: numDistinct of state and locid are 6,7 resp. numRows = min(320/2, 6*7)
+explain select state,locid from loc_orc group by state,locid with cube
+PREHOOK: type: QUERY
+POSTHOOK: query: -- map-side GBY numRows: 320 reduce-side GBY numRows: 42 Reason: numDistinct of state and locid are 6,7 resp. numRows = min(320/2, 6*7)
+explain select state,locid from loc_orc group by state,locid with cube
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: loc_orc
+ Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: state (type: string), locid (type: int)
+ outputColumnNames: state, locid
+ Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: state (type: string), locid (type: int), '0' (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 320 Data size: 31840 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+ Statistics: Num rows: 320 Data size: 31840 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 160 Data size: 15920 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 160 Data size: 15920 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 160 Data size: 15920 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
Modified: hive/trunk/ql/src/test/results/clientpositive/groupby_cube1.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/groupby_cube1.q.out?rev=1623185&r1=1623184&r2=1623185&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/groupby_cube1.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/groupby_cube1.q.out Sun Sep 7 20:13:11 2014
@@ -44,12 +44,12 @@ STAGE PLANS:
keys: key (type: string), val (type: string), '0' (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string)
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL Column stats: NONE
value expressions: _col3 (type: bigint)
Reduce Operator Tree:
Group By Operator
@@ -128,12 +128,12 @@ STAGE PLANS:
keys: key (type: string), '0' (type: string), val (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE
Reduce Operator Tree:
Group By Operator
aggregations: count(DISTINCT KEY._col2:0._col0)
@@ -200,12 +200,12 @@ STAGE PLANS:
keys: key (type: string), val (type: string), '0' (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
sort order: +++
Map-reduce partition columns: rand() (type: double)
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL Column stats: NONE
value expressions: _col3 (type: bigint)
Reduce Operator Tree:
Group By Operator
@@ -213,7 +213,7 @@ STAGE PLANS:
keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
mode: partials
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL Column stats: NONE
File Output Operator
compressed: false
table:
@@ -229,7 +229,7 @@ STAGE PLANS:
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL Column stats: NONE
value expressions: _col3 (type: bigint)
Reduce Operator Tree:
Group By Operator
@@ -308,12 +308,12 @@ STAGE PLANS:
keys: key (type: string), '0' (type: string), val (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
sort order: +++
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE
Reduce Operator Tree:
Group By Operator
aggregations: count(DISTINCT KEY._col2:0._col0)
@@ -405,12 +405,12 @@ STAGE PLANS:
keys: key (type: string), val (type: string), '0' (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
sort order: +++
Map-reduce partition columns: rand() (type: double)
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL Column stats: NONE
value expressions: _col3 (type: bigint)
Select Operator
expressions: key (type: string), val (type: string)
@@ -421,7 +421,7 @@ STAGE PLANS:
keys: key (type: string), val (type: string), '0' (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL Column stats: NONE
File Output Operator
compressed: false
table:
@@ -434,7 +434,7 @@ STAGE PLANS:
keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
mode: partials
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL Column stats: NONE
File Output Operator
compressed: false
table:
@@ -450,7 +450,7 @@ STAGE PLANS:
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL Column stats: NONE
value expressions: _col3 (type: bigint)
Reduce Operator Tree:
Group By Operator
@@ -493,7 +493,7 @@ STAGE PLANS:
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
sort order: +++
Map-reduce partition columns: rand() (type: double)
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL Column stats: NONE
value expressions: _col3 (type: bigint)
Reduce Operator Tree:
Group By Operator
@@ -501,7 +501,7 @@ STAGE PLANS:
keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
mode: partials
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL Column stats: NONE
File Output Operator
compressed: false
table:
@@ -517,7 +517,7 @@ STAGE PLANS:
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL Column stats: NONE
value expressions: _col3 (type: bigint)
Reduce Operator Tree:
Group By Operator
Modified: hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_sets2.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_sets2.q.out?rev=1623185&r1=1623184&r2=1623185&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_sets2.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_sets2.q.out Sun Sep 7 20:13:11 2014
@@ -56,7 +56,7 @@ STAGE PLANS:
keys: KEY._col0 (type: string), KEY._col1 (type: string), '0' (type: string)
mode: partials
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 144 Basic stats: PARTIAL Column stats: NONE
File Output Operator
compressed: false
table:
@@ -72,7 +72,7 @@ STAGE PLANS:
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string)
- Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 144 Basic stats: PARTIAL Column stats: NONE
value expressions: _col3 (type: bigint)
Reduce Operator Tree:
Group By Operator
@@ -162,7 +162,7 @@ STAGE PLANS:
keys: KEY._col0 (type: string), KEY._col1 (type: string), '0' (type: string)
mode: partials
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 144 Basic stats: PARTIAL Column stats: NONE
File Output Operator
compressed: false
table:
@@ -178,7 +178,7 @@ STAGE PLANS:
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string)
- Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 144 Basic stats: PARTIAL Column stats: NONE
value expressions: _col3 (type: double)
Reduce Operator Tree:
Group By Operator
@@ -290,7 +290,7 @@ STAGE PLANS:
keys: KEY._col0 (type: string), KEY._col1 (type: string), '0' (type: string)
mode: partials
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 6 Data size: 42 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 24 Data size: 168 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -306,7 +306,7 @@ STAGE PLANS:
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string)
- Statistics: Num rows: 6 Data size: 42 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 24 Data size: 168 Basic stats: COMPLETE Column stats: NONE
value expressions: _col3 (type: bigint)
Reduce Operator Tree:
Group By Operator
@@ -314,14 +314,14 @@ STAGE PLANS:
keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
mode: final
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12 Data size: 84 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12 Data size: 84 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12 Data size: 84 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Modified: hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out?rev=1623185&r1=1623184&r2=1623185&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out Sun Sep 7 20:13:11 2014
@@ -62,12 +62,12 @@ STAGE PLANS:
keys: a (type: string), b (type: string), '0' (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 0 Data size: 72 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 288 Basic stats: PARTIAL Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string)
- Statistics: Num rows: 0 Data size: 72 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 288 Basic stats: PARTIAL Column stats: NONE
value expressions: _col3 (type: struct<count:bigint,sum:double,input:string>), _col4 (type: bigint)
Reduce Operator Tree:
Group By Operator
@@ -162,7 +162,7 @@ STAGE PLANS:
keys: KEY._col0 (type: string), KEY._col1 (type: string), '0' (type: string)
mode: partials
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 0 Data size: 72 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 288 Basic stats: PARTIAL Column stats: NONE
File Output Operator
compressed: false
table:
@@ -178,7 +178,7 @@ STAGE PLANS:
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string)
- Statistics: Num rows: 0 Data size: 72 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 288 Basic stats: PARTIAL Column stats: NONE
value expressions: _col3 (type: struct<count:bigint,sum:double,input:string>), _col4 (type: bigint)
Reduce Operator Tree:
Group By Operator
Modified: hive/trunk/ql/src/test/results/clientpositive/groupby_rollup1.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/groupby_rollup1.q.out?rev=1623185&r1=1623184&r2=1623185&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/groupby_rollup1.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/groupby_rollup1.q.out Sun Sep 7 20:13:11 2014
@@ -44,12 +44,12 @@ STAGE PLANS:
keys: key (type: string), val (type: string), '0' (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string)
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE
value expressions: _col3 (type: bigint)
Reduce Operator Tree:
Group By Operator
@@ -122,12 +122,12 @@ STAGE PLANS:
keys: key (type: string), '0' (type: string), val (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE
Reduce Operator Tree:
Group By Operator
aggregations: count(DISTINCT KEY._col2:0._col0)
@@ -194,12 +194,12 @@ STAGE PLANS:
keys: key (type: string), val (type: string), '0' (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
sort order: +++
Map-reduce partition columns: rand() (type: double)
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE
value expressions: _col3 (type: bigint)
Reduce Operator Tree:
Group By Operator
@@ -207,7 +207,7 @@ STAGE PLANS:
keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
mode: partials
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE
File Output Operator
compressed: false
table:
@@ -223,7 +223,7 @@ STAGE PLANS:
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE
value expressions: _col3 (type: bigint)
Reduce Operator Tree:
Group By Operator
@@ -296,12 +296,12 @@ STAGE PLANS:
keys: key (type: string), '0' (type: string), val (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
sort order: +++
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE
Reduce Operator Tree:
Group By Operator
aggregations: count(DISTINCT KEY._col2:0._col0)
@@ -393,12 +393,12 @@ STAGE PLANS:
keys: key (type: string), val (type: string), '0' (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
sort order: +++
Map-reduce partition columns: rand() (type: double)
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE
value expressions: _col3 (type: bigint)
Select Operator
expressions: key (type: string), val (type: string)
@@ -409,7 +409,7 @@ STAGE PLANS:
keys: key (type: string), val (type: string), '0' (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE
File Output Operator
compressed: false
table:
@@ -422,7 +422,7 @@ STAGE PLANS:
keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
mode: partials
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE
File Output Operator
compressed: false
table:
@@ -438,7 +438,7 @@ STAGE PLANS:
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE
value expressions: _col3 (type: bigint)
Reduce Operator Tree:
Group By Operator
@@ -481,7 +481,7 @@ STAGE PLANS:
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
sort order: +++
Map-reduce partition columns: rand() (type: double)
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE
value expressions: _col3 (type: bigint)
Reduce Operator Tree:
Group By Operator
@@ -489,7 +489,7 @@ STAGE PLANS:
keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
mode: partials
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE
File Output Operator
compressed: false
table:
@@ -505,7 +505,7 @@ STAGE PLANS:
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE
value expressions: _col3 (type: bigint)
Reduce Operator Tree:
Group By Operator
Modified: hive/trunk/ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out?rev=1623185&r1=1623184&r2=1623185&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out Sun Sep 7 20:13:11 2014
@@ -51,12 +51,12 @@ STAGE PLANS:
keys: key (type: string), value (type: string), '0' (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 87 Data size: 17436 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string)
- Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 87 Data size: 17436 Basic stats: COMPLETE Column stats: NONE
value expressions: _col3 (type: bigint)
Reduce Operator Tree:
Group By Operator
@@ -64,14 +64,14 @@ STAGE PLANS:
keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 43 Data size: 8617 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 43 Data size: 8617 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 43 Data size: 8617 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -238,12 +238,12 @@ STAGE PLANS:
keys: key (type: string), value (type: string), '0' (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string)
- Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE
value expressions: _col3 (type: bigint)
Reduce Operator Tree:
Group By Operator
@@ -251,14 +251,14 @@ STAGE PLANS:
keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -425,12 +425,12 @@ STAGE PLANS:
keys: key (type: string), value (type: string), '0' (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string)
- Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
value expressions: _col3 (type: bigint)
Reduce Operator Tree:
Group By Operator
@@ -438,14 +438,14 @@ STAGE PLANS:
keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat