You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by pr...@apache.org on 2014/09/07 22:13:12 UTC

svn commit: r1623185 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/ test/queries/clientpositive/ test/results/clientpositive/

Author: prasanthj
Date: Sun Sep  7 20:13:11 2014
New Revision: 1623185

URL: http://svn.apache.org/r1623185
Log:
HIVE-7990: With fetch column stats disabled number of elements in grouping set is not taken into account (Prasanth J reviewed by Gunther Hagleitner)

Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
    hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_groupby.q
    hive/trunk/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out
    hive/trunk/ql/src/test/results/clientpositive/groupby_cube1.q.out
    hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_sets2.q.out
    hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out
    hive/trunk/ql/src/test/results/clientpositive/groupby_rollup1.q.out
    hive/trunk/ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java?rev=1623185&r1=1623184&r2=1623185&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java Sun Sep  7 20:13:11 2014
@@ -587,6 +587,23 @@ public class StatsRulesProcFactory {
       Map<String, ExprNodeDesc> colExprMap = gop.getColumnExprMap();
       RowSchema rs = gop.getSchema();
       Statistics stats = null;
+      boolean mapSide = false;
+      int multiplier = mapSideParallelism;
+      long newNumRows;
+      long newDataSize;
+
+      // map side
+      if (gop.getChildOperators().get(0) instanceof ReduceSinkOperator ||
+          gop.getChildOperators().get(0) instanceof AppMasterEventOperator) {
+
+         mapSide = true;
+
+        // map-side grouping set present. if grouping set is present then
+        // multiply the number of rows by number of elements in grouping set
+        if (gop.getConf().isGroupingSetsPresent()) {
+          multiplier *= gop.getConf().getListGroupingSets().size();
+        }
+      }
 
       try {
         if (satisfyPrecondition(parentStats)) {
@@ -596,7 +613,6 @@ public class StatsRulesProcFactory {
               StatsUtils.getColStatisticsFromExprMap(conf, parentStats, colExprMap, rs);
           stats.setColumnStats(colStats);
           long dvProd = 1;
-          long newNumRows = 0;
 
           // compute product of distinct values of grouping columns
           for (ColStatistics cs : colStats) {
@@ -624,8 +640,7 @@ public class StatsRulesProcFactory {
           }
 
           // map side
-          if (gop.getChildOperators().get(0) instanceof ReduceSinkOperator ||
-              gop.getChildOperators().get(0) instanceof AppMasterEventOperator) {
+          if (mapSide) {
 
             // since we do not know if hash-aggregation will be enabled or disabled
             // at runtime we will assume that map-side group by does not do any
@@ -634,14 +649,10 @@ public class StatsRulesProcFactory {
             // map-side grouping set present. if grouping set is present then
             // multiply the number of rows by number of elements in grouping set
             if (gop.getConf().isGroupingSetsPresent()) {
-              int multiplier = gop.getConf().getListGroupingSets().size();
-
-              // take into account the map-side parallelism as well, default is 1
-              multiplier *= mapSideParallelism;
               newNumRows = setMaxIfInvalid(multiplier * stats.getNumRows());
-              long dataSize = setMaxIfInvalid(multiplier * stats.getDataSize());
+              newDataSize = setMaxIfInvalid(multiplier * stats.getDataSize());
               stats.setNumRows(newNumRows);
-              stats.setDataSize(dataSize);
+              stats.setDataSize(newDataSize);
               for (ColStatistics cs : colStats) {
                 if (cs != null) {
                   long oldNumNulls = cs.getNumNulls();
@@ -652,7 +663,7 @@ public class StatsRulesProcFactory {
             } else {
 
               // map side no grouping set
-              newNumRows = stats.getNumRows() * mapSideParallelism;
+              newNumRows = stats.getNumRows() * multiplier;
               updateStats(stats, newNumRows, true, gop);
             }
           } else {
@@ -664,17 +675,20 @@ public class StatsRulesProcFactory {
         } else {
           if (parentStats != null) {
 
+            stats = parentStats.clone();
+
             // worst case, in the absence of column statistics assume half the rows are emitted
-            if (gop.getChildOperators().get(0) instanceof ReduceSinkOperator
-                || gop.getChildOperators().get(0) instanceof AppMasterEventOperator) {
+            if (mapSide) {
 
               // map side
-              stats = parentStats.clone();
+              newNumRows = multiplier * stats.getNumRows();
+              newDataSize = multiplier * stats.getDataSize();
+              stats.setNumRows(newNumRows);
+              stats.setDataSize(newDataSize);
             } else {
 
               // reduce side
-              stats = parentStats.clone();
-              long newNumRows = parentStats.getNumRows() / 2;
+              newNumRows = parentStats.getNumRows() / 2;
               updateStats(stats, newNumRows, false, gop);
             }
           }

Modified: hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_groupby.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_groupby.q?rev=1623185&r1=1623184&r2=1623185&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_groupby.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_groupby.q Sun Sep  7 20:13:11 2014
@@ -67,3 +67,33 @@ explain select year from loc_orc group b
 -- map-side GBY numRows: 320 reduce-side GBY numRows: 42 Reason: numDistinct of state and locid are 6,7 resp. numRows = min(320/2, 6*7)
 explain select state,locid from loc_orc group by state,locid with cube;
 
+set hive.stats.fetch.column.stats=false;
+set hive.stats.map.parallelism=1;
+
+-- map-side GBY numRows: 32 reduce-side GBY numRows: 16
+explain select state,locid from loc_orc group by state,locid with cube;
+
+-- map-side GBY numRows: 24 reduce-side GBY numRows: 12
+explain select state,locid from loc_orc group by state,locid with rollup;
+
+-- map-side GBY numRows: 8 reduce-side GBY numRows: 4
+explain select state,locid from loc_orc group by state,locid grouping sets((state));
+
+-- map-side GBY numRows: 16 reduce-side GBY numRows: 8
+explain select state,locid from loc_orc group by state,locid grouping sets((state),(locid));
+
+-- map-side GBY numRows: 24 reduce-side GBY numRows: 12
+explain select state,locid from loc_orc group by state,locid grouping sets((state),(locid),());
+
+-- map-side GBY numRows: 32 reduce-side GBY numRows: 16
+explain select state,locid from loc_orc group by state,locid grouping sets((state,locid),(state),(locid),());
+
+set hive.stats.map.parallelism=10;
+
+-- map-side GBY: numRows: 80 (map-side will not do any reduction)
+-- reduce-side GBY: numRows: 40 Reason: column stats are not fetched, so half of the map-side rows are assumed. numRows = 80/2
+explain select year from loc_orc group by year;
+
+-- map-side GBY numRows: 320 reduce-side GBY numRows: 160 Reason: column stats are not fetched, so half of the map-side rows are assumed. numRows = 320/2
+explain select state,locid from loc_orc group by state,locid with cube;
+

Modified: hive/trunk/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out?rev=1623185&r1=1623184&r2=1623185&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out Sun Sep  7 20:13:11 2014
@@ -756,3 +756,445 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
+PREHOOK: query: -- map-side GBY numRows: 32 reduce-side GBY numRows: 16
+explain select state,locid from loc_orc group by state,locid with cube
+PREHOOK: type: QUERY
+POSTHOOK: query: -- map-side GBY numRows: 32 reduce-side GBY numRows: 16
+explain select state,locid from loc_orc group by state,locid with cube
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: loc_orc
+            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: state (type: string), locid (type: int)
+              outputColumnNames: state, locid
+              Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                keys: state (type: string), locid (type: int), '0' (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+                  sort order: +++
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+                  Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: int)
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: -- map-side GBY numRows: 24 reduce-side GBY numRows: 12
+explain select state,locid from loc_orc group by state,locid with rollup
+PREHOOK: type: QUERY
+POSTHOOK: query: -- map-side GBY numRows: 24 reduce-side GBY numRows: 12
+explain select state,locid from loc_orc group by state,locid with rollup
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: loc_orc
+            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: state (type: string), locid (type: int)
+              outputColumnNames: state, locid
+              Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                keys: state (type: string), locid (type: int), '0' (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+                  sort order: +++
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+                  Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: int)
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: -- map-side GBY numRows: 8 reduce-side GBY numRows: 4
+explain select state,locid from loc_orc group by state,locid grouping sets((state))
+PREHOOK: type: QUERY
+POSTHOOK: query: -- map-side GBY numRows: 8 reduce-side GBY numRows: 4
+explain select state,locid from loc_orc group by state,locid grouping sets((state))
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: loc_orc
+            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: state (type: string), locid (type: int)
+              outputColumnNames: state, locid
+              Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                keys: state (type: string), locid (type: int), '0' (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+                  sort order: +++
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+                  Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: int)
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: -- map-side GBY numRows: 16 reduce-side GBY numRows: 8
+explain select state,locid from loc_orc group by state,locid grouping sets((state),(locid))
+PREHOOK: type: QUERY
+POSTHOOK: query: -- map-side GBY numRows: 16 reduce-side GBY numRows: 8
+explain select state,locid from loc_orc group by state,locid grouping sets((state),(locid))
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: loc_orc
+            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: state (type: string), locid (type: int)
+              outputColumnNames: state, locid
+              Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                keys: state (type: string), locid (type: int), '0' (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+                  sort order: +++
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+                  Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: int)
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: -- map-side GBY numRows: 24 reduce-side GBY numRows: 12
+explain select state,locid from loc_orc group by state,locid grouping sets((state),(locid),())
+PREHOOK: type: QUERY
+POSTHOOK: query: -- map-side GBY numRows: 24 reduce-side GBY numRows: 12
+explain select state,locid from loc_orc group by state,locid grouping sets((state),(locid),())
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: loc_orc
+            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: state (type: string), locid (type: int)
+              outputColumnNames: state, locid
+              Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                keys: state (type: string), locid (type: int), '0' (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+                  sort order: +++
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+                  Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: int)
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: -- map-side GBY numRows: 32 reduce-side GBY numRows: 16
+explain select state,locid from loc_orc group by state,locid grouping sets((state,locid),(state),(locid),())
+PREHOOK: type: QUERY
+POSTHOOK: query: -- map-side GBY numRows: 32 reduce-side GBY numRows: 16
+explain select state,locid from loc_orc group by state,locid grouping sets((state,locid),(state),(locid),())
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: loc_orc
+            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: state (type: string), locid (type: int)
+              outputColumnNames: state, locid
+              Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                keys: state (type: string), locid (type: int), '0' (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+                  sort order: +++
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+                  Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: int)
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: -- map-side GBY: numRows: 80 (map-side will not do any reduction)
+-- reduce-side GBY: numRows: 40 Reason: column stats are not fetched, so half of the map-side rows are assumed. numRows = 80/2
+explain select year from loc_orc group by year
+PREHOOK: type: QUERY
+POSTHOOK: query: -- map-side GBY: numRows: 80 (map-side will not do any reduction)
+-- reduce-side GBY: numRows: 40 Reason: column stats are not fetched, so half of the map-side rows are assumed. numRows = 80/2
+explain select year from loc_orc group by year
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: loc_orc
+            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: year (type: int)
+              outputColumnNames: year
+              Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                keys: year (type: int)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 80 Data size: 7960 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: int)
+                  Statistics: Num rows: 80 Data size: 7960 Basic stats: COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: int)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 40 Data size: 3980 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: int)
+            outputColumnNames: _col0
+            Statistics: Num rows: 40 Data size: 3980 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 40 Data size: 3980 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: -- map-side GBY numRows: 320 reduce-side GBY numRows: 160 Reason: column stats are not fetched, so half of the map-side rows are assumed. numRows = 320/2
+explain select state,locid from loc_orc group by state,locid with cube
+PREHOOK: type: QUERY
+POSTHOOK: query: -- map-side GBY numRows: 320 reduce-side GBY numRows: 160 Reason: column stats are not fetched, so half of the map-side rows are assumed. numRows = 320/2
+explain select state,locid from loc_orc group by state,locid with cube
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: loc_orc
+            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: state (type: string), locid (type: int)
+              outputColumnNames: state, locid
+              Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                keys: state (type: string), locid (type: int), '0' (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 320 Data size: 31840 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+                  sort order: +++
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+                  Statistics: Num rows: 320 Data size: 31840 Basic stats: COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 160 Data size: 15920 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: int)
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 160 Data size: 15920 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 160 Data size: 15920 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+

Modified: hive/trunk/ql/src/test/results/clientpositive/groupby_cube1.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/groupby_cube1.q.out?rev=1623185&r1=1623184&r2=1623185&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/groupby_cube1.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/groupby_cube1.q.out Sun Sep  7 20:13:11 2014
@@ -44,12 +44,12 @@ STAGE PLANS:
                 keys: key (type: string), val (type: string), '0' (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
                   sort order: +++
                   Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string)
-                  Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                  Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL Column stats: NONE
                   value expressions: _col3 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
@@ -128,12 +128,12 @@ STAGE PLANS:
                 keys: key (type: string), '0' (type: string), val (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
                   sort order: +++
                   Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
-                  Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                  Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE
       Reduce Operator Tree:
         Group By Operator
           aggregations: count(DISTINCT KEY._col2:0._col0)
@@ -200,12 +200,12 @@ STAGE PLANS:
                 keys: key (type: string), val (type: string), '0' (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
                   sort order: +++
                   Map-reduce partition columns: rand() (type: double)
-                  Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                  Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL Column stats: NONE
                   value expressions: _col3 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
@@ -213,7 +213,7 @@ STAGE PLANS:
           keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
           mode: partials
           outputColumnNames: _col0, _col1, _col2, _col3
-          Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+          Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL Column stats: NONE
           File Output Operator
             compressed: false
             table:
@@ -229,7 +229,7 @@ STAGE PLANS:
               key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
               sort order: +++
               Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
-              Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+              Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL Column stats: NONE
               value expressions: _col3 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
@@ -308,12 +308,12 @@ STAGE PLANS:
                 keys: key (type: string), '0' (type: string), val (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
                   sort order: +++
                   Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                  Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE
       Reduce Operator Tree:
         Group By Operator
           aggregations: count(DISTINCT KEY._col2:0._col0)
@@ -405,12 +405,12 @@ STAGE PLANS:
                 keys: key (type: string), val (type: string), '0' (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
                   sort order: +++
                   Map-reduce partition columns: rand() (type: double)
-                  Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                  Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL Column stats: NONE
                   value expressions: _col3 (type: bigint)
             Select Operator
               expressions: key (type: string), val (type: string)
@@ -421,7 +421,7 @@ STAGE PLANS:
                 keys: key (type: string), val (type: string), '0' (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL Column stats: NONE
                 File Output Operator
                   compressed: false
                   table:
@@ -434,7 +434,7 @@ STAGE PLANS:
           keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
           mode: partials
           outputColumnNames: _col0, _col1, _col2, _col3
-          Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+          Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL Column stats: NONE
           File Output Operator
             compressed: false
             table:
@@ -450,7 +450,7 @@ STAGE PLANS:
               key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
               sort order: +++
               Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
-              Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+              Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL Column stats: NONE
               value expressions: _col3 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
@@ -493,7 +493,7 @@ STAGE PLANS:
               key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
               sort order: +++
               Map-reduce partition columns: rand() (type: double)
-              Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+              Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL Column stats: NONE
               value expressions: _col3 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
@@ -501,7 +501,7 @@ STAGE PLANS:
           keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
           mode: partials
           outputColumnNames: _col0, _col1, _col2, _col3
-          Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+          Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL Column stats: NONE
           File Output Operator
             compressed: false
             table:
@@ -517,7 +517,7 @@ STAGE PLANS:
               key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
               sort order: +++
               Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
-              Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+              Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL Column stats: NONE
               value expressions: _col3 (type: bigint)
       Reduce Operator Tree:
         Group By Operator

Modified: hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_sets2.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_sets2.q.out?rev=1623185&r1=1623184&r2=1623185&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_sets2.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_sets2.q.out Sun Sep  7 20:13:11 2014
@@ -56,7 +56,7 @@ STAGE PLANS:
           keys: KEY._col0 (type: string), KEY._col1 (type: string), '0' (type: string)
           mode: partials
           outputColumnNames: _col0, _col1, _col2, _col3
-          Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE
+          Statistics: Num rows: 0 Data size: 144 Basic stats: PARTIAL Column stats: NONE
           File Output Operator
             compressed: false
             table:
@@ -72,7 +72,7 @@ STAGE PLANS:
               key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
               sort order: +++
               Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string)
-              Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE
+              Statistics: Num rows: 0 Data size: 144 Basic stats: PARTIAL Column stats: NONE
               value expressions: _col3 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
@@ -162,7 +162,7 @@ STAGE PLANS:
           keys: KEY._col0 (type: string), KEY._col1 (type: string), '0' (type: string)
           mode: partials
           outputColumnNames: _col0, _col1, _col2, _col3
-          Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE
+          Statistics: Num rows: 0 Data size: 144 Basic stats: PARTIAL Column stats: NONE
           File Output Operator
             compressed: false
             table:
@@ -178,7 +178,7 @@ STAGE PLANS:
               key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
               sort order: +++
               Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string)
-              Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE
+              Statistics: Num rows: 0 Data size: 144 Basic stats: PARTIAL Column stats: NONE
               value expressions: _col3 (type: double)
       Reduce Operator Tree:
         Group By Operator
@@ -290,7 +290,7 @@ STAGE PLANS:
           keys: KEY._col0 (type: string), KEY._col1 (type: string), '0' (type: string)
           mode: partials
           outputColumnNames: _col0, _col1, _col2, _col3
-          Statistics: Num rows: 6 Data size: 42 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 24 Data size: 168 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
             table:
@@ -306,7 +306,7 @@ STAGE PLANS:
               key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
               sort order: +++
               Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string)
-              Statistics: Num rows: 6 Data size: 42 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 24 Data size: 168 Basic stats: COMPLETE Column stats: NONE
               value expressions: _col3 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
@@ -314,14 +314,14 @@ STAGE PLANS:
           keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
           mode: final
           outputColumnNames: _col0, _col1, _col2, _col3
-          Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 12 Data size: 84 Basic stats: COMPLETE Column stats: NONE
           Select Operator
             expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint)
             outputColumnNames: _col0, _col1, _col2
-            Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 12 Data size: 84 Basic stats: COMPLETE Column stats: NONE
             File Output Operator
               compressed: false
-              Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 12 Data size: 84 Basic stats: COMPLETE Column stats: NONE
               table:
                   input format: org.apache.hadoop.mapred.TextInputFormat
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

Modified: hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out?rev=1623185&r1=1623184&r2=1623185&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out Sun Sep  7 20:13:11 2014
@@ -62,12 +62,12 @@ STAGE PLANS:
                 keys: a (type: string), b (type: string), '0' (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                Statistics: Num rows: 0 Data size: 72 Basic stats: PARTIAL Column stats: NONE
+                Statistics: Num rows: 0 Data size: 288 Basic stats: PARTIAL Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
                   sort order: +++
                   Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string)
-                  Statistics: Num rows: 0 Data size: 72 Basic stats: PARTIAL Column stats: NONE
+                  Statistics: Num rows: 0 Data size: 288 Basic stats: PARTIAL Column stats: NONE
                   value expressions: _col3 (type: struct<count:bigint,sum:double,input:string>), _col4 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
@@ -162,7 +162,7 @@ STAGE PLANS:
           keys: KEY._col0 (type: string), KEY._col1 (type: string), '0' (type: string)
           mode: partials
           outputColumnNames: _col0, _col1, _col2, _col3, _col4
-          Statistics: Num rows: 0 Data size: 72 Basic stats: PARTIAL Column stats: NONE
+          Statistics: Num rows: 0 Data size: 288 Basic stats: PARTIAL Column stats: NONE
           File Output Operator
             compressed: false
             table:
@@ -178,7 +178,7 @@ STAGE PLANS:
               key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
               sort order: +++
               Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string)
-              Statistics: Num rows: 0 Data size: 72 Basic stats: PARTIAL Column stats: NONE
+              Statistics: Num rows: 0 Data size: 288 Basic stats: PARTIAL Column stats: NONE
               value expressions: _col3 (type: struct<count:bigint,sum:double,input:string>), _col4 (type: bigint)
       Reduce Operator Tree:
         Group By Operator

Modified: hive/trunk/ql/src/test/results/clientpositive/groupby_rollup1.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/groupby_rollup1.q.out?rev=1623185&r1=1623184&r2=1623185&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/groupby_rollup1.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/groupby_rollup1.q.out Sun Sep  7 20:13:11 2014
@@ -44,12 +44,12 @@ STAGE PLANS:
                 keys: key (type: string), val (type: string), '0' (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
                   sort order: +++
                   Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string)
-                  Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                  Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE
                   value expressions: _col3 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
@@ -122,12 +122,12 @@ STAGE PLANS:
                 keys: key (type: string), '0' (type: string), val (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
                   sort order: +++
                   Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
-                  Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                  Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE
       Reduce Operator Tree:
         Group By Operator
           aggregations: count(DISTINCT KEY._col2:0._col0)
@@ -194,12 +194,12 @@ STAGE PLANS:
                 keys: key (type: string), val (type: string), '0' (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
                   sort order: +++
                   Map-reduce partition columns: rand() (type: double)
-                  Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                  Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE
                   value expressions: _col3 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
@@ -207,7 +207,7 @@ STAGE PLANS:
           keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
           mode: partials
           outputColumnNames: _col0, _col1, _col2, _col3
-          Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+          Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE
           File Output Operator
             compressed: false
             table:
@@ -223,7 +223,7 @@ STAGE PLANS:
               key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
               sort order: +++
               Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
-              Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+              Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE
               value expressions: _col3 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
@@ -296,12 +296,12 @@ STAGE PLANS:
                 keys: key (type: string), '0' (type: string), val (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
                   sort order: +++
                   Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                  Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE
       Reduce Operator Tree:
         Group By Operator
           aggregations: count(DISTINCT KEY._col2:0._col0)
@@ -393,12 +393,12 @@ STAGE PLANS:
                 keys: key (type: string), val (type: string), '0' (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
                   sort order: +++
                   Map-reduce partition columns: rand() (type: double)
-                  Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                  Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE
                   value expressions: _col3 (type: bigint)
             Select Operator
               expressions: key (type: string), val (type: string)
@@ -409,7 +409,7 @@ STAGE PLANS:
                 keys: key (type: string), val (type: string), '0' (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+                Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE
                 File Output Operator
                   compressed: false
                   table:
@@ -422,7 +422,7 @@ STAGE PLANS:
           keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
           mode: partials
           outputColumnNames: _col0, _col1, _col2, _col3
-          Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+          Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE
           File Output Operator
             compressed: false
             table:
@@ -438,7 +438,7 @@ STAGE PLANS:
               key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
               sort order: +++
               Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
-              Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+              Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE
               value expressions: _col3 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
@@ -481,7 +481,7 @@ STAGE PLANS:
               key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
               sort order: +++
               Map-reduce partition columns: rand() (type: double)
-              Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+              Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE
               value expressions: _col3 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
@@ -489,7 +489,7 @@ STAGE PLANS:
           keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
           mode: partials
           outputColumnNames: _col0, _col1, _col2, _col3
-          Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+          Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE
           File Output Operator
             compressed: false
             table:
@@ -505,7 +505,7 @@ STAGE PLANS:
               key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
               sort order: +++
               Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
-              Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+              Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE
               value expressions: _col3 (type: bigint)
       Reduce Operator Tree:
         Group By Operator

Modified: hive/trunk/ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out?rev=1623185&r1=1623184&r2=1623185&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out Sun Sep  7 20:13:11 2014
@@ -51,12 +51,12 @@ STAGE PLANS:
                 keys: key (type: string), value (type: string), '0' (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 87 Data size: 17436 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
                   sort order: +++
                   Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string)
-                  Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 87 Data size: 17436 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col3 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
@@ -64,14 +64,14 @@ STAGE PLANS:
           keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
           mode: mergepartial
           outputColumnNames: _col0, _col1, _col2, _col3
-          Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 43 Data size: 8617 Basic stats: COMPLETE Column stats: NONE
           Select Operator
             expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint)
             outputColumnNames: _col0, _col1, _col2
-            Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 43 Data size: 8617 Basic stats: COMPLETE Column stats: NONE
             File Output Operator
               compressed: false
-              Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 43 Data size: 8617 Basic stats: COMPLETE Column stats: NONE
               table:
                   input format: org.apache.hadoop.mapred.TextInputFormat
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -238,12 +238,12 @@ STAGE PLANS:
                 keys: key (type: string), value (type: string), '0' (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
                   sort order: +++
                   Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string)
-                  Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 116 Data size: 23248 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col3 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
@@ -251,14 +251,14 @@ STAGE PLANS:
           keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
           mode: mergepartial
           outputColumnNames: _col0, _col1, _col2, _col3
-          Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
           Select Operator
             expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint)
             outputColumnNames: _col0, _col1, _col2
-            Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
             File Output Operator
               compressed: false
-              Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
               table:
                   input format: org.apache.hadoop.mapred.TextInputFormat
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -425,12 +425,12 @@ STAGE PLANS:
                 keys: key (type: string), value (type: string), '0' (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
                   sort order: +++
                   Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string)
-                  Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col3 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
@@ -438,14 +438,14 @@ STAGE PLANS:
           keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
           mode: mergepartial
           outputColumnNames: _col0, _col1, _col2, _col3
-          Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
           Select Operator
             expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint)
             outputColumnNames: _col0, _col1, _col2
-            Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
             File Output Operator
               compressed: false
-              Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
               table:
                   input format: org.apache.hadoop.mapred.TextInputFormat
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat