You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2016/08/08 23:49:02 UTC
hive git commit: HIVE-14442 : CBO: Calcite Operator To Hive Operator(Calcite Return Path): Wrong result/plan in group by with hive.map.aggr=false (Vineet Garg via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan

Repository: hive
Updated Branches:
  refs/heads/master 84708e8a4 -> 3d4408f7d


HIVE-14442 : CBO: Calcite Operator To Hive Operator(Calcite Return Path): Wrong result/plan in group by with hive.map.aggr=false (Vineet Garg via Ashutosh Chauhan)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/3d4408f7
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/3d4408f7
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/3d4408f7

Branch: refs/heads/master
Commit: 3d4408f7d0aeb4a703757a8b6de3fca02a76201c
Parents: 84708e8
Author: Vineet Garg <vg...@hortonworks.com>
Authored: Mon Aug 8 16:45:32 2016 -0700
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Mon Aug 8 16:45:32 2016 -0700

----------------------------------------------------------------------
 .../calcite/translator/HiveGBOpConvUtil.java    |  16 +-
 ql/src/test/queries/clientpositive/count.q      |  16 +
 ql/src/test/results/clientpositive/count.q.out  | 250 +++++++++++++++
 .../results/clientpositive/spark/count.q.out    | 314 ++++++++++++++++++
 .../test/results/clientpositive/tez/count.q.out | 318 +++++++++++++++++++
 5 files changed, 911 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/3d4408f7/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java
index 3ecbbb1..25fe059 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java
@@ -703,8 +703,18 @@ public class HiveGBOpConvUtil {
     String outputColName;
 
     // 1. Add GB Keys to reduce keys
-    ArrayList<ExprNodeDesc> reduceKeys = getReduceKeysForRS(inputOpAf.inputs.get(0), 0,
-        gbInfo.gbKeys.size() - 1, outputKeyColumnNames, false, colInfoLst, colExprMap, false, false);
+    ArrayList<ExprNodeDesc> reduceKeys= new ArrayList<ExprNodeDesc>();
+    for (int i = 0; i < gbInfo.gbKeys.size(); i++) {
+      // gbInfo already holds the ExprNodeDesc for each group-by key, so add it to the reduce keys directly
+      reduceKeys.add(gbInfo.gbKeys.get(i));
+      String colOutputName = SemanticAnalyzer.getColumnInternalName(i);
+      outputKeyColumnNames.add(colOutputName);
+      colInfoLst.add(new ColumnInfo(Utilities.ReduceField.KEY.toString() + "." + colOutputName, gbInfo.gbKeyTypes.get(i), "", false));
+      colExprMap.put(colOutputName, gbInfo.gbKeys.get(i));
+    }
+
+    // Note: GROUPING SETS are not allowed when map-side aggregation is disabled, so this path does not need to handle them
+
     int keyLength = reduceKeys.size();
 
     // 2. Add Dist UDAF args to reduce keys
@@ -1002,7 +1012,7 @@ public class HiveGBOpConvUtil {
     int udafColStartPosInOriginalGB = gbInfo.gbKeys.size();
     // the positions in rsColInfoLst are as follows
     // --grpkey--,--distkey--,--values--
-    // but distUDAF may be before/after some non-distUDAF, 
+    // but distUDAF may be before/after some non-distUDAF,
     // i.e., their positions can be mixed.
     // so we first process distUDAF and then non-distUDAF.
     // But we need to remember the sequence of udafs.

http://git-wip-us.apache.org/repos/asf/hive/blob/3d4408f7/ql/src/test/queries/clientpositive/count.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/count.q b/ql/src/test/queries/clientpositive/count.q
index bb84030..41ffaf2 100644
--- a/ql/src/test/queries/clientpositive/count.q
+++ b/ql/src/test/queries/clientpositive/count.q
@@ -18,3 +18,19 @@ select a, count(distinct b), count(distinct c), sum(d) from abcd group by a;
 
 explain select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd;
 select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd;
+
+set hive.cbo.returnpath.hiveop=true;
+
+explain select count(distinct b) from abcd group by a;
+select count(distinct b) from abcd group by a;
+
+explain select count(distinct b) from abcd group by b;
+select count(distinct b) from abcd group by b;
+
+explain select count(distinct b) from abcd group by c;
+select count(distinct b) from abcd group by c;
+
+explain select count(b), count(distinct c) from abcd group by d;
+select count(b), count(distinct c) from abcd group by d;
+
+set hive.cbo.returnpath.hiveop=false;

http://git-wip-us.apache.org/repos/asf/hive/blob/3d4408f7/ql/src/test/results/clientpositive/count.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/count.q.out b/ql/src/test/results/clientpositive/count.q.out
index 38928b7..c950c5b 100644
--- a/ql/src/test/results/clientpositive/count.q.out
+++ b/ql/src/test/results/clientpositive/count.q.out
@@ -264,3 +264,253 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@abcd
 #### A masked pattern was here ####
 7	7	6	6	6	7	3	3	6	7	4	5	6	6	5	6	4	5	5	5	4
+PREHOOK: query: explain select count(distinct b) from abcd group by a
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(distinct b) from abcd group by a
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: abcd
+            Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: a (type: int), b (type: int)
+              outputColumnNames: a, b
+              Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: a (type: int), b (type: int)
+                sort order: ++
+                Map-reduce partition columns: a (type: int)
+                Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(DISTINCT KEY._col1:0._col0)
+          keys: KEY._col0 (type: int)
+          mode: complete
+          outputColumnNames: a, $f1
+          Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: $f1 (type: bigint)
+            outputColumnNames: _o__c0
+            Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(distinct b) from abcd group by a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@abcd
+#### A masked pattern was here ####
+POSTHOOK: query: select count(distinct b) from abcd group by a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@abcd
+#### A masked pattern was here ####
+1
+1
+1
+2
+PREHOOK: query: explain select count(distinct b) from abcd group by b
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(distinct b) from abcd group by b
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: abcd
+            Statistics: Num rows: 19 Data size: 78 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: b (type: int)
+              outputColumnNames: b
+              Statistics: Num rows: 19 Data size: 78 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: b (type: int)
+                sort order: ++
+                Map-reduce partition columns: b (type: int)
+                Statistics: Num rows: 19 Data size: 78 Basic stats: COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(DISTINCT KEY._col1:0._col0)
+          keys: KEY._col0 (type: int)
+          mode: complete
+          outputColumnNames: b, $f1
+          Statistics: Num rows: 9 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: $f1 (type: bigint)
+            outputColumnNames: _o__c0
+            Statistics: Num rows: 9 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 9 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(distinct b) from abcd group by b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@abcd
+#### A masked pattern was here ####
+POSTHOOK: query: select count(distinct b) from abcd group by b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@abcd
+#### A masked pattern was here ####
+0
+1
+1
+1
+PREHOOK: query: explain select count(distinct b) from abcd group by c
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(distinct b) from abcd group by c
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: abcd
+            Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: b (type: int), c (type: int)
+              outputColumnNames: b, c
+              Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: c (type: int), b (type: int)
+                sort order: ++
+                Map-reduce partition columns: c (type: int)
+                Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(DISTINCT KEY._col1:0._col0)
+          keys: KEY._col0 (type: int)
+          mode: complete
+          outputColumnNames: c, $f1
+          Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: $f1 (type: bigint)
+            outputColumnNames: _o__c0
+            Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(distinct b) from abcd group by c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@abcd
+#### A masked pattern was here ####
+POSTHOOK: query: select count(distinct b) from abcd group by c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@abcd
+#### A masked pattern was here ####
+0
+1
+1
+1
+1
+1
+1
+PREHOOK: query: explain select count(b), count(distinct c) from abcd group by d
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(b), count(distinct c) from abcd group by d
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: abcd
+            Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: c (type: int), d (type: int)
+              outputColumnNames: c, d
+              Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: d (type: int), c (type: int)
+                sort order: ++
+                Map-reduce partition columns: d (type: int)
+                Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(KEY._col1:0._col0), count(DISTINCT KEY._col1:0._col0)
+          keys: KEY._col0 (type: int)
+          mode: complete
+          outputColumnNames: d, $f1, $f2
+          Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: $f1 (type: bigint), $f2 (type: bigint)
+            outputColumnNames: _o__c0, _o__c1
+            Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(b), count(distinct c) from abcd group by d
+PREHOOK: type: QUERY
+PREHOOK: Input: default@abcd
+#### A masked pattern was here ####
+POSTHOOK: query: select count(b), count(distinct c) from abcd group by d
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@abcd
+#### A masked pattern was here ####
+0	0
+1	1
+1	1
+1	1
+1	1
+1	1
+1	1

http://git-wip-us.apache.org/repos/asf/hive/blob/3d4408f7/ql/src/test/results/clientpositive/spark/count.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/count.q.out b/ql/src/test/results/clientpositive/spark/count.q.out
index 9aefba7..b1ad662 100644
--- a/ql/src/test/results/clientpositive/spark/count.q.out
+++ b/ql/src/test/results/clientpositive/spark/count.q.out
@@ -288,3 +288,317 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@abcd
 #### A masked pattern was here ####
 7	7	6	6	6	7	3	3	6	7	4	5	6	6	5	6	4	5	5	5	4
+PREHOOK: query: explain select count(distinct b) from abcd group by a
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(distinct b) from abcd group by a
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP, 2)
+        Reducer 3 <- Reducer 2 (GROUP, 2)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: abcd
+                  Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: a (type: int), b (type: int)
+                    outputColumnNames: a, b
+                    Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: a (type: int), b (type: int)
+                      sort order: ++
+                      Map-reduce partition columns: a (type: int), b (type: int)
+                      Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: int), KEY._col1 (type: int)
+                mode: complete
+                outputColumnNames: a, b
+                Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: a (type: int)
+                  sort order: +
+                  Map-reduce partition columns: a (type: int)
+                  Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: b (type: int)
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: int)
+                mode: complete
+                outputColumnNames: a, $f1
+                Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: $f1 (type: bigint)
+                  outputColumnNames: _o__c0
+                  Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(distinct b) from abcd group by a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@abcd
+#### A masked pattern was here ####
+POSTHOOK: query: select count(distinct b) from abcd group by a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@abcd
+#### A masked pattern was here ####
+1
+1
+1
+2
+PREHOOK: query: explain select count(distinct b) from abcd group by b
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(distinct b) from abcd group by b
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP, 2)
+        Reducer 3 <- Reducer 2 (GROUP, 2)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: abcd
+                  Statistics: Num rows: 19 Data size: 78 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: b (type: int)
+                    outputColumnNames: b
+                    Statistics: Num rows: 19 Data size: 78 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: b (type: int)
+                      sort order: +
+                      Map-reduce partition columns: b (type: int)
+                      Statistics: Num rows: 19 Data size: 78 Basic stats: COMPLETE Column stats: NONE
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: int)
+                mode: complete
+                outputColumnNames: b
+                Statistics: Num rows: 9 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: b (type: int)
+                  sort order: +
+                  Map-reduce partition columns: b (type: int)
+                  Statistics: Num rows: 9 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(KEY._col0)
+                keys: KEY._col0 (type: int)
+                mode: complete
+                outputColumnNames: b, $f1
+                Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: $f1 (type: bigint)
+                  outputColumnNames: _o__c0
+                  Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(distinct b) from abcd group by b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@abcd
+#### A masked pattern was here ####
+POSTHOOK: query: select count(distinct b) from abcd group by b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@abcd
+#### A masked pattern was here ####
+0
+1
+1
+1
+PREHOOK: query: explain select count(distinct b) from abcd group by c
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(distinct b) from abcd group by c
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP, 2)
+        Reducer 3 <- Reducer 2 (GROUP, 2)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: abcd
+                  Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: b (type: int), c (type: int)
+                    outputColumnNames: b, c
+                    Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: b (type: int), c (type: int)
+                      sort order: ++
+                      Map-reduce partition columns: b (type: int), c (type: int)
+                      Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: int), KEY._col1 (type: int)
+                mode: complete
+                outputColumnNames: b, c
+                Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: c (type: int)
+                  sort order: +
+                  Map-reduce partition columns: c (type: int)
+                  Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(KEY._col0)
+                keys: KEY._col0 (type: int)
+                mode: complete
+                outputColumnNames: c, $f1
+                Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: $f1 (type: bigint)
+                  outputColumnNames: _o__c0
+                  Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(distinct b) from abcd group by c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@abcd
+#### A masked pattern was here ####
+POSTHOOK: query: select count(distinct b) from abcd group by c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@abcd
+#### A masked pattern was here ####
+0
+1
+1
+1
+1
+1
+1
+PREHOOK: query: explain select count(b), count(distinct c) from abcd group by d
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(b), count(distinct c) from abcd group by d
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: abcd
+                  Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: c (type: int), d (type: int)
+                    outputColumnNames: c, d
+                    Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: d (type: int), c (type: int)
+                      sort order: ++
+                      Map-reduce partition columns: d (type: int)
+                      Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(KEY._col1:0._col0), count(DISTINCT KEY._col1:0._col0)
+                keys: KEY._col0 (type: int)
+                mode: complete
+                outputColumnNames: d, $f1, $f2
+                Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: $f1 (type: bigint), $f2 (type: bigint)
+                  outputColumnNames: _o__c0, _o__c1
+                  Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(b), count(distinct c) from abcd group by d
+PREHOOK: type: QUERY
+PREHOOK: Input: default@abcd
+#### A masked pattern was here ####
+POSTHOOK: query: select count(b), count(distinct c) from abcd group by d
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@abcd
+#### A masked pattern was here ####
+0	0
+1	1
+1	1
+1	1
+1	1
+1	1
+1	1

http://git-wip-us.apache.org/repos/asf/hive/blob/3d4408f7/ql/src/test/results/clientpositive/tez/count.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/count.q.out b/ql/src/test/results/clientpositive/tez/count.q.out
index 656b688..9fc2c75 100644
--- a/ql/src/test/results/clientpositive/tez/count.q.out
+++ b/ql/src/test/results/clientpositive/tez/count.q.out
@@ -292,3 +292,321 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@abcd
 #### A masked pattern was here ####
 7	7	6	6	6	7	3	3	6	7	4	5	6	6	5	6	4	5	5	5	4
+PREHOOK: query: explain select count(distinct b) from abcd group by a
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(distinct b) from abcd group by a
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: abcd
+                  Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: a (type: int), b (type: int)
+                    outputColumnNames: a, b
+                    Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: a (type: int), b (type: int)
+                      sort order: ++
+                      Map-reduce partition columns: a (type: int), b (type: int)
+                      Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: int), KEY._col1 (type: int)
+                mode: complete
+                outputColumnNames: a, b
+                Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: a (type: int)
+                  sort order: +
+                  Map-reduce partition columns: a (type: int)
+                  Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: b (type: int)
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: int)
+                mode: complete
+                outputColumnNames: a, $f1
+                Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: $f1 (type: bigint)
+                  outputColumnNames: _o__c0
+                  Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(distinct b) from abcd group by a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@abcd
+#### A masked pattern was here ####
+POSTHOOK: query: select count(distinct b) from abcd group by a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@abcd
+#### A masked pattern was here ####
+1
+1
+1
+2
+PREHOOK: query: explain select count(distinct b) from abcd group by b
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(distinct b) from abcd group by b
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: abcd
+                  Statistics: Num rows: 19 Data size: 78 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: b (type: int)
+                    outputColumnNames: b
+                    Statistics: Num rows: 19 Data size: 78 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: b (type: int)
+                      sort order: +
+                      Map-reduce partition columns: b (type: int)
+                      Statistics: Num rows: 19 Data size: 78 Basic stats: COMPLETE Column stats: NONE
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: int)
+                mode: complete
+                outputColumnNames: b
+                Statistics: Num rows: 9 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: b (type: int)
+                  sort order: +
+                  Map-reduce partition columns: b (type: int)
+                  Statistics: Num rows: 9 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(KEY._col0)
+                keys: KEY._col0 (type: int)
+                mode: complete
+                outputColumnNames: b, $f1
+                Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: $f1 (type: bigint)
+                  outputColumnNames: _o__c0
+                  Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(distinct b) from abcd group by b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@abcd
+#### A masked pattern was here ####
+POSTHOOK: query: select count(distinct b) from abcd group by b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@abcd
+#### A masked pattern was here ####
+0
+1
+1
+1
+PREHOOK: query: explain select count(distinct b) from abcd group by c
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(distinct b) from abcd group by c
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: abcd
+                  Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: b (type: int), c (type: int)
+                    outputColumnNames: b, c
+                    Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: b (type: int), c (type: int)
+                      sort order: ++
+                      Map-reduce partition columns: b (type: int), c (type: int)
+                      Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: int), KEY._col1 (type: int)
+                mode: complete
+                outputColumnNames: b, c
+                Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: c (type: int)
+                  sort order: +
+                  Map-reduce partition columns: c (type: int)
+                  Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(KEY._col0)
+                keys: KEY._col0 (type: int)
+                mode: complete
+                outputColumnNames: c, $f1
+                Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: $f1 (type: bigint)
+                  outputColumnNames: _o__c0
+                  Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(distinct b) from abcd group by c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@abcd
+#### A masked pattern was here ####
+POSTHOOK: query: select count(distinct b) from abcd group by c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@abcd
+#### A masked pattern was here ####
+0
+1
+1
+1
+1
+1
+1
+PREHOOK: query: explain select count(b), count(distinct c) from abcd group by d
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(b), count(distinct c) from abcd group by d
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: abcd
+                  Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: c (type: int), d (type: int)
+                    outputColumnNames: c, d
+                    Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: d (type: int), c (type: int)
+                      sort order: ++
+                      Map-reduce partition columns: d (type: int)
+                      Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(KEY._col1:0._col0), count(DISTINCT KEY._col1:0._col0)
+                keys: KEY._col0 (type: int)
+                mode: complete
+                outputColumnNames: d, $f1, $f2
+                Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: $f1 (type: bigint), $f2 (type: bigint)
+                  outputColumnNames: _o__c0, _o__c1
+                  Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(b), count(distinct c) from abcd group by d
+PREHOOK: type: QUERY
+PREHOOK: Input: default@abcd
+#### A masked pattern was here ####
+POSTHOOK: query: select count(b), count(distinct c) from abcd group by d
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@abcd
+#### A masked pattern was here ####
+0	0
+1	1
+1	1
+1	1
+1	1
+1	1
+1	1