Posted to commits@hive.apache.org by se...@apache.org on 2018/06/04 21:50:15 UTC

[1/4] hive git commit: HIVE-19690 : multi-insert query with multiple GBY, and distinct in only some branches can produce incorrect results (Sergey Shelukhin, reviewed by Ashutosh Chauhan)

Repository: hive
Updated Branches:
  refs/heads/branch-3 c2cc42c1c -> 5ec8e356d
  refs/heads/master 43e331e35 -> 5667af34c
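
For context, HIVE-19690 affects multi-insert queries where only some INSERT
branches use DISTINCT aggregates. The shape of an affected query, taken
verbatim from the multi_insert_distinct.q test added by this commit:

  FROM tmp1
  INSERT INTO tmp_grouped_by_one_col
  SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
  INSERT INTO tmp_grouped_by_two_col
  SELECT v1, v2, count(v3) GROUP BY v1, v2;

Before this fix, the planner could route both branches through a single
shuffle keyed for the DISTINCT branch, so the plain count(v3) branch consumed
distinct-key-structured rows it was not prepared for (see the new check in
SemanticAnalyzer.java below) and could produce incorrect results.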


http://git-wip-us.apache.org/repos/asf/hive/blob/5667af34/ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out b/ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out
index 9c4cdec..113ff46 100644
--- a/ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out
+++ b/ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out
@@ -47,10 +47,11 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2)
+        Reducer 2 <- Map 4 (GROUP PARTITION-LEVEL SORT, 2)
+        Reducer 3 <- Map 5 (GROUP, 2)
 #### A masked pattern was here ####
       Vertices:
-        Map 1 
+        Map 4 
             Map Operator Tree:
                 TableScan
                   alias: src
@@ -59,53 +60,84 @@ STAGE PLANS:
                     expressions: key (type: string), UDFToDouble(key) (type: double), value (type: string)
                     outputColumnNames: _col0, _col1, _col2
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: string), _col2 (type: string)
-                      sort order: ++
-                      Map-reduce partition columns: _col0 (type: string)
+                    Select Operator
+                      expressions: _col0 (type: string), _col2 (type: string)
+                      outputColumnNames: _col0, _col2
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col1 (type: double)
+                      Group By Operator
+                        aggregations: count(DISTINCT _col2)
+                        keys: _col0 (type: string), _col2 (type: string)
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2
+                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: string), _col1 (type: string)
+                          sort order: ++
+                          Map-reduce partition columns: _col0 (type: string)
+                          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string), UDFToDouble(key) (type: double), value (type: string)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: sum(_col1)
+                      keys: _col0 (type: string), _col2 (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: string)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col2 (type: double)
             Execution mode: vectorized
         Reducer 2 
             Reduce Operator Tree:
-              Forward
-                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  aggregations: count(DISTINCT KEY._col1:0._col0)
-                  keys: KEY._col0 (type: string)
-                  mode: complete
+              Group By Operator
+                aggregations: count(DISTINCT KEY._col1:0._col0)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: string), UDFToDouble(_col1) (type: double)
                   outputColumnNames: _col0, _col1
                   Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: _col0 (type: string), UDFToDouble(_col1) (type: double)
-                    outputColumnNames: _col0, _col1
+                  File Output Operator
+                    compressed: false
                     Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          name: default.e1_n2
-                Group By Operator
-                  aggregations: sum(VALUE._col0)
-                  keys: KEY._col0 (type: string), KEY._col1:0._col0 (type: string)
-                  mode: complete
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: default.e1_n2
+        Reducer 3 
+            Execution mode: vectorized
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: string), _col2 (type: double), _col1 (type: string)
                   outputColumnNames: _col0, _col1, _col2
                   Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: _col0 (type: string), _col2 (type: double), _col1 (type: string)
-                    outputColumnNames: _col0, _col1, _col2
+                  File Output Operator
+                    compressed: false
                     Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          name: default.e2_n3
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: default.e2_n3
 
   Stage: Stage-0
     Move Operator
@@ -160,10 +192,11 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2)
+        Reducer 2 <- Map 4 (GROUP, 2)
+        Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 2)
 #### A masked pattern was here ####
       Vertices:
-        Map 1 
+        Map 4 
             Map Operator Tree:
                 TableScan
                   alias: src
@@ -172,53 +205,84 @@ STAGE PLANS:
                     expressions: key (type: string), UDFToDouble(key) (type: double), value (type: string)
                     outputColumnNames: _col0, _col1, _col2
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: string), _col2 (type: string)
-                      sort order: ++
-                      Map-reduce partition columns: _col0 (type: string)
+                    Group By Operator
+                      aggregations: sum(_col1)
+                      keys: _col0 (type: string), _col2 (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col1 (type: double)
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: string)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col2 (type: double)
             Execution mode: vectorized
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string), UDFToDouble(key) (type: double), value (type: string)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: _col0 (type: string), _col2 (type: string)
+                      outputColumnNames: _col0, _col2
+                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: count(DISTINCT _col2)
+                        keys: _col0 (type: string), _col2 (type: string)
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2
+                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: string), _col1 (type: string)
+                          sort order: ++
+                          Map-reduce partition columns: _col0 (type: string)
+                          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
         Reducer 2 
+            Execution mode: vectorized
             Reduce Operator Tree:
-              Forward
-                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  aggregations: count(DISTINCT KEY._col1:0._col0)
-                  keys: KEY._col0 (type: string)
-                  mode: complete
-                  outputColumnNames: _col0, _col1
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: string), _col2 (type: double), _col1 (type: string)
+                  outputColumnNames: _col0, _col1, _col2
                   Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: _col0 (type: string), UDFToDouble(_col1) (type: double)
-                    outputColumnNames: _col0, _col1
+                  File Output Operator
+                    compressed: false
                     Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          name: default.e1_n2
-                Group By Operator
-                  aggregations: sum(VALUE._col0)
-                  keys: KEY._col0 (type: string), KEY._col1:0._col0 (type: string)
-                  mode: complete
-                  outputColumnNames: _col0, _col1, _col2
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: default.e2_n3
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(DISTINCT KEY._col1:0._col0)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: string), UDFToDouble(_col1) (type: double)
+                  outputColumnNames: _col0, _col1
                   Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: _col0 (type: string), _col2 (type: double), _col1 (type: string)
-                    outputColumnNames: _col0, _col1, _col2
+                  File Output Operator
+                    compressed: false
                     Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          name: default.e2_n3
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: default.e1_n2
 
   Stage: Stage-0
     Move Operator
@@ -228,7 +292,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              name: default.e1_n2
+              name: default.e2_n3
 
   Stage: Stage-3
     Stats Work
@@ -242,7 +306,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              name: default.e2_n3
+              name: default.e1_n2
 
   Stage: Stage-4
     Stats Work
@@ -1731,11 +1795,36 @@ STAGE PLANS:
   Stage: Stage-3
     Spark
       Edges:
-        Reducer 2 <- Map 4 (GROUP PARTITION-LEVEL SORT, 2)
-        Reducer 3 <- Map 5 (GROUP, 2)
+        Reducer 2 <- Map 5 (GROUP PARTITION-LEVEL SORT, 2)
+        Reducer 3 <- Map 6 (GROUP, 2)
+        Reducer 4 <- Map 7 (GROUP, 2)
 #### A masked pattern was here ####
       Vertices:
-        Map 4 
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string), UDFToDouble(key) (type: double), value (type: string)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: _col0 (type: string), _col2 (type: string)
+                      outputColumnNames: _col0, _col2
+                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: count(DISTINCT _col2)
+                        keys: _col0 (type: string), _col2 (type: string)
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2
+                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: string), _col1 (type: string)
+                          sort order: ++
+                          Map-reduce partition columns: _col0 (type: string)
+                          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+        Map 6 
             Map Operator Tree:
                 TableScan
                   alias: src
@@ -1744,14 +1833,20 @@ STAGE PLANS:
                     expressions: key (type: string), UDFToDouble(key) (type: double), value (type: string)
                     outputColumnNames: _col0, _col1, _col2
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: string), _col2 (type: string)
-                      sort order: ++
-                      Map-reduce partition columns: _col0 (type: string)
+                    Group By Operator
+                      aggregations: sum(_col1)
+                      keys: _col0 (type: string), _col2 (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col1 (type: double)
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: string)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col2 (type: double)
             Execution mode: vectorized
-        Map 5 
+        Map 7 
             Map Operator Tree:
                 TableScan
                   alias: src
@@ -1773,45 +1868,46 @@ STAGE PLANS:
                         Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
         Reducer 2 
             Reduce Operator Tree:
-              Forward
-                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  aggregations: count(DISTINCT KEY._col1:0._col0)
-                  keys: KEY._col0 (type: string)
-                  mode: complete
+              Group By Operator
+                aggregations: count(DISTINCT KEY._col1:0._col0)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: string), UDFToDouble(_col1) (type: double)
                   outputColumnNames: _col0, _col1
                   Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: _col0 (type: string), UDFToDouble(_col1) (type: double)
-                    outputColumnNames: _col0, _col1
+                  File Output Operator
+                    compressed: false
                     Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          name: default.e1_n2
-                Group By Operator
-                  aggregations: sum(VALUE._col0)
-                  keys: KEY._col0 (type: string), KEY._col1:0._col0 (type: string)
-                  mode: complete
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: default.e1_n2
+        Reducer 3 
+            Execution mode: vectorized
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: string), _col2 (type: double), _col1 (type: string)
                   outputColumnNames: _col0, _col1, _col2
                   Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: _col0 (type: string), _col2 (type: double), _col1 (type: string)
-                    outputColumnNames: _col0, _col1, _col2
+                  File Output Operator
+                    compressed: false
                     Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          name: default.e2_n3
-        Reducer 3 
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: default.e2_n3
+        Reducer 4 
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(DISTINCT KEY._col3:0._col0)


[4/4] hive git commit: HIVE-19690 : multi-insert query with multiple GBY, and distinct in only some branches can produce incorrect results (Sergey Shelukhin, reviewed by Ashutosh Chauhan)

Posted by se...@apache.org.
HIVE-19690 : multi-insert query with multiple GBY, and distinct in only some branches can produce incorrect results (Sergey Shelukhin, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/5ec8e356
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/5ec8e356
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/5ec8e356

Branch: refs/heads/branch-3
Commit: 5ec8e356d88318ebebbd3e7e320eae2b8fd20218
Parents: c2cc42c
Author: sergey <se...@apache.org>
Authored: Mon Jun 4 14:42:06 2018 -0700
Committer: sergey <se...@apache.org>
Committed: Mon Jun 4 14:45:05 2018 -0700

----------------------------------------------------------------------
 .../hadoop/hive/ql/exec/KeyWrapperFactory.java  |  12 +
 .../apache/hadoop/hive/ql/exec/MapOperator.java |   5 +
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  |  73 ++-
 .../clientpositive/multi_insert_distinct.q      |  66 ++
 .../clientpositive/multi_insert_distinct.q.out  | 534 +++++++++++++++++
 .../clientpositive/multi_insert_gby3.q.out      | 597 +++++++++++--------
 .../spark/multi_insert_gby3.q.out               | 344 +++++++----
 7 files changed, 1228 insertions(+), 403 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/5ec8e356/ql/src/java/org/apache/hadoop/hive/ql/exec/KeyWrapperFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/KeyWrapperFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/KeyWrapperFactory.java
index 3c7f0b7..71ee25d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/KeyWrapperFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/KeyWrapperFactory.java
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.hive.ql.exec;
 
+import java.util.Arrays;
+
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.serde2.objectinspector.ListObjectsEqualComparer;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -65,6 +67,11 @@ public class KeyWrapperFactory {
   class ListKeyWrapper extends KeyWrapper {
     int hashcode = -1;
     Object[] keys;
+    @Override
+    public String toString() {
+      return "ListKeyWrapper [keys=" + Arrays.toString(keys) + "]";
+    }
+
     // decide whether this is already in hashmap (keys in hashmap are deepcopied
     // version, and we need to use 'currentKeyObjectInspector').
     ListObjectsEqualComparer equalComparer;
@@ -165,6 +172,11 @@ public class KeyWrapperFactory {
   transient StringObjectInspector soi_new, soi_copy;
 
   class TextKeyWrapper extends KeyWrapper {
+    @Override
+    public String toString() {
+      return "TextKeyWrapper [key=" + key + "]";
+    }
+
     int hashcode;
     Object key;
     boolean isCopy;

http://git-wip-us.apache.org/repos/asf/hive/blob/5ec8e356/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java
index 29f3579..16d7c51 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java
@@ -90,6 +90,11 @@ public class MapOperator extends AbstractMapOperator {
 
   protected static class MapOpCtx {
 
+    @Override
+    public String toString() {
+      return "[alias=" + alias + ", op=" + op + "]";
+    }
+
     final String alias;
     final Operator<?> op;
     final PartitionDesc partDesc;

http://git-wip-us.apache.org/repos/asf/hive/blob/5ec8e356/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 66f4b67..506dc39 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -10213,62 +10213,62 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
 
     List<Operator<? extends OperatorDesc>> inputOperators =
         new ArrayList<Operator<? extends OperatorDesc>>(ks.size());
-    List<List<ExprNodeDesc>> sprayKeyLists = new ArrayList<List<ExprNodeDesc>>(ks.size());
-    List<List<ExprNodeDesc>> distinctKeyLists = new ArrayList<List<ExprNodeDesc>>(ks.size());
+    // We will try to combine multiple clauses into a smaller number with compatible keys.
+    List<List<ExprNodeDesc>> newSprayKeyLists = new ArrayList<List<ExprNodeDesc>>(ks.size());
+    List<List<ExprNodeDesc>> newDistinctKeyLists = new ArrayList<List<ExprNodeDesc>>(ks.size());
 
     // Iterate over each clause
     for (String dest : ks) {
       Operator input = inputs.get(dest);
       RowResolver inputRR = opParseCtx.get(input).getRowResolver();
 
-      List<ExprNodeDesc> distinctKeys = getDistinctExprs(qbp, dest, inputRR);
-      List<ExprNodeDesc> sprayKeys = new ArrayList<ExprNodeDesc>();
+      // Determine the keys for the current clause.
+      List<ExprNodeDesc> currentDistinctKeys = getDistinctExprs(qbp, dest, inputRR);
+      List<ExprNodeDesc> currentSprayKeys = determineSprayKeys(qbp, dest, inputRR);
 
-      // Add the group by expressions
-      List<ASTNode> grpByExprs = getGroupByForClause(qbp, dest);
-      for (ASTNode grpByExpr : grpByExprs) {
-        ExprNodeDesc exprDesc = genExprNodeDesc(grpByExpr, inputRR);
-        if (ExprNodeDescUtils.indexOf(exprDesc, sprayKeys) < 0) {
-          sprayKeys.add(exprDesc);
-        }
-      }
-
-      // Loop through each of the lists of exprs, looking for a match
+      // Loop through each of the lists of exprs, looking for a match.
       boolean found = false;
-      for (int i = 0; i < sprayKeyLists.size(); i++) {
+      for (int i = 0; i < newSprayKeyLists.size(); i++) {
         if (!input.equals(inputOperators.get(i))) {
           continue;
         }
+        // We will try to merge this clause into one of the previously added ones.
+        List<ExprNodeDesc> targetSprayKeys = newSprayKeyLists.get(i);
+        List<ExprNodeDesc> targetDistinctKeys = newDistinctKeyLists.get(i);
+        if (currentDistinctKeys.isEmpty() != targetDistinctKeys.isEmpty()) {
+          // GBY without distinct keys is not prepared to process distinct key structured rows.
+          continue;
+        }
 
-        if (distinctKeys.isEmpty()) {
+        if (currentDistinctKeys.isEmpty()) {
           // current dest has no distinct keys.
           List<ExprNodeDesc> combinedList = new ArrayList<ExprNodeDesc>();
-          combineExprNodeLists(sprayKeyLists.get(i), distinctKeyLists.get(i), combinedList);
-          if (!matchExprLists(combinedList, sprayKeys)) {
+          combineExprNodeLists(targetSprayKeys, targetDistinctKeys, combinedList);
+          if (!matchExprLists(combinedList, currentSprayKeys)) {
             continue;
           } // else do the common code at the end.
         } else {
-          if (distinctKeyLists.get(i).isEmpty()) {
+          if (targetDistinctKeys.isEmpty()) {
             List<ExprNodeDesc> combinedList = new ArrayList<ExprNodeDesc>();
-            combineExprNodeLists(sprayKeys, distinctKeys, combinedList);
-            if (!matchExprLists(combinedList, sprayKeyLists.get(i))) {
+            combineExprNodeLists(currentSprayKeys, currentDistinctKeys, combinedList);
+            if (!matchExprLists(combinedList, targetSprayKeys)) {
               continue;
             } else {
               // we have found a match. insert this distinct clause to head.
-              distinctKeyLists.remove(i);
-              sprayKeyLists.remove(i);
-              distinctKeyLists.add(i, distinctKeys);
-              sprayKeyLists.add(i, sprayKeys);
+              newDistinctKeyLists.remove(i);
+              newSprayKeyLists.remove(i);
+              newDistinctKeyLists.add(i, currentDistinctKeys);
+              newSprayKeyLists.add(i, currentSprayKeys);
               commonGroupByDestGroups.get(i).add(0, dest);
               found = true;
               break;
             }
           } else {
-            if (!matchExprLists(distinctKeyLists.get(i), distinctKeys)) {
+            if (!matchExprLists(targetDistinctKeys, currentDistinctKeys)) {
               continue;
             }
 
-            if (!matchExprLists(sprayKeyLists.get(i), sprayKeys)) {
+            if (!matchExprLists(targetSprayKeys, currentSprayKeys)) {
               continue;
             }
             // else do common code
@@ -10285,8 +10285,8 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
       // No match was found, so create new entries
       if (!found) {
         inputOperators.add(input);
-        sprayKeyLists.add(sprayKeys);
-        distinctKeyLists.add(distinctKeys);
+        newSprayKeyLists.add(currentSprayKeys);
+        newDistinctKeyLists.add(currentDistinctKeys);
         List<String> destGroup = new ArrayList<String>();
         destGroup.add(dest);
         commonGroupByDestGroups.add(destGroup);
@@ -10296,6 +10296,21 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
     return commonGroupByDestGroups;
   }
 
+  protected List<ExprNodeDesc> determineSprayKeys(QBParseInfo qbp, String dest,
+      RowResolver inputRR) throws SemanticException {
+    List<ExprNodeDesc> sprayKeys = new ArrayList<ExprNodeDesc>();
+
+    // Add the group by expressions
+    List<ASTNode> grpByExprs = getGroupByForClause(qbp, dest);
+    for (ASTNode grpByExpr : grpByExprs) {
+      ExprNodeDesc exprDesc = genExprNodeDesc(grpByExpr, inputRR);
+      if (ExprNodeDescUtils.indexOf(exprDesc, sprayKeys) < 0) {
+        sprayKeys.add(exprDesc);
+      }
+    }
+    return sprayKeys;
+  }
+
   private void combineExprNodeLists(List<ExprNodeDesc> list, List<ExprNodeDesc> list2,
                                     List<ExprNodeDesc> combinedList) {
     combinedList.addAll(list);
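
The crux of the SemanticAnalyzer change is the new early check: when exactly
one of two candidate clauses has distinct keys
(currentDistinctKeys.isEmpty() != targetDistinctKeys.isEmpty()), they are no
longer merged into one reduce sink, because a Group By without distinct keys
cannot process distinct-key-structured rows. A hedged illustration of the rule
with hypothetical tables and columns (the full rule also requires the spray
and distinct key lists to match via matchExprLists):

  -- May still share a shuffle: both branches have matching
  -- distinct keys and matching group-by (spray) keys.
  FROM t
  INSERT INTO a SELECT k, count(DISTINCT v) GROUP BY k
  INSERT INTO b SELECT k, count(DISTINCT v), sum(x) GROUP BY k;

  -- No longer merged after this fix: DISTINCT appears in only
  -- one branch, so each branch gets its own shuffle and reducer.
  FROM t
  INSERT INTO a SELECT k, count(DISTINCT v) GROUP BY k
  INSERT INTO b SELECT k, v, count(v) GROUP BY k, v;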

http://git-wip-us.apache.org/repos/asf/hive/blob/5ec8e356/ql/src/test/queries/clientpositive/multi_insert_distinct.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/multi_insert_distinct.q b/ql/src/test/queries/clientpositive/multi_insert_distinct.q
new file mode 100644
index 0000000..ac3e5ee
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/multi_insert_distinct.q
@@ -0,0 +1,66 @@
+--! qt:dataset:src
+
+CREATE TABLE tmp1 ( v1 string , v2 string , v3 string ) 
+ROW FORMAT DELIMITED 
+FIELDS TERMINATED BY '\t' 
+LINES TERMINATED BY '\n' 
+;
+
+INSERT INTO tmp1 VALUES ('v1', 'v2', 'v3'), ('v1', 'v2', 'v3a');
+
+
+CREATE TABLE tmp_grouped_by_all_col ( v1 string , v2 string , v3 string ) 
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n' ;
+CREATE TABLE tmp_grouped_by_one_col  ( v1 string , cnt__v2 int , cnt__v3 int ) 
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n' ;
+CREATE TABLE tmp_grouped_by_two_col  ( v1 string , v2 string , cnt__v3 int ) 
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n' ;
+
+
+set hive.explain.user=false;
+set hive.stats.autogather=false; 
+set hive.stats.column.autogather=false; 
+
+explain FROM tmp1
+INSERT INTO tmp_grouped_by_one_col 
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_two_col
+SELECT v1, v2, count(distinct v3) GROUP BY v1, v2;
+
+FROM tmp1
+INSERT INTO tmp_grouped_by_one_col 
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_two_col
+SELECT v1, v2, count(distinct v3) GROUP BY v1, v2;
+
+select * from tmp_grouped_by_two_col;
+
+truncate table tmp_grouped_by_two_col;
+
+explain FROM tmp1
+INSERT INTO tmp_grouped_by_one_col 
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_two_col
+SELECT v1, v2, count(v3) GROUP BY v1, v2;
+
+FROM tmp1
+INSERT INTO tmp_grouped_by_one_col 
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_two_col
+SELECT v1, v2, count(v3) GROUP BY v1, v2;
+
+select * from tmp_grouped_by_two_col;
+
+explain FROM tmp1
+INSERT INTO tmp_grouped_by_one_col 
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_all_col
+SELECT v1, v2, v3 GROUP BY v1, v2, v3;
+
+FROM tmp1
+INSERT INTO tmp_grouped_by_one_col 
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_all_col
+SELECT v1, v2, v3 GROUP BY v1, v2, v3;
+
+select * from tmp_grouped_by_all_col;
\ No newline at end of file
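
These tests pin down the semantics with a tiny dataset: tmp1 holds
('v1', 'v2', 'v3') and ('v1', 'v2', 'v3a'), so for the single group
(v1, v2) the count(distinct v3) and count(v3) variants should both write
v1, v2, 2 to tmp_grouped_by_two_col, as the expected .q.out below records.
A quick check of the arithmetic:

  -- group (v1='v1', v2='v2') contains v3 values {'v3', 'v3a'}
  -- count(v3)          = 2  (two non-NULL values)
  -- count(DISTINCT v3) = 2  (the two values differ)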

http://git-wip-us.apache.org/repos/asf/hive/blob/5ec8e356/ql/src/test/results/clientpositive/multi_insert_distinct.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/multi_insert_distinct.q.out b/ql/src/test/results/clientpositive/multi_insert_distinct.q.out
new file mode 100644
index 0000000..e86711a
--- /dev/null
+++ b/ql/src/test/results/clientpositive/multi_insert_distinct.q.out
@@ -0,0 +1,534 @@
+PREHOOK: query: CREATE TABLE tmp1 ( v1 string , v2 string , v3 string ) 
+ROW FORMAT DELIMITED 
+FIELDS TERMINATED BY '\t' 
+LINES TERMINATED BY '\n'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tmp1
+POSTHOOK: query: CREATE TABLE tmp1 ( v1 string , v2 string , v3 string ) 
+ROW FORMAT DELIMITED 
+FIELDS TERMINATED BY '\t' 
+LINES TERMINATED BY '\n'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tmp1
+PREHOOK: query: INSERT INTO tmp1 VALUES ('v1', 'v2', 'v3'), ('v1', 'v2', 'v3a')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tmp1
+POSTHOOK: query: INSERT INTO tmp1 VALUES ('v1', 'v2', 'v3'), ('v1', 'v2', 'v3a')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tmp1
+POSTHOOK: Lineage: tmp1.v1 SCRIPT []
+POSTHOOK: Lineage: tmp1.v2 SCRIPT []
+POSTHOOK: Lineage: tmp1.v3 SCRIPT []
+PREHOOK: query: CREATE TABLE tmp_grouped_by_all_col ( v1 string , v2 string , v3 string ) 
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tmp_grouped_by_all_col
+POSTHOOK: query: CREATE TABLE tmp_grouped_by_all_col ( v1 string , v2 string , v3 string ) 
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tmp_grouped_by_all_col
+PREHOOK: query: CREATE TABLE tmp_grouped_by_one_col  ( v1 string , cnt__v2 int , cnt__v3 int ) 
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tmp_grouped_by_one_col
+POSTHOOK: query: CREATE TABLE tmp_grouped_by_one_col  ( v1 string , cnt__v2 int , cnt__v3 int ) 
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tmp_grouped_by_one_col
+PREHOOK: query: CREATE TABLE tmp_grouped_by_two_col  ( v1 string , v2 string , cnt__v3 int ) 
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tmp_grouped_by_two_col
+POSTHOOK: query: CREATE TABLE tmp_grouped_by_two_col  ( v1 string , v2 string , cnt__v3 int ) 
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tmp_grouped_by_two_col
+PREHOOK: query: explain FROM tmp1
+INSERT INTO tmp_grouped_by_one_col 
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_two_col
+SELECT v1, v2, count(distinct v3) GROUP BY v1, v2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain FROM tmp1
+INSERT INTO tmp_grouped_by_one_col 
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_two_col
+SELECT v1, v2, count(distinct v3) GROUP BY v1, v2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-2 is a root stage
+  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-2
+  Stage-1 depends on stages: Stage-3
+
+STAGE PLANS:
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: tmp1
+            Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: v1 (type: string), v2 (type: string), v3 (type: string)
+              outputColumnNames: v1, v2, v3
+              Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: count(DISTINCT v2), count(DISTINCT v3)
+                keys: v1 (type: string), v2 (type: string), v3 (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                  sort order: +++
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: v1 (type: string), v2 (type: string), v3 (type: string)
+              outputColumnNames: v1, v2, v3
+              Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: count(DISTINCT v3)
+                keys: v1 (type: string), v2 (type: string), v3 (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int)
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name: default.tmp_grouped_by_one_col
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: false
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.tmp_grouped_by_one_col
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+              sort order: +++
+              Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+              Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(DISTINCT KEY._col2:0._col0)
+          keys: KEY._col0 (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: string), UDFToInteger(_col2) (type: int)
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name: default.tmp_grouped_by_two_col
+
+  Stage: Stage-1
+    Move Operator
+      tables:
+          replace: false
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.tmp_grouped_by_two_col
+
+PREHOOK: query: FROM tmp1
+INSERT INTO tmp_grouped_by_one_col 
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_two_col
+SELECT v1, v2, count(distinct v3) GROUP BY v1, v2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tmp1
+PREHOOK: Output: default@tmp_grouped_by_one_col
+PREHOOK: Output: default@tmp_grouped_by_two_col
+POSTHOOK: query: FROM tmp1
+INSERT INTO tmp_grouped_by_one_col 
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_two_col
+SELECT v1, v2, count(distinct v3) GROUP BY v1, v2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tmp1
+POSTHOOK: Output: default@tmp_grouped_by_one_col
+POSTHOOK: Output: default@tmp_grouped_by_two_col
+POSTHOOK: Lineage: tmp_grouped_by_one_col.cnt__v2 EXPRESSION [(tmp1)tmp1.FieldSchema(name:v2, type:string, comment:null), ]
+POSTHOOK: Lineage: tmp_grouped_by_one_col.cnt__v3 EXPRESSION [(tmp1)tmp1.FieldSchema(name:v3, type:string, comment:null), ]
+POSTHOOK: Lineage: tmp_grouped_by_one_col.v1 SIMPLE [(tmp1)tmp1.FieldSchema(name:v1, type:string, comment:null), ]
+POSTHOOK: Lineage: tmp_grouped_by_two_col.cnt__v3 EXPRESSION [(tmp1)tmp1.FieldSchema(name:v3, type:string, comment:null), ]
+POSTHOOK: Lineage: tmp_grouped_by_two_col.v1 SIMPLE [(tmp1)tmp1.FieldSchema(name:v1, type:string, comment:null), ]
+POSTHOOK: Lineage: tmp_grouped_by_two_col.v2 SIMPLE [(tmp1)tmp1.FieldSchema(name:v2, type:string, comment:null), ]
+PREHOOK: query: select * from tmp_grouped_by_two_col
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tmp_grouped_by_two_col
+#### A masked pattern was here ####
+POSTHOOK: query: select * from tmp_grouped_by_two_col
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tmp_grouped_by_two_col
+#### A masked pattern was here ####
+v1	v2	2
+PREHOOK: query: truncate table tmp_grouped_by_two_col
+PREHOOK: type: TRUNCATETABLE
+PREHOOK: Output: default@tmp_grouped_by_two_col
+POSTHOOK: query: truncate table tmp_grouped_by_two_col
+POSTHOOK: type: TRUNCATETABLE
+POSTHOOK: Output: default@tmp_grouped_by_two_col
+PREHOOK: query: explain FROM tmp1
+INSERT INTO tmp_grouped_by_one_col 
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_two_col
+SELECT v1, v2, count(v3) GROUP BY v1, v2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain FROM tmp1
+INSERT INTO tmp_grouped_by_one_col 
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_two_col
+SELECT v1, v2, count(v3) GROUP BY v1, v2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-2 is a root stage
+  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-2
+  Stage-1 depends on stages: Stage-3
+
+STAGE PLANS:
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: tmp1
+            Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: v1 (type: string), v2 (type: string), v3 (type: string)
+              outputColumnNames: v1, v2, v3
+              Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: count(DISTINCT v2), count(DISTINCT v3)
+                keys: v1 (type: string), v2 (type: string), v3 (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                  sort order: +++
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: v1 (type: string), v2 (type: string), v3 (type: string)
+              outputColumnNames: v1, v2, v3
+              Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: count(v3)
+                keys: v1 (type: string), v2 (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int)
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name: default.tmp_grouped_by_one_col
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: false
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.tmp_grouped_by_one_col
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string), _col1 (type: string)
+              sort order: ++
+              Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+              Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col2 (type: bigint)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          keys: KEY._col0 (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: string), UDFToInteger(_col2) (type: int)
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name: default.tmp_grouped_by_two_col
+
+  Stage: Stage-1
+    Move Operator
+      tables:
+          replace: false
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.tmp_grouped_by_two_col
+
+PREHOOK: query: FROM tmp1
+INSERT INTO tmp_grouped_by_one_col 
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_two_col
+SELECT v1, v2, count(v3) GROUP BY v1, v2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tmp1
+PREHOOK: Output: default@tmp_grouped_by_one_col
+PREHOOK: Output: default@tmp_grouped_by_two_col
+POSTHOOK: query: FROM tmp1
+INSERT INTO tmp_grouped_by_one_col 
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_two_col
+SELECT v1, v2, count(v3) GROUP BY v1, v2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tmp1
+POSTHOOK: Output: default@tmp_grouped_by_one_col
+POSTHOOK: Output: default@tmp_grouped_by_two_col
+POSTHOOK: Lineage: tmp_grouped_by_one_col.cnt__v2 EXPRESSION [(tmp1)tmp1.FieldSchema(name:v2, type:string, comment:null), ]
+POSTHOOK: Lineage: tmp_grouped_by_one_col.cnt__v3 EXPRESSION [(tmp1)tmp1.FieldSchema(name:v3, type:string, comment:null), ]
+POSTHOOK: Lineage: tmp_grouped_by_one_col.v1 SIMPLE [(tmp1)tmp1.FieldSchema(name:v1, type:string, comment:null), ]
+POSTHOOK: Lineage: tmp_grouped_by_two_col.cnt__v3 EXPRESSION [(tmp1)tmp1.FieldSchema(name:v3, type:string, comment:null), ]
+POSTHOOK: Lineage: tmp_grouped_by_two_col.v1 SIMPLE [(tmp1)tmp1.FieldSchema(name:v1, type:string, comment:null), ]
+POSTHOOK: Lineage: tmp_grouped_by_two_col.v2 SIMPLE [(tmp1)tmp1.FieldSchema(name:v2, type:string, comment:null), ]
+PREHOOK: query: select * from tmp_grouped_by_two_col
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tmp_grouped_by_two_col
+#### A masked pattern was here ####
+POSTHOOK: query: select * from tmp_grouped_by_two_col
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tmp_grouped_by_two_col
+#### A masked pattern was here ####
+v1	v2	2
+PREHOOK: query: explain FROM tmp1
+INSERT INTO tmp_grouped_by_one_col 
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_all_col
+SELECT v1, v2, v3 GROUP BY v1, v2, v3
+PREHOOK: type: QUERY
+POSTHOOK: query: explain FROM tmp1
+INSERT INTO tmp_grouped_by_one_col 
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_all_col
+SELECT v1, v2, v3 GROUP BY v1, v2, v3
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-2 is a root stage
+  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-2
+  Stage-1 depends on stages: Stage-3
+
+STAGE PLANS:
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: tmp1
+            Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: v1 (type: string), v2 (type: string), v3 (type: string)
+              outputColumnNames: v1, v2, v3
+              Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: count(DISTINCT v2), count(DISTINCT v3)
+                keys: v1 (type: string), v2 (type: string), v3 (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                  sort order: +++
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: v1 (type: string), v2 (type: string), v3 (type: string)
+              outputColumnNames: v1, v2, v3
+              Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                keys: v1 (type: string), v2 (type: string), v3 (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int)
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name: default.tmp_grouped_by_one_col
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: false
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.tmp_grouped_by_one_col
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+              sort order: +++
+              Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+              Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                name: default.tmp_grouped_by_all_col
+
+  Stage: Stage-1
+    Move Operator
+      tables:
+          replace: false
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.tmp_grouped_by_all_col
+
+PREHOOK: query: FROM tmp1
+INSERT INTO tmp_grouped_by_one_col 
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_all_col
+SELECT v1, v2, v3 GROUP BY v1, v2, v3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tmp1
+PREHOOK: Output: default@tmp_grouped_by_all_col
+PREHOOK: Output: default@tmp_grouped_by_one_col
+POSTHOOK: query: FROM tmp1
+INSERT INTO tmp_grouped_by_one_col 
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_all_col
+SELECT v1, v2, v3 GROUP BY v1, v2, v3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tmp1
+POSTHOOK: Output: default@tmp_grouped_by_all_col
+POSTHOOK: Output: default@tmp_grouped_by_one_col
+POSTHOOK: Lineage: tmp_grouped_by_all_col.v1 SIMPLE [(tmp1)tmp1.FieldSchema(name:v1, type:string, comment:null), ]
+POSTHOOK: Lineage: tmp_grouped_by_all_col.v2 SIMPLE [(tmp1)tmp1.FieldSchema(name:v2, type:string, comment:null), ]
+POSTHOOK: Lineage: tmp_grouped_by_all_col.v3 SIMPLE [(tmp1)tmp1.FieldSchema(name:v3, type:string, comment:null), ]
+POSTHOOK: Lineage: tmp_grouped_by_one_col.cnt__v2 EXPRESSION [(tmp1)tmp1.FieldSchema(name:v2, type:string, comment:null), ]
+POSTHOOK: Lineage: tmp_grouped_by_one_col.cnt__v3 EXPRESSION [(tmp1)tmp1.FieldSchema(name:v3, type:string, comment:null), ]
+POSTHOOK: Lineage: tmp_grouped_by_one_col.v1 SIMPLE [(tmp1)tmp1.FieldSchema(name:v1, type:string, comment:null), ]
+PREHOOK: query: select * from tmp_grouped_by_all_col
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tmp_grouped_by_all_col
+#### A masked pattern was here ####
+POSTHOOK: query: select * from tmp_grouped_by_all_col
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tmp_grouped_by_all_col
+#### A masked pattern was here ####
+v1	v2	v3
+v1	v2	v3a

http://git-wip-us.apache.org/repos/asf/hive/blob/5ec8e356/ql/src/test/results/clientpositive/multi_insert_gby3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/multi_insert_gby3.q.out b/ql/src/test/results/clientpositive/multi_insert_gby3.q.out
index 6c75853..677d9d4 100644
--- a/ql/src/test/results/clientpositive/multi_insert_gby3.q.out
+++ b/ql/src/test/results/clientpositive/multi_insert_gby3.q.out
@@ -39,11 +39,12 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-0 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-0, Stage-4, Stage-6
+  Stage-3 depends on stages: Stage-0, Stage-4, Stage-7
   Stage-4 depends on stages: Stage-2
-  Stage-5 depends on stages: Stage-1, Stage-4, Stage-6
-  Stage-1 depends on stages: Stage-2
-  Stage-6 depends on stages: Stage-2
+  Stage-6 depends on stages: Stage-1, Stage-4, Stage-7
+  Stage-5 depends on stages: Stage-2
+  Stage-1 depends on stages: Stage-5
+  Stage-7 depends on stages: Stage-5
 
 STAGE PLANS:
   Stage: Stage-2
@@ -56,82 +57,67 @@ STAGE PLANS:
               expressions: key (type: string), UDFToDouble(key) (type: double), value (type: string)
               outputColumnNames: _col0, _col1, _col2
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-              Reduce Output Operator
-                key expressions: _col0 (type: string), _col2 (type: string)
-                sort order: ++
-                Map-reduce partition columns: _col0 (type: string)
+              Select Operator
+                expressions: _col0 (type: string), _col2 (type: string)
+                outputColumnNames: _col0, _col2
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                value expressions: _col1 (type: double)
-      Execution mode: vectorized
+                Group By Operator
+                  aggregations: count(DISTINCT _col2)
+                  keys: _col0 (type: string), _col2 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string), _col1 (type: string)
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: string)
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: sum(_col1)
+                keys: _col0 (type: string), _col2 (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
       Reduce Operator Tree:
-        Forward
-          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-          Group By Operator
-            aggregations: count(DISTINCT KEY._col1:0._col0)
-            keys: KEY._col0 (type: string)
-            mode: complete
+        Group By Operator
+          aggregations: count(DISTINCT KEY._col1:0._col0)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: string), UDFToDouble(_col1) (type: double)
             outputColumnNames: _col0, _col1
             Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: _col0 (type: string), UDFToDouble(_col1) (type: double)
-              outputColumnNames: _col0, _col1
+            File Output Operator
+              compressed: false
               Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-              File Output Operator
-                compressed: false
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                table:
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    name: default.e1_n2
-              Select Operator
-                expressions: _col0 (type: string), _col1 (type: double)
-                outputColumnNames: key, keyd
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll')
-                  mode: hash
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
-                  File Output Operator
-                    compressed: false
-                    table:
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-          Group By Operator
-            aggregations: sum(VALUE._col0)
-            keys: KEY._col0 (type: string), KEY._col1:0._col0 (type: string)
-            mode: complete
-            outputColumnNames: _col0, _col1, _col2
-            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name: default.e1_n2
             Select Operator
-              expressions: _col0 (type: string), _col2 (type: double), _col1 (type: string)
-              outputColumnNames: _col0, _col1, _col2
+              expressions: _col0 (type: string), _col1 (type: double)
+              outputColumnNames: key, keyd
               Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-              File Output Operator
-                compressed: false
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                table:
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    name: default.e2_n3
-              Select Operator
-                expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string)
-                outputColumnNames: key, keyd, value
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll'), compute_stats(value, 'hll')
-                  mode: hash
-                  outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE
-                  File Output Operator
-                    compressed: false
-                    table:
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -157,7 +143,7 @@ STAGE PLANS:
           TableScan
             Reduce Output Operator
               sort order: 
-              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE
               value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,bitvector:binary>)
       Execution mode: vectorized
       Reduce Operator Tree:
@@ -165,16 +151,16 @@ STAGE PLANS:
           aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
           mode: mergepartial
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
-            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
-  Stage: Stage-5
+  Stage: Stage-6
     Stats Work
       Basic Stats Work:
       Column Stats Desc:
@@ -182,6 +168,52 @@ STAGE PLANS:
           Column Types: string, double, string
           Table: default.e2_n3
 
+  Stage: Stage-5
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string), _col1 (type: string)
+              sort order: ++
+              Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col2 (type: double)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0)
+          keys: KEY._col0 (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: string), _col2 (type: double), _col1 (type: string)
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name: default.e2_n3
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string)
+              outputColumnNames: key, keyd, value
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll'), compute_stats(value, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
   Stage: Stage-1
     Move Operator
       tables:
@@ -192,7 +224,7 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.e2_n3
 
-  Stage: Stage-6
+  Stage: Stage-7
     Map Reduce
       Map Operator Tree:
           TableScan
@@ -232,11 +264,12 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-0 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-0, Stage-4, Stage-6
+  Stage-3 depends on stages: Stage-0, Stage-4, Stage-7
   Stage-4 depends on stages: Stage-2
-  Stage-5 depends on stages: Stage-1, Stage-4, Stage-6
-  Stage-1 depends on stages: Stage-2
-  Stage-6 depends on stages: Stage-2
+  Stage-6 depends on stages: Stage-1, Stage-4, Stage-7
+  Stage-5 depends on stages: Stage-2
+  Stage-1 depends on stages: Stage-5
+  Stage-7 depends on stages: Stage-5
 
 STAGE PLANS:
   Stage: Stage-2
@@ -249,82 +282,68 @@ STAGE PLANS:
               expressions: key (type: string), UDFToDouble(key) (type: double), value (type: string)
               outputColumnNames: _col0, _col1, _col2
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-              Reduce Output Operator
-                key expressions: _col0 (type: string), _col2 (type: string)
-                sort order: ++
-                Map-reduce partition columns: _col0 (type: string)
+              Group By Operator
+                aggregations: sum(_col1)
+                keys: _col0 (type: string), _col2 (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                value expressions: _col1 (type: double)
-      Execution mode: vectorized
-      Reduce Operator Tree:
-        Forward
-          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-          Group By Operator
-            aggregations: count(DISTINCT KEY._col1:0._col0)
-            keys: KEY._col0 (type: string)
-            mode: complete
-            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: _col0 (type: string), UDFToDouble(_col1) (type: double)
-              outputColumnNames: _col0, _col1
-              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-              File Output Operator
-                compressed: false
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                table:
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    name: default.e1_n2
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: string)
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col2 (type: double)
               Select Operator
-                expressions: _col0 (type: string), _col1 (type: double)
-                outputColumnNames: key, keyd
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                expressions: _col0 (type: string), _col2 (type: string)
+                outputColumnNames: _col0, _col2
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                 Group By Operator
-                  aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll')
+                  aggregations: count(DISTINCT _col2)
+                  keys: _col0 (type: string), _col2 (type: string)
                   mode: hash
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-          Group By Operator
-            aggregations: sum(VALUE._col0)
-            keys: KEY._col0 (type: string), KEY._col1:0._col0 (type: string)
-            mode: complete
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0)
+          keys: KEY._col0 (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: string), _col2 (type: double), _col1 (type: string)
             outputColumnNames: _col0, _col1, _col2
             Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name: default.e2_n3
             Select Operator
-              expressions: _col0 (type: string), _col2 (type: double), _col1 (type: string)
-              outputColumnNames: _col0, _col1, _col2
+              expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string)
+              outputColumnNames: key, keyd, value
               Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-              File Output Operator
-                compressed: false
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                table:
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    name: default.e2_n3
-              Select Operator
-                expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string)
-                outputColumnNames: key, keyd, value
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll'), compute_stats(value, 'hll')
-                  mode: hash
-                  outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE
-                  File Output Operator
-                    compressed: false
-                    table:
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll'), compute_stats(value, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -334,15 +353,15 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              name: default.e1_n2
+              name: default.e2_n3
 
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
       Column Stats Desc:
-          Columns: key, keyd
-          Column Types: string, double
-          Table: default.e1_n2
+          Columns: key, keyd, value
+          Column Types: string, double, string
+          Table: default.e2_n3
 
   Stage: Stage-4
     Map Reduce
@@ -350,30 +369,75 @@ STAGE PLANS:
           TableScan
             Reduce Output Operator
               sort order: 
-              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,bitvector:binary>)
+              Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
       Execution mode: vectorized
       Reduce Operator Tree:
         Group By Operator
-          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
           mode: mergepartial
-          outputColumnNames: _col0, _col1
-          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
-            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
-  Stage: Stage-5
+  Stage: Stage-6
     Stats Work
       Basic Stats Work:
       Column Stats Desc:
-          Columns: key, keyd, value
-          Column Types: string, double, string
-          Table: default.e2_n3
+          Columns: key, keyd
+          Column Types: string, double
+          Table: default.e1_n2
+
+  Stage: Stage-5
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string), _col1 (type: string)
+              sort order: ++
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(DISTINCT KEY._col1:0._col0)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: string), UDFToDouble(_col1) (type: double)
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name: default.e1_n2
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: double)
+              outputColumnNames: key, keyd
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-1
     Move Operator
@@ -383,26 +447,26 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              name: default.e2_n3
+              name: default.e1_n2
 
-  Stage: Stage-6
+  Stage: Stage-7
     Map Reduce
       Map Operator Tree:
           TableScan
             Reduce Output Operator
               sort order: 
-              Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+              Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,bitvector:binary>)
       Execution mode: vectorized
       Reduce Operator Tree:
         Group By Operator
-          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
           mode: mergepartial
-          outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
-            Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1967,15 +2031,16 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-3 is a root stage
   Stage-0 depends on stages: Stage-3
-  Stage-4 depends on stages: Stage-0, Stage-5, Stage-7, Stage-10
+  Stage-4 depends on stages: Stage-0, Stage-5, Stage-8, Stage-11
   Stage-5 depends on stages: Stage-3
-  Stage-6 depends on stages: Stage-1, Stage-5, Stage-7, Stage-10
-  Stage-9 depends on stages: Stage-2, Stage-5, Stage-7, Stage-10
-  Stage-1 depends on stages: Stage-3
-  Stage-7 depends on stages: Stage-3
-  Stage-8 depends on stages: Stage-3
-  Stage-2 depends on stages: Stage-8
-  Stage-10 depends on stages: Stage-8
+  Stage-7 depends on stages: Stage-1, Stage-5, Stage-8, Stage-11
+  Stage-10 depends on stages: Stage-2, Stage-5, Stage-8, Stage-11
+  Stage-6 depends on stages: Stage-3
+  Stage-1 depends on stages: Stage-6
+  Stage-8 depends on stages: Stage-6
+  Stage-9 depends on stages: Stage-3
+  Stage-2 depends on stages: Stage-9
+  Stage-11 depends on stages: Stage-9
 
 STAGE PLANS:
   Stage: Stage-3
@@ -1988,12 +2053,33 @@ STAGE PLANS:
               expressions: key (type: string), UDFToDouble(key) (type: double), value (type: string)
               outputColumnNames: _col0, _col1, _col2
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-              Reduce Output Operator
-                key expressions: _col0 (type: string), _col2 (type: string)
-                sort order: ++
-                Map-reduce partition columns: _col0 (type: string)
+              Select Operator
+                expressions: _col0 (type: string), _col2 (type: string)
+                outputColumnNames: _col0, _col2
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                value expressions: _col1 (type: double)
+                Group By Operator
+                  aggregations: count(DISTINCT _col2)
+                  keys: _col0 (type: string), _col2 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string), _col1 (type: string)
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: string)
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: sum(_col1)
+                keys: _col0 (type: string), _col2 (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
               Group By Operator
                 aggregations: count(DISTINCT _col1)
                 keys: _col0 (type: string), _col1 (type: double), _col2 (type: string)
@@ -2007,74 +2093,39 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
       Reduce Operator Tree:
-        Forward
-          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-          Group By Operator
-            aggregations: count(DISTINCT KEY._col1:0._col0)
-            keys: KEY._col0 (type: string)
-            mode: complete
+        Group By Operator
+          aggregations: count(DISTINCT KEY._col1:0._col0)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: string), UDFToDouble(_col1) (type: double)
             outputColumnNames: _col0, _col1
             Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: _col0 (type: string), UDFToDouble(_col1) (type: double)
-              outputColumnNames: _col0, _col1
+            File Output Operator
+              compressed: false
               Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-              File Output Operator
-                compressed: false
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                table:
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    name: default.e1_n2
-              Select Operator
-                expressions: _col0 (type: string), _col1 (type: double)
-                outputColumnNames: key, keyd
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll')
-                  mode: hash
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
-                  File Output Operator
-                    compressed: false
-                    table:
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-          Group By Operator
-            aggregations: sum(VALUE._col0)
-            keys: KEY._col0 (type: string), KEY._col1:0._col0 (type: string)
-            mode: complete
-            outputColumnNames: _col0, _col1, _col2
-            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name: default.e1_n2
             Select Operator
-              expressions: _col0 (type: string), _col2 (type: double), _col1 (type: string)
-              outputColumnNames: _col0, _col1, _col2
+              expressions: _col0 (type: string), _col1 (type: double)
+              outputColumnNames: key, keyd
               Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-              File Output Operator
-                compressed: false
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                table:
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    name: default.e2_n3
-              Select Operator
-                expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string)
-                outputColumnNames: key, keyd, value
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll'), compute_stats(value, 'hll')
-                  mode: hash
-                  outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE
-                  File Output Operator
-                    compressed: false
-                    table:
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -2100,7 +2151,7 @@ STAGE PLANS:
           TableScan
             Reduce Output Operator
               sort order: 
-              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE
               value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,bitvector:binary>)
       Execution mode: vectorized
       Reduce Operator Tree:
@@ -2108,16 +2159,16 @@ STAGE PLANS:
           aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
           mode: mergepartial
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
-            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
-  Stage: Stage-6
+  Stage: Stage-7
     Stats Work
       Basic Stats Work:
       Column Stats Desc:
@@ -2125,7 +2176,7 @@ STAGE PLANS:
           Column Types: string, double, string
           Table: default.e2_n3
 
-  Stage: Stage-9
+  Stage: Stage-10
     Stats Work
       Basic Stats Work:
       Column Stats Desc:
@@ -2133,6 +2184,52 @@ STAGE PLANS:
           Column Types: string, double
           Table: default.e3
 
+  Stage: Stage-6
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string), _col1 (type: string)
+              sort order: ++
+              Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col2 (type: double)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0)
+          keys: KEY._col0 (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: string), _col2 (type: double), _col1 (type: string)
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name: default.e2_n3
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string)
+              outputColumnNames: key, keyd, value
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll'), compute_stats(value, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
   Stage: Stage-1
     Move Operator
       tables:
@@ -2143,7 +2240,7 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.e2_n3
 
-  Stage: Stage-7
+  Stage: Stage-8
     Map Reduce
       Map Operator Tree:
           TableScan
@@ -2166,7 +2263,7 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
-  Stage: Stage-8
+  Stage: Stage-9
     Map Reduce
       Map Operator Tree:
           TableScan
@@ -2221,7 +2318,7 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.e3
 
-  Stage: Stage-10
+  Stage: Stage-11
     Map Reduce
       Map Operator Tree:
           TableScan
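
For reference, the first explain in the multi_insert_gby3.q.out diff above corresponds to a query of roughly this shape. This is a sketch reconstructed from the operator trees (UDFToDouble(key) as keyd, count(DISTINCT value) grouped by key into e1_n2, sum(keyd) grouped by key and value into e2_n3), not the literal multi_insert_gby3.q text, and the subquery alias t is invented here:

  FROM (SELECT key, CAST(key AS DOUBLE) AS keyd, value FROM src) t
  INSERT OVERWRITE TABLE e1_n2
  SELECT key, COUNT(DISTINCT value) GROUP BY key
  INSERT OVERWRITE TABLE e2_n3
  SELECT key, SUM(keyd), value GROUP BY key, value

Only the first branch uses DISTINCT; the second is a plain aggregate over different grouping keys, which is exactly the combination this patch replans into separate reduce stages.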


[2/4] hive git commit: HIVE-19690 : multi-insert query with multiple GBY, and distinct in only some branches can produce incorrect results (Sergey Shelukhin, reviewed by Ashutosh Chauhan)

Posted by se...@apache.org.
HIVE-19690 : multi-insert query with multiple GBY, and distinct in only some branches can produce incorrect results (Sergey Shelukhin, reviewed by Ashutosh Chauhan)

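The affected shape is a multi-insert whose GROUP BY branches do not all use DISTINCT. The .q.out diffs in this patch show the old plans forcing both branches through one shared reduce (a Forward operator feeding two Group By operators keyed for the DISTINCT encoding, with the non-distinct branch reading its key as KEY._col1:0._col0), while the new plans give the non-distinct branch its own map-side Group By and a separate reduce stage. The new multi_insert_distinct.q test pins this down; one of its queries, taken verbatim from the test output above, is:

  FROM tmp1
  INSERT INTO tmp_grouped_by_one_col
  SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
  INSERT INTO tmp_grouped_by_two_col
  SELECT v1, v2, count(v3) GROUP BY v1, v2

Only the first branch is DISTINCT; the plan diffs suggest that, before this change, the second branch could read its grouping keys through the DISTINCT key encoding and so produce incorrect rows.
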

Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/5667af34
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/5667af34
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/5667af34

Branch: refs/heads/master
Commit: 5667af34c4cf36cc41aaf39a8e42f4dad42d2cee
Parents: 43e331e
Author: sergey <se...@apache.org>
Authored: Mon Jun 4 14:42:06 2018 -0700
Committer: sergey <se...@apache.org>
Committed: Mon Jun 4 14:42:06 2018 -0700

----------------------------------------------------------------------
 .../hadoop/hive/ql/exec/KeyWrapperFactory.java  |  12 +
 .../apache/hadoop/hive/ql/exec/MapOperator.java |   5 +
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  |  73 ++-
 .../clientpositive/multi_insert_distinct.q      |  66 ++
 .../clientpositive/multi_insert_distinct.q.out  | 534 +++++++++++++++++
 .../clientpositive/multi_insert_gby3.q.out      | 597 +++++++++++--------
 .../spark/multi_insert_gby3.q.out               | 344 +++++++----
 7 files changed, 1228 insertions(+), 403 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/5667af34/ql/src/java/org/apache/hadoop/hive/ql/exec/KeyWrapperFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/KeyWrapperFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/KeyWrapperFactory.java
index 3c7f0b7..71ee25d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/KeyWrapperFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/KeyWrapperFactory.java
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.hive.ql.exec;
 
+import java.util.Arrays;
+
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.serde2.objectinspector.ListObjectsEqualComparer;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -65,6 +67,11 @@ public class KeyWrapperFactory {
   class ListKeyWrapper extends KeyWrapper {
     int hashcode = -1;
     Object[] keys;
+    @Override
+    public String toString() {
+      return "ListKeyWrapper [keys=" + Arrays.toString(keys) + "]";
+    }
+
     // decide whether this is already in hashmap (keys in hashmap are deepcopied
     // version, and we need to use 'currentKeyObjectInspector').
     ListObjectsEqualComparer equalComparer;
@@ -165,6 +172,11 @@ public class KeyWrapperFactory {
   transient StringObjectInspector soi_new, soi_copy;
 
   class TextKeyWrapper extends KeyWrapper {
+    @Override
+    public String toString() {
+      return "TextKeyWrapper [key=" + key + "]";
+    }
+
     int hashcode;
     Object key;
     boolean isCopy;

http://git-wip-us.apache.org/repos/asf/hive/blob/5667af34/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java
index 29f3579..16d7c51 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java
@@ -90,6 +90,11 @@ public class MapOperator extends AbstractMapOperator {
 
   protected static class MapOpCtx {
 
+    @Override
+    public String toString() {
+      return "[alias=" + alias + ", op=" + op + "]";
+    }
+
     final String alias;
     final Operator<?> op;
     final PartitionDesc partDesc;

http://git-wip-us.apache.org/repos/asf/hive/blob/5667af34/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 66f4b67..506dc39 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -10213,62 +10213,62 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
 
     List<Operator<? extends OperatorDesc>> inputOperators =
         new ArrayList<Operator<? extends OperatorDesc>>(ks.size());
-    List<List<ExprNodeDesc>> sprayKeyLists = new ArrayList<List<ExprNodeDesc>>(ks.size());
-    List<List<ExprNodeDesc>> distinctKeyLists = new ArrayList<List<ExprNodeDesc>>(ks.size());
+    // We will try to combine multiple clauses into a smaller number of groups with compatible keys.
+    List<List<ExprNodeDesc>> newSprayKeyLists = new ArrayList<List<ExprNodeDesc>>(ks.size());
+    List<List<ExprNodeDesc>> newDistinctKeyLists = new ArrayList<List<ExprNodeDesc>>(ks.size());
 
     // Iterate over each clause
     for (String dest : ks) {
       Operator input = inputs.get(dest);
       RowResolver inputRR = opParseCtx.get(input).getRowResolver();
 
-      List<ExprNodeDesc> distinctKeys = getDistinctExprs(qbp, dest, inputRR);
-      List<ExprNodeDesc> sprayKeys = new ArrayList<ExprNodeDesc>();
+      // Determine the keys for the current clause.
+      List<ExprNodeDesc> currentDistinctKeys = getDistinctExprs(qbp, dest, inputRR);
+      List<ExprNodeDesc> currentSprayKeys = determineSprayKeys(qbp, dest, inputRR);
 
-      // Add the group by expressions
-      List<ASTNode> grpByExprs = getGroupByForClause(qbp, dest);
-      for (ASTNode grpByExpr : grpByExprs) {
-        ExprNodeDesc exprDesc = genExprNodeDesc(grpByExpr, inputRR);
-        if (ExprNodeDescUtils.indexOf(exprDesc, sprayKeys) < 0) {
-          sprayKeys.add(exprDesc);
-        }
-      }
-
-      // Loop through each of the lists of exprs, looking for a match
+      // Loop through each of the lists of exprs, looking for a match.
       boolean found = false;
-      for (int i = 0; i < sprayKeyLists.size(); i++) {
+      for (int i = 0; i < newSprayKeyLists.size(); i++) {
         if (!input.equals(inputOperators.get(i))) {
           continue;
         }
+        // We will try to merge this clause into one of the previously added ones.
+        List<ExprNodeDesc> targetSprayKeys = newSprayKeyLists.get(i);
+        List<ExprNodeDesc> targetDistinctKeys = newDistinctKeyLists.get(i);
+        if (currentDistinctKeys.isEmpty() != targetDistinctKeys.isEmpty()) {
+          // A GBY without distinct keys is not prepared to process distinct-key-structured rows.
+          continue;
+        }
 
-        if (distinctKeys.isEmpty()) {
+        if (currentDistinctKeys.isEmpty()) {
           // current dest has no distinct keys.
           List<ExprNodeDesc> combinedList = new ArrayList<ExprNodeDesc>();
-          combineExprNodeLists(sprayKeyLists.get(i), distinctKeyLists.get(i), combinedList);
-          if (!matchExprLists(combinedList, sprayKeys)) {
+          combineExprNodeLists(targetSprayKeys, targetDistinctKeys, combinedList);
+          if (!matchExprLists(combinedList, currentSprayKeys)) {
             continue;
           } // else do the common code at the end.
         } else {
-          if (distinctKeyLists.get(i).isEmpty()) {
+          if (targetDistinctKeys.isEmpty()) {
             List<ExprNodeDesc> combinedList = new ArrayList<ExprNodeDesc>();
-            combineExprNodeLists(sprayKeys, distinctKeys, combinedList);
-            if (!matchExprLists(combinedList, sprayKeyLists.get(i))) {
+            combineExprNodeLists(currentSprayKeys, currentDistinctKeys, combinedList);
+            if (!matchExprLists(combinedList, targetSprayKeys)) {
               continue;
             } else {
              // we have found a match. insert this distinct clause at the head.
-              distinctKeyLists.remove(i);
-              sprayKeyLists.remove(i);
-              distinctKeyLists.add(i, distinctKeys);
-              sprayKeyLists.add(i, sprayKeys);
+              newDistinctKeyLists.remove(i);
+              newSprayKeyLists.remove(i);
+              newDistinctKeyLists.add(i, currentDistinctKeys);
+              newSprayKeyLists.add(i, currentSprayKeys);
               commonGroupByDestGroups.get(i).add(0, dest);
               found = true;
               break;
             }
           } else {
-            if (!matchExprLists(distinctKeyLists.get(i), distinctKeys)) {
+            if (!matchExprLists(targetDistinctKeys, currentDistinctKeys)) {
               continue;
             }
 
-            if (!matchExprLists(sprayKeyLists.get(i), sprayKeys)) {
+            if (!matchExprLists(targetSprayKeys, currentSprayKeys)) {
               continue;
             }
             // else do common code
@@ -10285,8 +10285,8 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
       // No match was found, so create new entries
       if (!found) {
         inputOperators.add(input);
-        sprayKeyLists.add(sprayKeys);
-        distinctKeyLists.add(distinctKeys);
+        newSprayKeyLists.add(currentSprayKeys);
+        newDistinctKeyLists.add(currentDistinctKeys);
         List<String> destGroup = new ArrayList<String>();
         destGroup.add(dest);
         commonGroupByDestGroups.add(destGroup);
@@ -10296,6 +10296,21 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
     return commonGroupByDestGroups;
   }
 
+  protected List<ExprNodeDesc> determineSprayKeys(QBParseInfo qbp, String dest,
+      RowResolver inputRR) throws SemanticException {
+    List<ExprNodeDesc> sprayKeys = new ArrayList<ExprNodeDesc>();
+
+    // Add the group by expressions
+    List<ASTNode> grpByExprs = getGroupByForClause(qbp, dest);
+    for (ASTNode grpByExpr : grpByExprs) {
+      ExprNodeDesc exprDesc = genExprNodeDesc(grpByExpr, inputRR);
+      if (ExprNodeDescUtils.indexOf(exprDesc, sprayKeys) < 0) {
+        sprayKeys.add(exprDesc);
+      }
+    }
+    return sprayKeys;
+  }
+
   private void combineExprNodeLists(List<ExprNodeDesc> list, List<ExprNodeDesc> list2,
                                     List<ExprNodeDesc> combinedList) {
     combinedList.addAll(list);
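
The hunk above carries the actual fix: key computation moves into the extracted determineSprayKeys(), and a clause may only be merged into an existing group when both sides agree on whether distinct keys are present. A condensed, stand-alone sketch of that rule (List<String> stands in for List<ExprNodeDesc> and equals() for matchExprLists; all names are illustrative):

import java.util.ArrayList;
import java.util.List;

// Condensed illustration of the merging guard added by the patch; all names
// are stand-ins for the SemanticAnalyzer structures.
public class ClauseMergeDemo {
  static boolean canMerge(List<String> targetSpray, List<String> targetDistinct,
                          List<String> currentSpray, List<String> currentDistinct) {
    if (currentDistinct.isEmpty() != targetDistinct.isEmpty()) {
      // The new guard: a GBY without distinct keys cannot consume
      // distinct-keyed rows, so these clauses never share a shuffle.
      return false;
    }
    if (currentDistinct.isEmpty()) {
      // Neither side has DISTINCT: the combined target keys must line up
      // exactly with the current spray keys.
      List<String> combined = new ArrayList<>(targetSpray);
      combined.addAll(targetDistinct);
      return combined.equals(currentSpray);
    }
    // Both sides have DISTINCT: both key lists must match.
    return targetDistinct.equals(currentDistinct)
        && targetSpray.equals(currentSpray);
  }

  public static void main(String[] args) {
    // GROUP BY v1 with DISTINCT keys vs. GROUP BY v1, v2 without: never merged.
    System.out.println(canMerge(List.of("v1"), List.of("v2", "v3"),
                                List.of("v1", "v2"), List.of())); // false
  }
}

The new qfile below exercises exactly these pairings: distinct merged with distinct, distinct next to a plain count, and distinct next to a bare GROUP BY.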

http://git-wip-us.apache.org/repos/asf/hive/blob/5667af34/ql/src/test/queries/clientpositive/multi_insert_distinct.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/multi_insert_distinct.q b/ql/src/test/queries/clientpositive/multi_insert_distinct.q
new file mode 100644
index 0000000..ac3e5ee
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/multi_insert_distinct.q
@@ -0,0 +1,66 @@
+--! qt:dataset:src
+
+CREATE TABLE tmp1 ( v1 string , v2 string , v3 string ) 
+ROW FORMAT DELIMITED 
+FIELDS TERMINATED BY '\t' 
+LINES TERMINATED BY '\n' 
+;
+
+INSERT INTO tmp1 VALUES ('v1', 'v2', 'v3'), ('v1', 'v2', 'v3a');
+
+
+CREATE TABLE tmp_grouped_by_all_col ( v1 string , v2 string , v3 string ) 
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n' ;
+CREATE TABLE tmp_grouped_by_one_col  ( v1 string , cnt__v2 int , cnt__v3 int ) 
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n' ;
+CREATE TABLE tmp_grouped_by_two_col  ( v1 string , v2 string , cnt__v3 int ) 
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n' ;
+
+
+set hive.explain.user=false;
+set hive.stats.autogather=false; 
+set hive.stats.column.autogather=false; 
+
+explain FROM tmp1
+INSERT INTO tmp_grouped_by_one_col 
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_two_col
+SELECT v1, v2, count(distinct v3) GROUP BY v1, v2;
+
+FROM tmp1
+INSERT INTO tmp_grouped_by_one_col 
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_two_col
+SELECT v1, v2, count(distinct v3) GROUP BY v1, v2;
+
+select * from tmp_grouped_by_two_col;
+
+truncate table tmp_grouped_by_two_col;
+
+explain FROM tmp1
+INSERT INTO tmp_grouped_by_one_col 
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_two_col
+SELECT v1, v2, count(v3) GROUP BY v1, v2;
+
+FROM tmp1
+INSERT INTO tmp_grouped_by_one_col 
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_two_col
+SELECT v1, v2, count(v3) GROUP BY v1, v2;
+
+select * from tmp_grouped_by_two_col;
+
+explain FROM tmp1
+INSERT INTO tmp_grouped_by_one_col 
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_all_col
+SELECT v1, v2, v3 GROUP BY v1, v2, v3;
+
+FROM tmp1
+INSERT INTO tmp_grouped_by_one_col 
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_all_col
+SELECT v1, v2, v3 GROUP BY v1, v2, v3;
+
+select * from tmp_grouped_by_all_col;
\ No newline at end of file
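
The qfile above can also be replayed against a live HiveServer2 to verify the fix outside the qtest harness; a minimal JDBC sketch, assuming a server at localhost:10000, the hive-jdbc driver on the classpath, and the tables created as above:

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

// Replays the first multi-insert from the qfile over JDBC. Assumes a local
// HiveServer2 and that tmp1 and the tmp_grouped_by_* tables exist as above.
public class MultiInsertDistinctRepro {
  public static void main(String[] args) throws Exception {
    try (Connection conn = DriverManager.getConnection(
             "jdbc:hive2://localhost:10000/default", "", "");
         Statement stmt = conn.createStatement()) {
      stmt.execute("FROM tmp1"
          + " INSERT INTO tmp_grouped_by_one_col"
          + " SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1"
          + " INSERT INTO tmp_grouped_by_two_col"
          + " SELECT v1, v2, count(distinct v3) GROUP BY v1, v2");
      try (ResultSet rs = stmt.executeQuery(
               "SELECT * FROM tmp_grouped_by_two_col")) {
        while (rs.next()) { // expect one row: v1 v2 2 after the fix
          System.out.println(rs.getString(1) + "\t"
              + rs.getString(2) + "\t" + rs.getInt(3));
        }
      }
    }
  }
}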

http://git-wip-us.apache.org/repos/asf/hive/blob/5667af34/ql/src/test/results/clientpositive/multi_insert_distinct.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/multi_insert_distinct.q.out b/ql/src/test/results/clientpositive/multi_insert_distinct.q.out
new file mode 100644
index 0000000..e86711a
--- /dev/null
+++ b/ql/src/test/results/clientpositive/multi_insert_distinct.q.out
@@ -0,0 +1,534 @@
+PREHOOK: query: CREATE TABLE tmp1 ( v1 string , v2 string , v3 string ) 
+ROW FORMAT DELIMITED 
+FIELDS TERMINATED BY '\t' 
+LINES TERMINATED BY '\n'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tmp1
+POSTHOOK: query: CREATE TABLE tmp1 ( v1 string , v2 string , v3 string ) 
+ROW FORMAT DELIMITED 
+FIELDS TERMINATED BY '\t' 
+LINES TERMINATED BY '\n'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tmp1
+PREHOOK: query: INSERT INTO tmp1 VALUES ('v1', 'v2', 'v3'), ('v1', 'v2', 'v3a')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tmp1
+POSTHOOK: query: INSERT INTO tmp1 VALUES ('v1', 'v2', 'v3'), ('v1', 'v2', 'v3a')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tmp1
+POSTHOOK: Lineage: tmp1.v1 SCRIPT []
+POSTHOOK: Lineage: tmp1.v2 SCRIPT []
+POSTHOOK: Lineage: tmp1.v3 SCRIPT []
+PREHOOK: query: CREATE TABLE tmp_grouped_by_all_col ( v1 string , v2 string , v3 string ) 
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tmp_grouped_by_all_col
+POSTHOOK: query: CREATE TABLE tmp_grouped_by_all_col ( v1 string , v2 string , v3 string ) 
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tmp_grouped_by_all_col
+PREHOOK: query: CREATE TABLE tmp_grouped_by_one_col  ( v1 string , cnt__v2 int , cnt__v3 int ) 
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tmp_grouped_by_one_col
+POSTHOOK: query: CREATE TABLE tmp_grouped_by_one_col  ( v1 string , cnt__v2 int , cnt__v3 int ) 
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tmp_grouped_by_one_col
+PREHOOK: query: CREATE TABLE tmp_grouped_by_two_col  ( v1 string , v2 string , cnt__v3 int ) 
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tmp_grouped_by_two_col
+POSTHOOK: query: CREATE TABLE tmp_grouped_by_two_col  ( v1 string , v2 string , cnt__v3 int ) 
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tmp_grouped_by_two_col
+PREHOOK: query: explain FROM tmp1
+INSERT INTO tmp_grouped_by_one_col 
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_two_col
+SELECT v1, v2, count(distinct v3) GROUP BY v1, v2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain FROM tmp1
+INSERT INTO tmp_grouped_by_one_col 
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_two_col
+SELECT v1, v2, count(distinct v3) GROUP BY v1, v2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-2 is a root stage
+  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-2
+  Stage-1 depends on stages: Stage-3
+
+STAGE PLANS:
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: tmp1
+            Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: v1 (type: string), v2 (type: string), v3 (type: string)
+              outputColumnNames: v1, v2, v3
+              Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: count(DISTINCT v2), count(DISTINCT v3)
+                keys: v1 (type: string), v2 (type: string), v3 (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                  sort order: +++
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: v1 (type: string), v2 (type: string), v3 (type: string)
+              outputColumnNames: v1, v2, v3
+              Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: count(DISTINCT v3)
+                keys: v1 (type: string), v2 (type: string), v3 (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int)
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name: default.tmp_grouped_by_one_col
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: false
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.tmp_grouped_by_one_col
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+              sort order: +++
+              Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+              Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(DISTINCT KEY._col2:0._col0)
+          keys: KEY._col0 (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: string), UDFToInteger(_col2) (type: int)
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name: default.tmp_grouped_by_two_col
+
+  Stage: Stage-1
+    Move Operator
+      tables:
+          replace: false
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.tmp_grouped_by_two_col
+
+PREHOOK: query: FROM tmp1
+INSERT INTO tmp_grouped_by_one_col 
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_two_col
+SELECT v1, v2, count(distinct v3) GROUP BY v1, v2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tmp1
+PREHOOK: Output: default@tmp_grouped_by_one_col
+PREHOOK: Output: default@tmp_grouped_by_two_col
+POSTHOOK: query: FROM tmp1
+INSERT INTO tmp_grouped_by_one_col 
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_two_col
+SELECT v1, v2, count(distinct v3) GROUP BY v1, v2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tmp1
+POSTHOOK: Output: default@tmp_grouped_by_one_col
+POSTHOOK: Output: default@tmp_grouped_by_two_col
+POSTHOOK: Lineage: tmp_grouped_by_one_col.cnt__v2 EXPRESSION [(tmp1)tmp1.FieldSchema(name:v2, type:string, comment:null), ]
+POSTHOOK: Lineage: tmp_grouped_by_one_col.cnt__v3 EXPRESSION [(tmp1)tmp1.FieldSchema(name:v3, type:string, comment:null), ]
+POSTHOOK: Lineage: tmp_grouped_by_one_col.v1 SIMPLE [(tmp1)tmp1.FieldSchema(name:v1, type:string, comment:null), ]
+POSTHOOK: Lineage: tmp_grouped_by_two_col.cnt__v3 EXPRESSION [(tmp1)tmp1.FieldSchema(name:v3, type:string, comment:null), ]
+POSTHOOK: Lineage: tmp_grouped_by_two_col.v1 SIMPLE [(tmp1)tmp1.FieldSchema(name:v1, type:string, comment:null), ]
+POSTHOOK: Lineage: tmp_grouped_by_two_col.v2 SIMPLE [(tmp1)tmp1.FieldSchema(name:v2, type:string, comment:null), ]
+PREHOOK: query: select * from tmp_grouped_by_two_col
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tmp_grouped_by_two_col
+#### A masked pattern was here ####
+POSTHOOK: query: select * from tmp_grouped_by_two_col
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tmp_grouped_by_two_col
+#### A masked pattern was here ####
+v1	v2	2
+PREHOOK: query: truncate table tmp_grouped_by_two_col
+PREHOOK: type: TRUNCATETABLE
+PREHOOK: Output: default@tmp_grouped_by_two_col
+POSTHOOK: query: truncate table tmp_grouped_by_two_col
+POSTHOOK: type: TRUNCATETABLE
+POSTHOOK: Output: default@tmp_grouped_by_two_col
+PREHOOK: query: explain FROM tmp1
+INSERT INTO tmp_grouped_by_one_col 
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_two_col
+SELECT v1, v2, count(v3) GROUP BY v1, v2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain FROM tmp1
+INSERT INTO tmp_grouped_by_one_col 
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_two_col
+SELECT v1, v2, count(v3) GROUP BY v1, v2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-2 is a root stage
+  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-2
+  Stage-1 depends on stages: Stage-3
+
+STAGE PLANS:
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: tmp1
+            Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: v1 (type: string), v2 (type: string), v3 (type: string)
+              outputColumnNames: v1, v2, v3
+              Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: count(DISTINCT v2), count(DISTINCT v3)
+                keys: v1 (type: string), v2 (type: string), v3 (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                  sort order: +++
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: v1 (type: string), v2 (type: string), v3 (type: string)
+              outputColumnNames: v1, v2, v3
+              Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: count(v3)
+                keys: v1 (type: string), v2 (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int)
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name: default.tmp_grouped_by_one_col
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: false
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.tmp_grouped_by_one_col
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string), _col1 (type: string)
+              sort order: ++
+              Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+              Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col2 (type: bigint)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          keys: KEY._col0 (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: string), UDFToInteger(_col2) (type: int)
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name: default.tmp_grouped_by_two_col
+
+  Stage: Stage-1
+    Move Operator
+      tables:
+          replace: false
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.tmp_grouped_by_two_col
+
+PREHOOK: query: FROM tmp1
+INSERT INTO tmp_grouped_by_one_col 
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_two_col
+SELECT v1, v2, count(v3) GROUP BY v1, v2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tmp1
+PREHOOK: Output: default@tmp_grouped_by_one_col
+PREHOOK: Output: default@tmp_grouped_by_two_col
+POSTHOOK: query: FROM tmp1
+INSERT INTO tmp_grouped_by_one_col 
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_two_col
+SELECT v1, v2, count(v3) GROUP BY v1, v2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tmp1
+POSTHOOK: Output: default@tmp_grouped_by_one_col
+POSTHOOK: Output: default@tmp_grouped_by_two_col
+POSTHOOK: Lineage: tmp_grouped_by_one_col.cnt__v2 EXPRESSION [(tmp1)tmp1.FieldSchema(name:v2, type:string, comment:null), ]
+POSTHOOK: Lineage: tmp_grouped_by_one_col.cnt__v3 EXPRESSION [(tmp1)tmp1.FieldSchema(name:v3, type:string, comment:null), ]
+POSTHOOK: Lineage: tmp_grouped_by_one_col.v1 SIMPLE [(tmp1)tmp1.FieldSchema(name:v1, type:string, comment:null), ]
+POSTHOOK: Lineage: tmp_grouped_by_two_col.cnt__v3 EXPRESSION [(tmp1)tmp1.FieldSchema(name:v3, type:string, comment:null), ]
+POSTHOOK: Lineage: tmp_grouped_by_two_col.v1 SIMPLE [(tmp1)tmp1.FieldSchema(name:v1, type:string, comment:null), ]
+POSTHOOK: Lineage: tmp_grouped_by_two_col.v2 SIMPLE [(tmp1)tmp1.FieldSchema(name:v2, type:string, comment:null), ]
+PREHOOK: query: select * from tmp_grouped_by_two_col
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tmp_grouped_by_two_col
+#### A masked pattern was here ####
+POSTHOOK: query: select * from tmp_grouped_by_two_col
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tmp_grouped_by_two_col
+#### A masked pattern was here ####
+v1	v2	2
+PREHOOK: query: explain FROM tmp1
+INSERT INTO tmp_grouped_by_one_col 
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_all_col
+SELECT v1, v2, v3 GROUP BY v1, v2, v3
+PREHOOK: type: QUERY
+POSTHOOK: query: explain FROM tmp1
+INSERT INTO tmp_grouped_by_one_col 
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_all_col
+SELECT v1, v2, v3 GROUP BY v1, v2, v3
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-2 is a root stage
+  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-2
+  Stage-1 depends on stages: Stage-3
+
+STAGE PLANS:
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: tmp1
+            Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: v1 (type: string), v2 (type: string), v3 (type: string)
+              outputColumnNames: v1, v2, v3
+              Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: count(DISTINCT v2), count(DISTINCT v3)
+                keys: v1 (type: string), v2 (type: string), v3 (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                  sort order: +++
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: v1 (type: string), v2 (type: string), v3 (type: string)
+              outputColumnNames: v1, v2, v3
+              Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                keys: v1 (type: string), v2 (type: string), v3 (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int)
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name: default.tmp_grouped_by_one_col
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: false
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.tmp_grouped_by_one_col
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+              sort order: +++
+              Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+              Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                name: default.tmp_grouped_by_all_col
+
+  Stage: Stage-1
+    Move Operator
+      tables:
+          replace: false
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.tmp_grouped_by_all_col
+
+PREHOOK: query: FROM tmp1
+INSERT INTO tmp_grouped_by_one_col 
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_all_col
+SELECT v1, v2, v3 GROUP BY v1, v2, v3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tmp1
+PREHOOK: Output: default@tmp_grouped_by_all_col
+PREHOOK: Output: default@tmp_grouped_by_one_col
+POSTHOOK: query: FROM tmp1
+INSERT INTO tmp_grouped_by_one_col 
+SELECT v1, count(distinct v2), count(distinct v3) GROUP BY v1
+INSERT INTO tmp_grouped_by_all_col
+SELECT v1, v2, v3 GROUP BY v1, v2, v3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tmp1
+POSTHOOK: Output: default@tmp_grouped_by_all_col
+POSTHOOK: Output: default@tmp_grouped_by_one_col
+POSTHOOK: Lineage: tmp_grouped_by_all_col.v1 SIMPLE [(tmp1)tmp1.FieldSchema(name:v1, type:string, comment:null), ]
+POSTHOOK: Lineage: tmp_grouped_by_all_col.v2 SIMPLE [(tmp1)tmp1.FieldSchema(name:v2, type:string, comment:null), ]
+POSTHOOK: Lineage: tmp_grouped_by_all_col.v3 SIMPLE [(tmp1)tmp1.FieldSchema(name:v3, type:string, comment:null), ]
+POSTHOOK: Lineage: tmp_grouped_by_one_col.cnt__v2 EXPRESSION [(tmp1)tmp1.FieldSchema(name:v2, type:string, comment:null), ]
+POSTHOOK: Lineage: tmp_grouped_by_one_col.cnt__v3 EXPRESSION [(tmp1)tmp1.FieldSchema(name:v3, type:string, comment:null), ]
+POSTHOOK: Lineage: tmp_grouped_by_one_col.v1 SIMPLE [(tmp1)tmp1.FieldSchema(name:v1, type:string, comment:null), ]
+PREHOOK: query: select * from tmp_grouped_by_all_col
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tmp_grouped_by_all_col
+#### A masked pattern was here ####
+POSTHOOK: query: select * from tmp_grouped_by_all_col
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tmp_grouped_by_all_col
+#### A masked pattern was here ####
+v1	v2	v3
+v1	v2	v3a

http://git-wip-us.apache.org/repos/asf/hive/blob/5667af34/ql/src/test/results/clientpositive/multi_insert_gby3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/multi_insert_gby3.q.out b/ql/src/test/results/clientpositive/multi_insert_gby3.q.out
index 6c75853..677d9d4 100644
--- a/ql/src/test/results/clientpositive/multi_insert_gby3.q.out
+++ b/ql/src/test/results/clientpositive/multi_insert_gby3.q.out
@@ -39,11 +39,12 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-0 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-0, Stage-4, Stage-6
+  Stage-3 depends on stages: Stage-0, Stage-4, Stage-7
   Stage-4 depends on stages: Stage-2
-  Stage-5 depends on stages: Stage-1, Stage-4, Stage-6
-  Stage-1 depends on stages: Stage-2
-  Stage-6 depends on stages: Stage-2
+  Stage-6 depends on stages: Stage-1, Stage-4, Stage-7
+  Stage-5 depends on stages: Stage-2
+  Stage-1 depends on stages: Stage-5
+  Stage-7 depends on stages: Stage-5
 
 STAGE PLANS:
   Stage: Stage-2
@@ -56,82 +57,67 @@ STAGE PLANS:
               expressions: key (type: string), UDFToDouble(key) (type: double), value (type: string)
               outputColumnNames: _col0, _col1, _col2
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-              Reduce Output Operator
-                key expressions: _col0 (type: string), _col2 (type: string)
-                sort order: ++
-                Map-reduce partition columns: _col0 (type: string)
+              Select Operator
+                expressions: _col0 (type: string), _col2 (type: string)
+                outputColumnNames: _col0, _col2
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                value expressions: _col1 (type: double)
-      Execution mode: vectorized
+                Group By Operator
+                  aggregations: count(DISTINCT _col2)
+                  keys: _col0 (type: string), _col2 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string), _col1 (type: string)
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: string)
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: sum(_col1)
+                keys: _col0 (type: string), _col2 (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
       Reduce Operator Tree:
-        Forward
-          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-          Group By Operator
-            aggregations: count(DISTINCT KEY._col1:0._col0)
-            keys: KEY._col0 (type: string)
-            mode: complete
+        Group By Operator
+          aggregations: count(DISTINCT KEY._col1:0._col0)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: string), UDFToDouble(_col1) (type: double)
             outputColumnNames: _col0, _col1
             Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: _col0 (type: string), UDFToDouble(_col1) (type: double)
-              outputColumnNames: _col0, _col1
+            File Output Operator
+              compressed: false
               Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-              File Output Operator
-                compressed: false
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                table:
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    name: default.e1_n2
-              Select Operator
-                expressions: _col0 (type: string), _col1 (type: double)
-                outputColumnNames: key, keyd
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll')
-                  mode: hash
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
-                  File Output Operator
-                    compressed: false
-                    table:
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-          Group By Operator
-            aggregations: sum(VALUE._col0)
-            keys: KEY._col0 (type: string), KEY._col1:0._col0 (type: string)
-            mode: complete
-            outputColumnNames: _col0, _col1, _col2
-            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name: default.e1_n2
             Select Operator
-              expressions: _col0 (type: string), _col2 (type: double), _col1 (type: string)
-              outputColumnNames: _col0, _col1, _col2
+              expressions: _col0 (type: string), _col1 (type: double)
+              outputColumnNames: key, keyd
               Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-              File Output Operator
-                compressed: false
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                table:
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    name: default.e2_n3
-              Select Operator
-                expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string)
-                outputColumnNames: key, keyd, value
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll'), compute_stats(value, 'hll')
-                  mode: hash
-                  outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE
-                  File Output Operator
-                    compressed: false
-                    table:
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -157,7 +143,7 @@ STAGE PLANS:
           TableScan
             Reduce Output Operator
               sort order: 
-              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE
               value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,bitvector:binary>)
       Execution mode: vectorized
       Reduce Operator Tree:
@@ -165,16 +151,16 @@ STAGE PLANS:
           aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
           mode: mergepartial
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
-            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
-  Stage: Stage-5
+  Stage: Stage-6
     Stats Work
       Basic Stats Work:
       Column Stats Desc:
@@ -182,6 +168,52 @@ STAGE PLANS:
           Column Types: string, double, string
           Table: default.e2_n3
 
+  Stage: Stage-5
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string), _col1 (type: string)
+              sort order: ++
+              Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col2 (type: double)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0)
+          keys: KEY._col0 (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: string), _col2 (type: double), _col1 (type: string)
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name: default.e2_n3
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string)
+              outputColumnNames: key, keyd, value
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll'), compute_stats(value, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
   Stage: Stage-1
     Move Operator
       tables:
@@ -192,7 +224,7 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.e2_n3
 
-  Stage: Stage-6
+  Stage: Stage-7
     Map Reduce
       Map Operator Tree:
           TableScan
@@ -232,11 +264,12 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-0 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-0, Stage-4, Stage-6
+  Stage-3 depends on stages: Stage-0, Stage-4, Stage-7
   Stage-4 depends on stages: Stage-2
-  Stage-5 depends on stages: Stage-1, Stage-4, Stage-6
-  Stage-1 depends on stages: Stage-2
-  Stage-6 depends on stages: Stage-2
+  Stage-6 depends on stages: Stage-1, Stage-4, Stage-7
+  Stage-5 depends on stages: Stage-2
+  Stage-1 depends on stages: Stage-5
+  Stage-7 depends on stages: Stage-5
 
 STAGE PLANS:
   Stage: Stage-2
@@ -249,82 +282,68 @@ STAGE PLANS:
               expressions: key (type: string), UDFToDouble(key) (type: double), value (type: string)
               outputColumnNames: _col0, _col1, _col2
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-              Reduce Output Operator
-                key expressions: _col0 (type: string), _col2 (type: string)
-                sort order: ++
-                Map-reduce partition columns: _col0 (type: string)
+              Group By Operator
+                aggregations: sum(_col1)
+                keys: _col0 (type: string), _col2 (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                value expressions: _col1 (type: double)
-      Execution mode: vectorized
-      Reduce Operator Tree:
-        Forward
-          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-          Group By Operator
-            aggregations: count(DISTINCT KEY._col1:0._col0)
-            keys: KEY._col0 (type: string)
-            mode: complete
-            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: _col0 (type: string), UDFToDouble(_col1) (type: double)
-              outputColumnNames: _col0, _col1
-              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-              File Output Operator
-                compressed: false
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                table:
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    name: default.e1_n2
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: string)
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col2 (type: double)
               Select Operator
-                expressions: _col0 (type: string), _col1 (type: double)
-                outputColumnNames: key, keyd
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                expressions: _col0 (type: string), _col2 (type: string)
+                outputColumnNames: _col0, _col2
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                 Group By Operator
-                  aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll')
+                  aggregations: count(DISTINCT _col2)
+                  keys: _col0 (type: string), _col2 (type: string)
                   mode: hash
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-          Group By Operator
-            aggregations: sum(VALUE._col0)
-            keys: KEY._col0 (type: string), KEY._col1:0._col0 (type: string)
-            mode: complete
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0)
+          keys: KEY._col0 (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: string), _col2 (type: double), _col1 (type: string)
             outputColumnNames: _col0, _col1, _col2
             Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name: default.e2_n3
             Select Operator
-              expressions: _col0 (type: string), _col2 (type: double), _col1 (type: string)
-              outputColumnNames: _col0, _col1, _col2
+              expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string)
+              outputColumnNames: key, keyd, value
               Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-              File Output Operator
-                compressed: false
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                table:
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    name: default.e2_n3
-              Select Operator
-                expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string)
-                outputColumnNames: key, keyd, value
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll'), compute_stats(value, 'hll')
-                  mode: hash
-                  outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE
-                  File Output Operator
-                    compressed: false
-                    table:
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll'), compute_stats(value, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -334,15 +353,15 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              name: default.e1_n2
+              name: default.e2_n3
 
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
       Column Stats Desc:
-          Columns: key, keyd
-          Column Types: string, double
-          Table: default.e1_n2
+          Columns: key, keyd, value
+          Column Types: string, double, string
+          Table: default.e2_n3
 
   Stage: Stage-4
     Map Reduce
@@ -350,30 +369,75 @@ STAGE PLANS:
           TableScan
             Reduce Output Operator
               sort order: 
-              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,bitvector:binary>)
+              Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
       Execution mode: vectorized
       Reduce Operator Tree:
         Group By Operator
-          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
           mode: mergepartial
-          outputColumnNames: _col0, _col1
-          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
-            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
-  Stage: Stage-5
+  Stage: Stage-6
     Stats Work
       Basic Stats Work:
       Column Stats Desc:
-          Columns: key, keyd, value
-          Column Types: string, double, string
-          Table: default.e2_n3
+          Columns: key, keyd
+          Column Types: string, double
+          Table: default.e1_n2
+
+  Stage: Stage-5
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string), _col1 (type: string)
+              sort order: ++
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(DISTINCT KEY._col1:0._col0)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: string), UDFToDouble(_col1) (type: double)
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name: default.e1_n2
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: double)
+              outputColumnNames: key, keyd
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-1
     Move Operator
@@ -383,26 +447,26 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              name: default.e2_n3
+              name: default.e1_n2
 
-  Stage: Stage-6
+  Stage: Stage-7
     Map Reduce
       Map Operator Tree:
           TableScan
             Reduce Output Operator
               sort order: 
-              Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+              Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,bitvector:binary>)
       Execution mode: vectorized
       Reduce Operator Tree:
         Group By Operator
-          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
           mode: mergepartial
-          outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
-            Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1967,15 +2031,16 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-3 is a root stage
   Stage-0 depends on stages: Stage-3
-  Stage-4 depends on stages: Stage-0, Stage-5, Stage-7, Stage-10
+  Stage-4 depends on stages: Stage-0, Stage-5, Stage-8, Stage-11
   Stage-5 depends on stages: Stage-3
-  Stage-6 depends on stages: Stage-1, Stage-5, Stage-7, Stage-10
-  Stage-9 depends on stages: Stage-2, Stage-5, Stage-7, Stage-10
-  Stage-1 depends on stages: Stage-3
-  Stage-7 depends on stages: Stage-3
-  Stage-8 depends on stages: Stage-3
-  Stage-2 depends on stages: Stage-8
-  Stage-10 depends on stages: Stage-8
+  Stage-7 depends on stages: Stage-1, Stage-5, Stage-8, Stage-11
+  Stage-10 depends on stages: Stage-2, Stage-5, Stage-8, Stage-11
+  Stage-6 depends on stages: Stage-3
+  Stage-1 depends on stages: Stage-6
+  Stage-8 depends on stages: Stage-6
+  Stage-9 depends on stages: Stage-3
+  Stage-2 depends on stages: Stage-9
+  Stage-11 depends on stages: Stage-9
 
 STAGE PLANS:
   Stage: Stage-3
@@ -1988,12 +2053,33 @@ STAGE PLANS:
               expressions: key (type: string), UDFToDouble(key) (type: double), value (type: string)
               outputColumnNames: _col0, _col1, _col2
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-              Reduce Output Operator
-                key expressions: _col0 (type: string), _col2 (type: string)
-                sort order: ++
-                Map-reduce partition columns: _col0 (type: string)
+              Select Operator
+                expressions: _col0 (type: string), _col2 (type: string)
+                outputColumnNames: _col0, _col2
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                value expressions: _col1 (type: double)
+                Group By Operator
+                  aggregations: count(DISTINCT _col2)
+                  keys: _col0 (type: string), _col2 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string), _col1 (type: string)
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: string)
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: sum(_col1)
+                keys: _col0 (type: string), _col2 (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
               Group By Operator
                 aggregations: count(DISTINCT _col1)
                 keys: _col0 (type: string), _col1 (type: double), _col2 (type: string)
@@ -2007,74 +2093,39 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
       Reduce Operator Tree:
-        Forward
-          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-          Group By Operator
-            aggregations: count(DISTINCT KEY._col1:0._col0)
-            keys: KEY._col0 (type: string)
-            mode: complete
+        Group By Operator
+          aggregations: count(DISTINCT KEY._col1:0._col0)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: string), UDFToDouble(_col1) (type: double)
             outputColumnNames: _col0, _col1
             Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: _col0 (type: string), UDFToDouble(_col1) (type: double)
-              outputColumnNames: _col0, _col1
+            File Output Operator
+              compressed: false
               Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-              File Output Operator
-                compressed: false
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                table:
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    name: default.e1_n2
-              Select Operator
-                expressions: _col0 (type: string), _col1 (type: double)
-                outputColumnNames: key, keyd
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll')
-                  mode: hash
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
-                  File Output Operator
-                    compressed: false
-                    table:
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-          Group By Operator
-            aggregations: sum(VALUE._col0)
-            keys: KEY._col0 (type: string), KEY._col1:0._col0 (type: string)
-            mode: complete
-            outputColumnNames: _col0, _col1, _col2
-            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name: default.e1_n2
             Select Operator
-              expressions: _col0 (type: string), _col2 (type: double), _col1 (type: string)
-              outputColumnNames: _col0, _col1, _col2
+              expressions: _col0 (type: string), _col1 (type: double)
+              outputColumnNames: key, keyd
               Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-              File Output Operator
-                compressed: false
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                table:
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    name: default.e2_n3
-              Select Operator
-                expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string)
-                outputColumnNames: key, keyd, value
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll'), compute_stats(value, 'hll')
-                  mode: hash
-                  outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE
-                  File Output Operator
-                    compressed: false
-                    table:
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -2100,7 +2151,7 @@ STAGE PLANS:
           TableScan
             Reduce Output Operator
               sort order: 
-              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE
               value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,bitvector:binary>)
       Execution mode: vectorized
       Reduce Operator Tree:
@@ -2108,16 +2159,16 @@ STAGE PLANS:
           aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
           mode: mergepartial
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
-            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
-  Stage: Stage-6
+  Stage: Stage-7
     Stats Work
       Basic Stats Work:
       Column Stats Desc:
@@ -2125,7 +2176,7 @@ STAGE PLANS:
           Column Types: string, double, string
           Table: default.e2_n3
 
-  Stage: Stage-9
+  Stage: Stage-10
     Stats Work
       Basic Stats Work:
       Column Stats Desc:
@@ -2133,6 +2184,52 @@ STAGE PLANS:
           Column Types: string, double
           Table: default.e3
 
+  Stage: Stage-6
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string), _col1 (type: string)
+              sort order: ++
+              Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col2 (type: double)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0)
+          keys: KEY._col0 (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: string), _col2 (type: double), _col1 (type: string)
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name: default.e2_n3
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string)
+              outputColumnNames: key, keyd, value
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll'), compute_stats(value, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
   Stage: Stage-1
     Move Operator
       tables:
@@ -2143,7 +2240,7 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.e2_n3
 
-  Stage: Stage-7
+  Stage: Stage-8
     Map Reduce
       Map Operator Tree:
           TableScan
@@ -2166,7 +2263,7 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
-  Stage: Stage-8
+  Stage: Stage-9
     Map Reduce
       Map Operator Tree:
           TableScan
@@ -2221,7 +2318,7 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.e3
 
-  Stage: Stage-10
+  Stage: Stage-11
     Map Reduce
       Map Operator Tree:
           TableScan


[3/4] hive git commit: HIVE-19690 : multi-insert query with multiple GBY, and distinct in only some branches can produce incorrect results (Sergey Shelukhin, reviewed by Ashutosh Chauhan)

Posted by se...@apache.org.
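
For context, the query shape this fix targets — one scan of src feeding a GROUP BY branch that uses DISTINCT and a second branch that does not — looks roughly like the sketch below. It is reconstructed from the explain plans in these diffs (tables e1_n2/e2_n3 and columns key, keyd, value all appear in the plans), so the exact text of multi_insert_gby3.q may differ; the inner subquery alias s is illustrative.

  -- Minimal sketch of a multi-insert with DISTINCT in only one branch.
  -- Per the old plans above, both branches shared a single map-side
  -- shuffle behind a Forward operator; the fix gives each GROUP BY its
  -- own map-side Group By Operator and its own reducer.
  FROM (SELECT key, CAST(key AS double) AS keyd, value FROM src) s
  INSERT OVERWRITE TABLE e1_n2            -- DISTINCT branch
    SELECT key, COUNT(DISTINCT value)
    GROUP BY key
  INSERT OVERWRITE TABLE e2_n3            -- plain-aggregate branch
    SELECT key, SUM(keyd), value
    GROUP BY key, value;
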
http://git-wip-us.apache.org/repos/asf/hive/blob/5ec8e356/ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out b/ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out
index 9c4cdec..113ff46 100644
--- a/ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out
+++ b/ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out
@@ -47,10 +47,11 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2)
+        Reducer 2 <- Map 4 (GROUP PARTITION-LEVEL SORT, 2)
+        Reducer 3 <- Map 5 (GROUP, 2)
 #### A masked pattern was here ####
       Vertices:
-        Map 1 
+        Map 4 
             Map Operator Tree:
                 TableScan
                   alias: src
@@ -59,53 +60,84 @@ STAGE PLANS:
                     expressions: key (type: string), UDFToDouble(key) (type: double), value (type: string)
                     outputColumnNames: _col0, _col1, _col2
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: string), _col2 (type: string)
-                      sort order: ++
-                      Map-reduce partition columns: _col0 (type: string)
+                    Select Operator
+                      expressions: _col0 (type: string), _col2 (type: string)
+                      outputColumnNames: _col0, _col2
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col1 (type: double)
+                      Group By Operator
+                        aggregations: count(DISTINCT _col2)
+                        keys: _col0 (type: string), _col2 (type: string)
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2
+                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: string), _col1 (type: string)
+                          sort order: ++
+                          Map-reduce partition columns: _col0 (type: string)
+                          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string), UDFToDouble(key) (type: double), value (type: string)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: sum(_col1)
+                      keys: _col0 (type: string), _col2 (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: string)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col2 (type: double)
             Execution mode: vectorized
         Reducer 2 
             Reduce Operator Tree:
-              Forward
-                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  aggregations: count(DISTINCT KEY._col1:0._col0)
-                  keys: KEY._col0 (type: string)
-                  mode: complete
+              Group By Operator
+                aggregations: count(DISTINCT KEY._col1:0._col0)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: string), UDFToDouble(_col1) (type: double)
                   outputColumnNames: _col0, _col1
                   Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: _col0 (type: string), UDFToDouble(_col1) (type: double)
-                    outputColumnNames: _col0, _col1
+                  File Output Operator
+                    compressed: false
                     Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          name: default.e1_n2
-                Group By Operator
-                  aggregations: sum(VALUE._col0)
-                  keys: KEY._col0 (type: string), KEY._col1:0._col0 (type: string)
-                  mode: complete
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: default.e1_n2
+        Reducer 3 
+            Execution mode: vectorized
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: string), _col2 (type: double), _col1 (type: string)
                   outputColumnNames: _col0, _col1, _col2
                   Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: _col0 (type: string), _col2 (type: double), _col1 (type: string)
-                    outputColumnNames: _col0, _col1, _col2
+                  File Output Operator
+                    compressed: false
                     Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          name: default.e2_n3
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: default.e2_n3
 
   Stage: Stage-0
     Move Operator
@@ -160,10 +192,11 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2)
+        Reducer 2 <- Map 4 (GROUP, 2)
+        Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 2)
 #### A masked pattern was here ####
       Vertices:
-        Map 1 
+        Map 4 
             Map Operator Tree:
                 TableScan
                   alias: src
@@ -172,53 +205,84 @@ STAGE PLANS:
                     expressions: key (type: string), UDFToDouble(key) (type: double), value (type: string)
                     outputColumnNames: _col0, _col1, _col2
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: string), _col2 (type: string)
-                      sort order: ++
-                      Map-reduce partition columns: _col0 (type: string)
+                    Group By Operator
+                      aggregations: sum(_col1)
+                      keys: _col0 (type: string), _col2 (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col1 (type: double)
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: string)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col2 (type: double)
             Execution mode: vectorized
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string), UDFToDouble(key) (type: double), value (type: string)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: _col0 (type: string), _col2 (type: string)
+                      outputColumnNames: _col0, _col2
+                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: count(DISTINCT _col2)
+                        keys: _col0 (type: string), _col2 (type: string)
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2
+                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: string), _col1 (type: string)
+                          sort order: ++
+                          Map-reduce partition columns: _col0 (type: string)
+                          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
         Reducer 2 
+            Execution mode: vectorized
             Reduce Operator Tree:
-              Forward
-                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  aggregations: count(DISTINCT KEY._col1:0._col0)
-                  keys: KEY._col0 (type: string)
-                  mode: complete
-                  outputColumnNames: _col0, _col1
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: string), _col2 (type: double), _col1 (type: string)
+                  outputColumnNames: _col0, _col1, _col2
                   Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: _col0 (type: string), UDFToDouble(_col1) (type: double)
-                    outputColumnNames: _col0, _col1
+                  File Output Operator
+                    compressed: false
                     Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          name: default.e1_n2
-                Group By Operator
-                  aggregations: sum(VALUE._col0)
-                  keys: KEY._col0 (type: string), KEY._col1:0._col0 (type: string)
-                  mode: complete
-                  outputColumnNames: _col0, _col1, _col2
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: default.e2_n3
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(DISTINCT KEY._col1:0._col0)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: string), UDFToDouble(_col1) (type: double)
+                  outputColumnNames: _col0, _col1
                   Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: _col0 (type: string), _col2 (type: double), _col1 (type: string)
-                    outputColumnNames: _col0, _col1, _col2
+                  File Output Operator
+                    compressed: false
                     Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          name: default.e2_n3
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: default.e1_n2
 
   Stage: Stage-0
     Move Operator
@@ -228,7 +292,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              name: default.e1_n2
+              name: default.e2_n3
 
   Stage: Stage-3
     Stats Work
@@ -242,7 +306,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              name: default.e2_n3
+              name: default.e1_n2
 
   Stage: Stage-4
     Stats Work
@@ -1731,11 +1795,36 @@ STAGE PLANS:
   Stage: Stage-3
     Spark
       Edges:
-        Reducer 2 <- Map 4 (GROUP PARTITION-LEVEL SORT, 2)
-        Reducer 3 <- Map 5 (GROUP, 2)
+        Reducer 2 <- Map 5 (GROUP PARTITION-LEVEL SORT, 2)
+        Reducer 3 <- Map 6 (GROUP, 2)
+        Reducer 4 <- Map 7 (GROUP, 2)
 #### A masked pattern was here ####
       Vertices:
-        Map 4 
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string), UDFToDouble(key) (type: double), value (type: string)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: _col0 (type: string), _col2 (type: string)
+                      outputColumnNames: _col0, _col2
+                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: count(DISTINCT _col2)
+                        keys: _col0 (type: string), _col2 (type: string)
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2
+                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: string), _col1 (type: string)
+                          sort order: ++
+                          Map-reduce partition columns: _col0 (type: string)
+                          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+        Map 6 
             Map Operator Tree:
                 TableScan
                   alias: src
@@ -1744,14 +1833,20 @@ STAGE PLANS:
                     expressions: key (type: string), UDFToDouble(key) (type: double), value (type: string)
                     outputColumnNames: _col0, _col1, _col2
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: string), _col2 (type: string)
-                      sort order: ++
-                      Map-reduce partition columns: _col0 (type: string)
+                    Group By Operator
+                      aggregations: sum(_col1)
+                      keys: _col0 (type: string), _col2 (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col1 (type: double)
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: string)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col2 (type: double)
             Execution mode: vectorized
-        Map 5 
+        Map 7 
             Map Operator Tree:
                 TableScan
                   alias: src
@@ -1773,45 +1868,46 @@ STAGE PLANS:
                         Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
         Reducer 2 
             Reduce Operator Tree:
-              Forward
-                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  aggregations: count(DISTINCT KEY._col1:0._col0)
-                  keys: KEY._col0 (type: string)
-                  mode: complete
+              Group By Operator
+                aggregations: count(DISTINCT KEY._col1:0._col0)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: string), UDFToDouble(_col1) (type: double)
                   outputColumnNames: _col0, _col1
                   Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: _col0 (type: string), UDFToDouble(_col1) (type: double)
-                    outputColumnNames: _col0, _col1
+                  File Output Operator
+                    compressed: false
                     Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          name: default.e1_n2
-                Group By Operator
-                  aggregations: sum(VALUE._col0)
-                  keys: KEY._col0 (type: string), KEY._col1:0._col0 (type: string)
-                  mode: complete
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: default.e1_n2
+        Reducer 3 
+            Execution mode: vectorized
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: string), _col2 (type: double), _col1 (type: string)
                   outputColumnNames: _col0, _col1, _col2
                   Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: _col0 (type: string), _col2 (type: double), _col1 (type: string)
-                    outputColumnNames: _col0, _col1, _col2
+                  File Output Operator
+                    compressed: false
                     Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          name: default.e2_n3
-        Reducer 3 
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: default.e2_n3
+        Reducer 4 
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(DISTINCT KEY._col3:0._col0)