You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2017/12/12 00:00:45 UTC

[47/51] [partial] hive git commit: HIVE-13567 : Enable auto-gather column stats by default (Zoltan Haindrich, Pengcheng Xiong via Ashutosh Chauhan)

http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/bucketmapjoin5.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/bucketmapjoin5.q.out b/ql/src/test/results/clientpositive/bucketmapjoin5.q.out
index 1096912..21937f4 100644
--- a/ql/src/test/results/clientpositive/bucketmapjoin5.q.out
+++ b/ql/src/test/results/clientpositive/bucketmapjoin5.q.out
@@ -272,6 +272,22 @@ STAGE PLANS:
                     TotalFiles: 1
                     GatherStats: true
                     MultiFileSpray: false
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                    outputColumnNames: key, value1, value2
+                    Statistics: Num rows: 121 Data size: 12786 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll')
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        null sort order: 
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+                        tag: -1
+                        value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+                        auto parallelism: false
       Local Work:
         Map Reduce Local Work
       Path -> Alias:
@@ -376,6 +392,35 @@ STAGE PLANS:
       Truncated Path -> Alias:
         /srcbucket_mapjoin_part/ds=2008-04-08 [b]
         /srcbucket_mapjoin_part/ds=2008-04-09 [b]
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1,_col2
+                  columns.types struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
 
   Stage: Stage-8
     Conditional Operator
@@ -418,6 +463,11 @@ STAGE PLANS:
     Stats Work
       Basic Stats Work:
 #### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key, value1, value2
+          Column Types: string, string, string
+          Table: default.bucketmapjoin_tmp_result
+          Is Table Level Stats: true
 
   Stage: Stage-4
     Map Reduce
@@ -793,7 +843,7 @@ STAGE PLANS:
                         input format: org.apache.hadoop.mapred.TextInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         properties:
-                          COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                          COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
                           bucket_count -1
                           column.name.delimiter ,
                           columns key,value1,value2
@@ -814,6 +864,22 @@ STAGE PLANS:
                     TotalFiles: 1
                     GatherStats: true
                     MultiFileSpray: false
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                    outputColumnNames: key, value1, value2
+                    Statistics: Num rows: 63 Data size: 6736 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll')
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        null sort order: 
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+                        tag: -1
+                        value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+                        auto parallelism: false
       Local Work:
         Map Reduce Local Work
       Path -> Alias:
@@ -918,6 +984,35 @@ STAGE PLANS:
       Truncated Path -> Alias:
         /srcbucket_mapjoin_part_2/ds=2008-04-08 [b]
         /srcbucket_mapjoin_part_2/ds=2008-04-09 [b]
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1,_col2
+                  columns.types struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
 
   Stage: Stage-8
     Conditional Operator
@@ -937,7 +1032,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
                 bucket_count -1
                 column.name.delimiter ,
                 columns key,value1,value2
@@ -960,6 +1055,11 @@ STAGE PLANS:
     Stats Work
       Basic Stats Work:
 #### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key, value1, value2
+          Column Types: string, string, string
+          Table: default.bucketmapjoin_tmp_result
+          Is Table Level Stats: true
 
   Stage: Stage-4
     Map Reduce
@@ -975,7 +1075,7 @@ STAGE PLANS:
                   input format: org.apache.hadoop.mapred.TextInputFormat
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   properties:
-                    COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                    COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
                     bucket_count -1
                     column.name.delimiter ,
                     columns key,value1,value2
@@ -1005,7 +1105,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key,value1,value2
@@ -1026,7 +1126,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
                 bucket_count -1
                 column.name.delimiter ,
                 columns key,value1,value2
@@ -1062,7 +1162,7 @@ STAGE PLANS:
                   input format: org.apache.hadoop.mapred.TextInputFormat
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   properties:
-                    COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                    COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
                     bucket_count -1
                     column.name.delimiter ,
                     columns key,value1,value2
@@ -1092,7 +1192,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key,value1,value2
@@ -1113,7 +1213,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
                 bucket_count -1
                 column.name.delimiter ,
                 columns key,value1,value2

http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out b/ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out
index 15286ed..f6652b8 100644
--- a/ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out
+++ b/ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out
@@ -212,6 +212,22 @@ STAGE PLANS:
                     TotalFiles: 1
                     GatherStats: true
                     MultiFileSpray: false
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                    outputColumnNames: key, value1, value2
+                    Statistics: Num rows: 44 Data size: 4620 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll')
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        null sort order: 
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+                        tag: -1
+                        value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+                        auto parallelism: false
       Local Work:
         Map Reduce Local Work
       Path -> Alias:
@@ -265,6 +281,35 @@ STAGE PLANS:
             name: default.srcbucket_mapjoin
       Truncated Path -> Alias:
         /srcbucket_mapjoin [a]
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1,_col2
+                  columns.types struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
 
   Stage: Stage-7
     Conditional Operator
@@ -307,6 +352,11 @@ STAGE PLANS:
     Stats Work
       Basic Stats Work:
 #### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key, value1, value2
+          Column Types: string, string, string
+          Table: default.bucketmapjoin_tmp_result
+          Is Table Level Stats: true
 
   Stage: Stage-3
     Map Reduce

http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out b/ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out
index 3c171d6..095d559 100644
--- a/ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out
+++ b/ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out
@@ -275,6 +275,22 @@ STAGE PLANS:
                     TotalFiles: 1
                     GatherStats: true
                     MultiFileSpray: false
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                    outputColumnNames: key, value1, value2
+                    Statistics: Num rows: 63 Data size: 6736 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll')
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        null sort order: 
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+                        tag: -1
+                        value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+                        auto parallelism: false
       Local Work:
         Map Reduce Local Work
       Path -> Alias:
@@ -328,6 +344,35 @@ STAGE PLANS:
             name: default.srcbucket_mapjoin
       Truncated Path -> Alias:
         /srcbucket_mapjoin [a]
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1,_col2
+                  columns.types struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
 
   Stage: Stage-7
     Conditional Operator
@@ -370,6 +415,11 @@ STAGE PLANS:
     Stats Work
       Basic Stats Work:
 #### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key, value1, value2
+          Column Types: string, string, string
+          Table: default.bucketmapjoin_tmp_result
+          Is Table Level Stats: true
 
   Stage: Stage-3
     Map Reduce

http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/bucketsortoptimize_insert_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_1.q.out b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_1.q.out
index 2d6bd6f..b59c4bc 100644
--- a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_1.q.out
+++ b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_1.q.out
@@ -84,6 +84,10 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.test_table2
 
 PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
 SELECT x.key, x.value from 
@@ -190,6 +194,10 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.test_table2
 
 PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
 SELECT * from 
@@ -296,6 +304,10 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.test_table2
 
 PREHOOK: query: EXPLAIN
 INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
@@ -314,7 +326,8 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -346,6 +359,22 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.test_table2
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string), '1' (type: string)
+            outputColumnNames: key, value, ds
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+              keys: ds (type: string)
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -362,6 +391,39 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.test_table2
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string)
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: EXPLAIN
 INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
@@ -417,4 +479,8 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.test_table2
 

http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/bucketsortoptimize_insert_3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_3.q.out b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_3.q.out
index 7eb36bf..52ef3db 100644
--- a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_3.q.out
+++ b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_3.q.out
@@ -80,6 +80,10 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: value, key
+          Column Types: string, int
+          Table: default.test_table2
 
 PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
 SELECT x.value, x.key from 
@@ -151,7 +155,8 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -183,6 +188,22 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.test_table2
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: int), '1' (type: string)
+            outputColumnNames: value, key, ds
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(value, 'hll'), compute_stats(key, 'hll')
+              keys: ds (type: string)
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -199,6 +220,39 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: value, key
+          Column Types: string, int
+          Table: default.test_table2
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col1 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string)
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
 SELECT x.key, x.value from 

http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/bucketsortoptimize_insert_4.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_4.q.out b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_4.q.out
index 7efb7ce..eaf85c3 100644
--- a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_4.q.out
+++ b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_4.q.out
@@ -65,39 +65,13 @@ FROM test_table1 a JOIN test_table2 b
 ON a.key = b.key WHERE a.ds = '1' and b.ds = '1'
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-6 is a root stage , consists of Stage-7, Stage-8, Stage-1
-  Stage-7 has a backup stage: Stage-1
-  Stage-4 depends on stages: Stage-7
-  Stage-0 depends on stages: Stage-1, Stage-4, Stage-5
-  Stage-2 depends on stages: Stage-0
-  Stage-8 has a backup stage: Stage-1
-  Stage-5 depends on stages: Stage-8
-  Stage-1
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
-  Stage: Stage-6
-    Conditional Operator
-
-  Stage: Stage-7
-    Map Reduce Local Work
-      Alias -> Map Local Tables:
-        b 
-          Fetch Operator
-            limit: -1
-      Alias -> Map Local Operator Tree:
-        b 
-          TableScan
-            alias: b
-            Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
-              HashTable Sink Operator
-                keys:
-                  0 key (type: int)
-                  1 key (type: int)
-
-  Stage: Stage-4
+  Stage: Stage-1
     Map Reduce
       Map Operator Tree:
           TableScan
@@ -106,7 +80,7 @@ STAGE PLANS:
             Filter Operator
               predicate: key is not null (type: boolean)
               Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
-              Map Join Operator
+              Sorted Merge Bucket Map Join Operator
                 condition map:
                      Inner Join 0 to 1
                 keys:
@@ -121,8 +95,6 @@ STAGE PLANS:
                     sort order: +
                     Map-reduce partition columns: _col1 (type: int)
                     value expressions: _col2 (type: string)
-      Local Work:
-        Map Reduce Local Work
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
@@ -134,6 +106,20 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.test_table3
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
+            outputColumnNames: key, key2, value
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(key2, 'hll'), compute_stats(value, 'hll')
+              keys: '1' (type: string)
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2, _col3
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -150,99 +136,35 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, key2, value
+          Column Types: int, int, string
+          Table: default.test_table3
 
-  Stage: Stage-8
-    Map Reduce Local Work
-      Alias -> Map Local Tables:
-        a 
-          Fetch Operator
-            limit: -1
-      Alias -> Map Local Operator Tree:
-        a 
-          TableScan
-            alias: a
-            Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
-              HashTable Sink Operator
-                keys:
-                  0 key (type: int)
-                  1 key (type: int)
-
-  Stage: Stage-5
+  Stage: Stage-3
     Map Reduce
       Map Operator Tree:
           TableScan
-            alias: b
-            Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
-              Map Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 key (type: int)
-                  1 key (type: int)
-                outputColumnNames: _col0, _col1, _col7
-                Select Operator
-                  expressions: _col0 (type: int), concat(_col1, _col7) (type: string)
-                  outputColumnNames: _col1, _col2
-                  Reduce Output Operator
-                    key expressions: _col1 (type: int)
-                    sort order: +
-                    Map-reduce partition columns: _col1 (type: int)
-                    value expressions: _col2 (type: string)
-      Local Work:
-        Map Reduce Local Work
+            Reduce Output Operator
+              key expressions: '1' (type: string)
+              sort order: +
+              Map-reduce partition columns: '1' (type: string)
+              value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
       Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
-          outputColumnNames: _col0, _col1, _col2
-          File Output Operator
-            compressed: false
-            table:
-                input format: org.apache.hadoop.mapred.TextInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                name: default.test_table3
-
-  Stage: Stage-1
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: a
-            Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
-              Sorted Merge Bucket Map Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 key (type: int)
-                  1 key (type: int)
-                outputColumnNames: _col0, _col1, _col7
-                Select Operator
-                  expressions: _col0 (type: int), concat(_col1, _col7) (type: string)
-                  outputColumnNames: _col1, _col2
-                  Reduce Output Operator
-                    key expressions: _col1 (type: int)
-                    sort order: +
-                    Map-reduce partition columns: _col1 (type: int)
-                    value expressions: _col2 (type: string)
-      Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
-          outputColumnNames: _col0, _col1, _col2
-          File Output Operator
-            compressed: false
-            table:
-                input format: org.apache.hadoop.mapred.TextInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                name: default.test_table3
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+          keys: '1' (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Select Operator
+            expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), '1' (type: string)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
 SELECT a.key, a.key, concat(a.value, b.value) 
@@ -340,43 +262,13 @@ FROM test_table1 a JOIN test_table2 b
 ON a.key = b.key WHERE a.ds = '1' and b.ds = '1'
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-6 is a root stage , consists of Stage-7, Stage-8, Stage-1
-  Stage-7 has a backup stage: Stage-1
-  Stage-4 depends on stages: Stage-7
-  Stage-0 depends on stages: Stage-1, Stage-4, Stage-5
-  Stage-2 depends on stages: Stage-0
-  Stage-8 has a backup stage: Stage-1
-  Stage-5 depends on stages: Stage-8
-  Stage-1
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
-  Stage: Stage-6
-    Conditional Operator
-
-  Stage: Stage-7
-    Map Reduce Local Work
-      Alias -> Map Local Tables:
-        $hdt$_1:b 
-          Fetch Operator
-            limit: -1
-      Alias -> Map Local Operator Tree:
-        $hdt$_1:b 
-          TableScan
-            alias: b
-            Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: key (type: int)
-                outputColumnNames: _col0
-                Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
-                HashTable Sink Operator
-                  keys:
-                    0 _col0 (type: int)
-                    1 _col0 (type: int)
-
-  Stage: Stage-4
+  Stage: Stage-1
     Map Reduce
       Map Operator Tree:
           TableScan
@@ -389,7 +281,7 @@ STAGE PLANS:
                 expressions: key (type: int), value (type: string)
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
-                Map Join Operator
+                Sorted Merge Bucket Map Join Operator
                   condition map:
                        Inner Join 0 to 1
                   keys:
@@ -401,8 +293,6 @@ STAGE PLANS:
                     sort order: +
                     Map-reduce partition columns: _col1 (type: string)
                     value expressions: _col0 (type: int)
-      Local Work:
-        Map Reduce Local Work
       Reduce Operator Tree:
         Select Operator
           expressions: VALUE._col0 (type: int), KEY.reducesinkkey0 (type: string)
@@ -414,6 +304,20 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.test_table3
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string), '1' (type: string)
+            outputColumnNames: key, value, ds
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+              keys: ds (type: string)
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -430,105 +334,35 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.test_table3
 
-  Stage: Stage-8
-    Map Reduce Local Work
-      Alias -> Map Local Tables:
-        $hdt$_0:a 
-          Fetch Operator
-            limit: -1
-      Alias -> Map Local Operator Tree:
-        $hdt$_0:a 
-          TableScan
-            alias: a
-            Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: key (type: int), value (type: string)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
-                HashTable Sink Operator
-                  keys:
-                    0 _col0 (type: int)
-                    1 _col0 (type: int)
-
-  Stage: Stage-5
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: b
-            Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: key (type: int)
-                outputColumnNames: _col0
-                Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
-                Map Join Operator
-                  condition map:
-                       Inner Join 0 to 1
-                  keys:
-                    0 _col0 (type: int)
-                    1 _col0 (type: int)
-                  outputColumnNames: _col0, _col1
-                  Reduce Output Operator
-                    key expressions: _col1 (type: string)
-                    sort order: +
-                    Map-reduce partition columns: _col1 (type: string)
-                    value expressions: _col0 (type: int)
-      Local Work:
-        Map Reduce Local Work
-      Reduce Operator Tree:
-        Select Operator
-          expressions: VALUE._col0 (type: int), KEY.reducesinkkey0 (type: string)
-          outputColumnNames: _col0, _col1
-          File Output Operator
-            compressed: false
-            table:
-                input format: org.apache.hadoop.mapred.TextInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                name: default.test_table3
-
-  Stage: Stage-1
+  Stage: Stage-3
     Map Reduce
       Map Operator Tree:
           TableScan
-            alias: a
-            Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: key (type: int), value (type: string)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
-                Sorted Merge Bucket Map Join Operator
-                  condition map:
-                       Inner Join 0 to 1
-                  keys:
-                    0 _col0 (type: int)
-                    1 _col0 (type: int)
-                  outputColumnNames: _col0, _col1
-                  Reduce Output Operator
-                    key expressions: _col1 (type: string)
-                    sort order: +
-                    Map-reduce partition columns: _col1 (type: string)
-                    value expressions: _col0 (type: int)
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
       Reduce Operator Tree:
-        Select Operator
-          expressions: VALUE._col0 (type: int), KEY.reducesinkkey0 (type: string)
-          outputColumnNames: _col0, _col1
-          File Output Operator
-            compressed: false
-            table:
-                input format: org.apache.hadoop.mapred.TextInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                name: default.test_table3
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Select Operator
+            expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string)
+            outputColumnNames: _col0, _col1, _col2
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
 SELECT a.key, a.value

http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/bucketsortoptimize_insert_5.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_5.q.out b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_5.q.out
index 6c10249..661114d 100644
--- a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_5.q.out
+++ b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_5.q.out
@@ -65,43 +65,13 @@ FROM test_table1 a JOIN test_table2 b
 ON a.key = b.key WHERE a.ds = '1' and b.ds = '1'
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-6 is a root stage , consists of Stage-7, Stage-8, Stage-1
-  Stage-7 has a backup stage: Stage-1
-  Stage-4 depends on stages: Stage-7
-  Stage-0 depends on stages: Stage-1, Stage-4, Stage-5
-  Stage-2 depends on stages: Stage-0
-  Stage-8 has a backup stage: Stage-1
-  Stage-5 depends on stages: Stage-8
-  Stage-1
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
-  Stage: Stage-6
-    Conditional Operator
-
-  Stage: Stage-7
-    Map Reduce Local Work
-      Alias -> Map Local Tables:
-        $hdt$_1:b 
-          Fetch Operator
-            limit: -1
-      Alias -> Map Local Operator Tree:
-        $hdt$_1:b 
-          TableScan
-            alias: b
-            Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: key (type: int), value (type: string)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
-                HashTable Sink Operator
-                  keys:
-                    0 _col0 (type: int)
-                    1 _col0 (type: int)
-
-  Stage: Stage-4
+  Stage: Stage-1
     Map Reduce
       Map Operator Tree:
           TableScan
@@ -114,7 +84,7 @@ STAGE PLANS:
                 expressions: key (type: int), value (type: string)
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
-                Map Join Operator
+                Sorted Merge Bucket Map Join Operator
                   condition map:
                        Inner Join 0 to 1
                   keys:
@@ -129,8 +99,6 @@ STAGE PLANS:
                       sort order: -
                       Map-reduce partition columns: _col0 (type: int)
                       value expressions: _col1 (type: string)
-      Local Work:
-        Map Reduce Local Work
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
@@ -142,6 +110,20 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.test_table3
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string), '1' (type: string)
+            outputColumnNames: key, value, ds
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+              keys: ds (type: string)
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -158,111 +140,35 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.test_table3
 
-  Stage: Stage-8
-    Map Reduce Local Work
-      Alias -> Map Local Tables:
-        $hdt$_0:a 
-          Fetch Operator
-            limit: -1
-      Alias -> Map Local Operator Tree:
-        $hdt$_0:a 
-          TableScan
-            alias: a
-            Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: key (type: int), value (type: string)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
-                HashTable Sink Operator
-                  keys:
-                    0 _col0 (type: int)
-                    1 _col0 (type: int)
-
-  Stage: Stage-5
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: b
-            Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: key (type: int), value (type: string)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
-                Map Join Operator
-                  condition map:
-                       Inner Join 0 to 1
-                  keys:
-                    0 _col0 (type: int)
-                    1 _col0 (type: int)
-                  outputColumnNames: _col0, _col1, _col4
-                  Select Operator
-                    expressions: _col0 (type: int), concat(_col1, _col4) (type: string)
-                    outputColumnNames: _col0, _col1
-                    Reduce Output Operator
-                      key expressions: _col0 (type: int)
-                      sort order: -
-                      Map-reduce partition columns: _col0 (type: int)
-                      value expressions: _col1 (type: string)
-      Local Work:
-        Map Reduce Local Work
-      Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
-          outputColumnNames: _col0, _col1
-          File Output Operator
-            compressed: false
-            table:
-                input format: org.apache.hadoop.mapred.TextInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                name: default.test_table3
-
-  Stage: Stage-1
+  Stage: Stage-3
     Map Reduce
       Map Operator Tree:
           TableScan
-            alias: a
-            Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: key (type: int), value (type: string)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
-                Sorted Merge Bucket Map Join Operator
-                  condition map:
-                       Inner Join 0 to 1
-                  keys:
-                    0 _col0 (type: int)
-                    1 _col0 (type: int)
-                  outputColumnNames: _col0, _col1, _col4
-                  Select Operator
-                    expressions: _col0 (type: int), concat(_col1, _col4) (type: string)
-                    outputColumnNames: _col0, _col1
-                    Reduce Output Operator
-                      key expressions: _col0 (type: int)
-                      sort order: -
-                      Map-reduce partition columns: _col0 (type: int)
-                      value expressions: _col1 (type: string)
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
       Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
-          outputColumnNames: _col0, _col1
-          File Output Operator
-            compressed: false
-            table:
-                input format: org.apache.hadoop.mapred.TextInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                name: default.test_table3
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Select Operator
+            expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string)
+            outputColumnNames: _col0, _col1, _col2
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
 SELECT a.key, concat(a.value, b.value) 
@@ -347,43 +253,13 @@ JOIN
 ON a.key = b.key
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-6 is a root stage , consists of Stage-7, Stage-8, Stage-1
-  Stage-7 has a backup stage: Stage-1
-  Stage-4 depends on stages: Stage-7
-  Stage-0 depends on stages: Stage-1, Stage-4, Stage-5
-  Stage-2 depends on stages: Stage-0
-  Stage-8 has a backup stage: Stage-1
-  Stage-5 depends on stages: Stage-8
-  Stage-1
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
-  Stage: Stage-6
-    Conditional Operator
-
-  Stage: Stage-7
-    Map Reduce Local Work
-      Alias -> Map Local Tables:
-        $hdt$_1:test_table2 
-          Fetch Operator
-            limit: -1
-      Alias -> Map Local Operator Tree:
-        $hdt$_1:test_table2 
-          TableScan
-            alias: test_table2
-            Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: key (type: int), value (type: string)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
-                HashTable Sink Operator
-                  keys:
-                    0 _col0 (type: int)
-                    1 _col0 (type: int)
-
-  Stage: Stage-4
+  Stage: Stage-1
     Map Reduce
       Map Operator Tree:
           TableScan
@@ -396,7 +272,7 @@ STAGE PLANS:
                 expressions: key (type: int), value (type: string)
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
-                Map Join Operator
+                Sorted Merge Bucket Map Join Operator
                   condition map:
                        Inner Join 0 to 1
                   keys:
@@ -411,8 +287,6 @@ STAGE PLANS:
                       sort order: -
                       Map-reduce partition columns: _col0 (type: int)
                       value expressions: _col1 (type: string)
-      Local Work:
-        Map Reduce Local Work
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
@@ -424,6 +298,20 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.test_table3
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string), '1' (type: string)
+            outputColumnNames: key, value, ds
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+              keys: ds (type: string)
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -440,111 +328,35 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.test_table3
 
-  Stage: Stage-8
-    Map Reduce Local Work
-      Alias -> Map Local Tables:
-        $hdt$_0:test_table1 
-          Fetch Operator
-            limit: -1
-      Alias -> Map Local Operator Tree:
-        $hdt$_0:test_table1 
-          TableScan
-            alias: test_table1
-            Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: key (type: int), value (type: string)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
-                HashTable Sink Operator
-                  keys:
-                    0 _col0 (type: int)
-                    1 _col0 (type: int)
-
-  Stage: Stage-5
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: test_table2
-            Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: key (type: int), value (type: string)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
-                Map Join Operator
-                  condition map:
-                       Inner Join 0 to 1
-                  keys:
-                    0 _col0 (type: int)
-                    1 _col0 (type: int)
-                  outputColumnNames: _col0, _col1, _col3
-                  Select Operator
-                    expressions: _col0 (type: int), concat(_col1, _col3) (type: string)
-                    outputColumnNames: _col0, _col1
-                    Reduce Output Operator
-                      key expressions: _col0 (type: int)
-                      sort order: -
-                      Map-reduce partition columns: _col0 (type: int)
-                      value expressions: _col1 (type: string)
-      Local Work:
-        Map Reduce Local Work
-      Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
-          outputColumnNames: _col0, _col1
-          File Output Operator
-            compressed: false
-            table:
-                input format: org.apache.hadoop.mapred.TextInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                name: default.test_table3
-
-  Stage: Stage-1
+  Stage: Stage-3
     Map Reduce
       Map Operator Tree:
           TableScan
-            alias: test_table1
-            Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: key (type: int), value (type: string)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
-                Sorted Merge Bucket Map Join Operator
-                  condition map:
-                       Inner Join 0 to 1
-                  keys:
-                    0 _col0 (type: int)
-                    1 _col0 (type: int)
-                  outputColumnNames: _col0, _col1, _col3
-                  Select Operator
-                    expressions: _col0 (type: int), concat(_col1, _col3) (type: string)
-                    outputColumnNames: _col0, _col1
-                    Reduce Output Operator
-                      key expressions: _col0 (type: int)
-                      sort order: -
-                      Map-reduce partition columns: _col0 (type: int)
-                      value expressions: _col1 (type: string)
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
       Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
-          outputColumnNames: _col0, _col1
-          File Output Operator
-            compressed: false
-            table:
-                input format: org.apache.hadoop.mapred.TextInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                name: default.test_table3
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Select Operator
+            expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string)
+            outputColumnNames: _col0, _col1, _col2
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
 SELECT a.key, concat(a.value, b.value)