You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@hive.apache.org by jc...@apache.org on 2018/10/13 19:44:55 UTC

[1/2] hive git commit: HIVE-20702: Account for overhead from datastructure aware estimations during mapjoin selection (Jesus Camacho Rodriguez, reviewed by Zoltan Haindrich)

Repository: hive
Updated Branches:
  refs/heads/master f0b76e240 -> 87414f37e


http://git-wip-us.apache.org/repos/asf/hive/blob/87414f37/ql/src/test/results/clientpositive/llap/join32_lessSize.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/join32_lessSize.q.out b/ql/src/test/results/clientpositive/llap/join32_lessSize.q.out
index 0674ae4..16274f3 100644
--- a/ql/src/test/results/clientpositive/llap/join32_lessSize.q.out
+++ b/ql/src/test/results/clientpositive/llap/join32_lessSize.q.out
@@ -57,9 +57,9 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Map 1 <- Map 4 (BROADCAST_EDGE)
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
-        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+        Map 1 <- Map 2 (BROADCAST_EDGE)
+        Map 3 <- Map 1 (BROADCAST_EDGE)
+        Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -80,13 +80,13 @@ STAGE PLANS:
                       Map Join Operator
                         condition map:
                              Inner Join 0 to 1
-                        Estimated key counts: Map 4 => 25
+                        Estimated key counts: Map 2 => 25
                         keys:
                           0 _col0 (type: string)
                           1 _col0 (type: string)
                         outputColumnNames: _col1, _col2, _col3
                         input vertices:
-                          1 Map 4
+                          1 Map 2
                         Position of Big Table: 0
                         Statistics: Num rows: 39 Data size: 10374 Basic stats: COMPLETE Column stats: COMPLETE
                         Reduce Output Operator
@@ -153,7 +153,7 @@ STAGE PLANS:
                   name: default.src
             Truncated Path -> Alias:
               /src [y]
-        Map 4 
+        Map 2 
             Map Operator Tree:
                 TableScan
                   alias: x
@@ -232,7 +232,7 @@ STAGE PLANS:
                   name: default.src1
             Truncated Path -> Alias:
               /src1 [x]
-        Map 5 
+        Map 3 
             Map Operator Tree:
                 TableScan
                   alias: z
@@ -247,15 +247,72 @@ STAGE PLANS:
                       expressions: value (type: string)
                       outputColumnNames: _col0
                       Statistics: Num rows: 500 Data size: 184500 Basic stats: COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: string)
-                        null sort order: a
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 500 Data size: 184500 Basic stats: COMPLETE Column stats: COMPLETE
-                        tag: 1
-                        auto parallelism: true
-            Execution mode: vectorized, llap
+                      Map Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        Estimated key counts: Map 1 => 39
+                        keys:
+                          0 _col3 (type: string)
+                          1 _col0 (type: string)
+                        outputColumnNames: _col1, _col2, _col4
+                        input vertices:
+                          0 Map 1
+                        Position of Big Table: 1
+                        Statistics: Num rows: 63 Data size: 16884 Basic stats: COMPLETE Column stats: COMPLETE
+                        Select Operator
+                          expressions: _col2 (type: string), _col4 (type: string), _col1 (type: string)
+                          outputColumnNames: _col0, _col1, _col2
+                          Statistics: Num rows: 63 Data size: 16884 Basic stats: COMPLETE Column stats: COMPLETE
+                          File Output Operator
+                            compressed: false
+                            GlobalTableId: 1
+#### A masked pattern was here ####
+                            NumFilesPerFileSink: 1
+                            Statistics: Num rows: 63 Data size: 16884 Basic stats: COMPLETE Column stats: COMPLETE
+#### A masked pattern was here ####
+                            table:
+                                input format: org.apache.hadoop.mapred.TextInputFormat
+                                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                                properties:
+                                  COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}}
+                                  bucket_count -1
+                                  bucketing_version 2
+                                  column.name.delimiter ,
+                                  columns key,value,val2
+                                  columns.comments 
+                                  columns.types string:string:string
+#### A masked pattern was here ####
+                                  name default.dest_j1_n21
+                                  numFiles 0
+                                  numRows 0
+                                  rawDataSize 0
+                                  serialization.ddl struct dest_j1_n21 { string key, string value, string val2}
+                                  serialization.format 1
+                                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                                  totalSize 0
+#### A masked pattern was here ####
+                                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                                name: default.dest_j1_n21
+                            TotalFiles: 1
+                            GatherStats: true
+                            MultiFileSpray: false
+                          Select Operator
+                            expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                            outputColumnNames: key, value, val2
+                            Statistics: Num rows: 63 Data size: 16884 Basic stats: COMPLETE Column stats: COMPLETE
+                            Group By Operator
+                              aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll')
+                              mode: hash
+                              outputColumnNames: _col0, _col1, _col2
+                              Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
+                              Reduce Output Operator
+                                null sort order: 
+                                sort order: 
+                                Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
+                                tag: -1
+                                value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+                                auto parallelism: false
+            Execution mode: llap
             LLAP IO: no inputs
             Path -> Alias:
 #### A masked pattern was here ####
@@ -311,73 +368,7 @@ STAGE PLANS:
                   name: default.srcpart
             Truncated Path -> Alias:
               /srcpart/ds=2008-04-08/hr=11 [z]
-        Reducer 2 
-            Execution mode: llap
-            Needs Tagging: false
-            Reduce Operator Tree:
-              Merge Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 _col3 (type: string)
-                  1 _col0 (type: string)
-                outputColumnNames: _col1, _col2, _col4
-                Position of Big Table: 1
-                Statistics: Num rows: 63 Data size: 16884 Basic stats: COMPLETE Column stats: COMPLETE
-                Select Operator
-                  expressions: _col2 (type: string), _col4 (type: string), _col1 (type: string)
-                  outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 63 Data size: 16884 Basic stats: COMPLETE Column stats: COMPLETE
-                  File Output Operator
-                    compressed: false
-                    GlobalTableId: 1
-#### A masked pattern was here ####
-                    NumFilesPerFileSink: 1
-                    Statistics: Num rows: 63 Data size: 16884 Basic stats: COMPLETE Column stats: COMPLETE
-#### A masked pattern was here ####
-                    table:
-                        input format: org.apache.hadoop.mapred.TextInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                        properties:
-                          COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}}
-                          bucket_count -1
-                          bucketing_version 2
-                          column.name.delimiter ,
-                          columns key,value,val2
-                          columns.comments 
-                          columns.types string:string:string
-#### A masked pattern was here ####
-                          name default.dest_j1_n21
-                          numFiles 0
-                          numRows 0
-                          rawDataSize 0
-                          serialization.ddl struct dest_j1_n21 { string key, string value, string val2}
-                          serialization.format 1
-                          serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          totalSize 0
-#### A masked pattern was here ####
-                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                        name: default.dest_j1_n21
-                    TotalFiles: 1
-                    GatherStats: true
-                    MultiFileSpray: false
-                  Select Operator
-                    expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
-                    outputColumnNames: key, value, val2
-                    Statistics: Num rows: 63 Data size: 16884 Basic stats: COMPLETE Column stats: COMPLETE
-                    Group By Operator
-                      aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll')
-                      mode: hash
-                      outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        null sort order: 
-                        sort order: 
-                        Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
-                        tag: -1
-                        value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
-                        auto parallelism: false
-        Reducer 3 
+        Reducer 4 
             Execution mode: llap
             Needs Tagging: false
             Reduce Operator Tree:
@@ -611,9 +602,9 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE)
-        Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
-        Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
+        Map 3 <- Map 1 (BROADCAST_EDGE), Map 2 (BROADCAST_EDGE)
+        Map 4 <- Map 3 (BROADCAST_EDGE)
+        Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -695,7 +686,7 @@ STAGE PLANS:
                   name: default.src1
             Truncated Path -> Alias:
               /src1 [x]
-        Map 5 
+        Map 2 
             Map Operator Tree:
                 TableScan
                   alias: z
@@ -774,7 +765,7 @@ STAGE PLANS:
                   name: default.src1
             Truncated Path -> Alias:
               /src1 [z]
-        Map 6 
+        Map 3 
             Map Operator Tree:
                 TableScan
                   alias: y
@@ -789,15 +780,30 @@ STAGE PLANS:
                       expressions: key (type: string), value (type: string)
                       outputColumnNames: _col0, _col1
                       Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: string)
-                        null sort order: a
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
-                        tag: 2
-                        value expressions: _col1 (type: string)
-                        auto parallelism: true
+                      Map Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                             Inner Join 0 to 2
+                        Estimated key counts: Map 1 => 25, Map 2 => 25
+                        keys:
+                          0 _col0 (type: string)
+                          1 _col0 (type: string)
+                          2 _col0 (type: string)
+                        outputColumnNames: _col0, _col1, _col3, _col5
+                        input vertices:
+                          0 Map 1
+                          1 Map 2
+                        Position of Big Table: 2
+                        Statistics: Num rows: 61 Data size: 21655 Basic stats: COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          key expressions: _col1 (type: string)
+                          null sort order: a
+                          sort order: +
+                          Map-reduce partition columns: _col1 (type: string)
+                          Statistics: Num rows: 61 Data size: 21655 Basic stats: COMPLETE Column stats: COMPLETE
+                          tag: 0
+                          value expressions: _col0 (type: string), _col3 (type: string), _col5 (type: string)
+                          auto parallelism: true
             Execution mode: vectorized, llap
             LLAP IO: no inputs
             Path -> Alias:
@@ -853,7 +859,7 @@ STAGE PLANS:
                   name: default.src
             Truncated Path -> Alias:
               /src [y]
-        Map 7 
+        Map 4 
             Map Operator Tree:
                 TableScan
                   alias: w
@@ -868,15 +874,72 @@ STAGE PLANS:
                       expressions: value (type: string)
                       outputColumnNames: _col0
                       Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: string)
-                        null sort order: a
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
-                        tag: 1
-                        auto parallelism: true
-            Execution mode: vectorized, llap
+                      Map Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        Estimated key counts: Map 3 => 61
+                        keys:
+                          0 _col1 (type: string)
+                          1 _col0 (type: string)
+                        outputColumnNames: _col0, _col3, _col5
+                        input vertices:
+                          0 Map 3
+                        Position of Big Table: 1
+                        Statistics: Num rows: 99 Data size: 26334 Basic stats: COMPLETE Column stats: COMPLETE
+                        Select Operator
+                          expressions: _col0 (type: string), _col3 (type: string), _col5 (type: string)
+                          outputColumnNames: _col0, _col1, _col2
+                          Statistics: Num rows: 99 Data size: 26334 Basic stats: COMPLETE Column stats: COMPLETE
+                          File Output Operator
+                            compressed: false
+                            GlobalTableId: 1
+#### A masked pattern was here ####
+                            NumFilesPerFileSink: 1
+                            Statistics: Num rows: 99 Data size: 26334 Basic stats: COMPLETE Column stats: COMPLETE
+#### A masked pattern was here ####
+                            table:
+                                input format: org.apache.hadoop.mapred.TextInputFormat
+                                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                                properties:
+                                  COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}}
+                                  bucket_count -1
+                                  bucketing_version 2
+                                  column.name.delimiter ,
+                                  columns key,value,val2
+                                  columns.comments 
+                                  columns.types string:string:string
+#### A masked pattern was here ####
+                                  name default.dest_j1_n21
+                                  numFiles 1
+                                  numRows 85
+                                  rawDataSize 1600
+                                  serialization.ddl struct dest_j1_n21 { string key, string value, string val2}
+                                  serialization.format 1
+                                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                                  totalSize 1685
+#### A masked pattern was here ####
+                                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                                name: default.dest_j1_n21
+                            TotalFiles: 1
+                            GatherStats: true
+                            MultiFileSpray: false
+                          Select Operator
+                            expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                            outputColumnNames: key, value, val2
+                            Statistics: Num rows: 99 Data size: 26334 Basic stats: COMPLETE Column stats: COMPLETE
+                            Group By Operator
+                              aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll')
+                              mode: hash
+                              outputColumnNames: _col0, _col1, _col2
+                              Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
+                              Reduce Output Operator
+                                null sort order: 
+                                sort order: 
+                                Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
+                                tag: -1
+                                value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+                                auto parallelism: false
+            Execution mode: llap
             LLAP IO: no inputs
             Path -> Alias:
 #### A masked pattern was here ####
@@ -931,97 +994,7 @@ STAGE PLANS:
                   name: default.src
             Truncated Path -> Alias:
               /src [w]
-        Reducer 2 
-            Execution mode: llap
-            Needs Tagging: false
-            Reduce Operator Tree:
-              Merge Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                     Inner Join 0 to 2
-                keys:
-                  0 _col0 (type: string)
-                  1 _col0 (type: string)
-                  2 _col0 (type: string)
-                outputColumnNames: _col0, _col1, _col3, _col5
-                Position of Big Table: 2
-                Statistics: Num rows: 61 Data size: 21655 Basic stats: COMPLETE Column stats: COMPLETE
-                Reduce Output Operator
-                  key expressions: _col1 (type: string)
-                  null sort order: a
-                  sort order: +
-                  Map-reduce partition columns: _col1 (type: string)
-                  Statistics: Num rows: 61 Data size: 21655 Basic stats: COMPLETE Column stats: COMPLETE
-                  tag: 0
-                  value expressions: _col0 (type: string), _col3 (type: string), _col5 (type: string)
-                  auto parallelism: true
-        Reducer 3 
-            Execution mode: llap
-            Needs Tagging: false
-            Reduce Operator Tree:
-              Merge Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 _col1 (type: string)
-                  1 _col0 (type: string)
-                outputColumnNames: _col0, _col3, _col5
-                Position of Big Table: 0
-                Statistics: Num rows: 99 Data size: 26334 Basic stats: COMPLETE Column stats: COMPLETE
-                Select Operator
-                  expressions: _col0 (type: string), _col3 (type: string), _col5 (type: string)
-                  outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 99 Data size: 26334 Basic stats: COMPLETE Column stats: COMPLETE
-                  File Output Operator
-                    compressed: false
-                    GlobalTableId: 1
-#### A masked pattern was here ####
-                    NumFilesPerFileSink: 1
-                    Statistics: Num rows: 99 Data size: 26334 Basic stats: COMPLETE Column stats: COMPLETE
-#### A masked pattern was here ####
-                    table:
-                        input format: org.apache.hadoop.mapred.TextInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                        properties:
-                          COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}}
-                          bucket_count -1
-                          bucketing_version 2
-                          column.name.delimiter ,
-                          columns key,value,val2
-                          columns.comments 
-                          columns.types string:string:string
-#### A masked pattern was here ####
-                          name default.dest_j1_n21
-                          numFiles 1
-                          numRows 85
-                          rawDataSize 1600
-                          serialization.ddl struct dest_j1_n21 { string key, string value, string val2}
-                          serialization.format 1
-                          serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          totalSize 1685
-#### A masked pattern was here ####
-                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                        name: default.dest_j1_n21
-                    TotalFiles: 1
-                    GatherStats: true
-                    MultiFileSpray: false
-                  Select Operator
-                    expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
-                    outputColumnNames: key, value, val2
-                    Statistics: Num rows: 99 Data size: 26334 Basic stats: COMPLETE Column stats: COMPLETE
-                    Group By Operator
-                      aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll')
-                      mode: hash
-                      outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        null sort order: 
-                        sort order: 
-                        Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
-                        tag: -1
-                        value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
-                        auto parallelism: false
-        Reducer 4 
+        Reducer 5 
             Execution mode: llap
             Needs Tagging: false
             Reduce Operator Tree:
@@ -1252,9 +1225,9 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Map 1 <- Map 4 (BROADCAST_EDGE)
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
-        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+        Map 1 <- Map 2 (BROADCAST_EDGE)
+        Map 3 <- Map 1 (BROADCAST_EDGE)
+        Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -1275,13 +1248,13 @@ STAGE PLANS:
                       Map Join Operator
                         condition map:
                              Inner Join 0 to 1
-                        Estimated key counts: Map 4 => 25
+                        Estimated key counts: Map 2 => 25
                         keys:
                           0 _col0 (type: string)
                           1 _col0 (type: string)
                         outputColumnNames: _col1, _col2
                         input vertices:
-                          1 Map 4
+                          1 Map 2
                         Position of Big Table: 0
                         Statistics: Num rows: 39 Data size: 6825 Basic stats: COMPLETE Column stats: COMPLETE
                         Reduce Output Operator
@@ -1348,7 +1321,7 @@ STAGE PLANS:
                   name: default.src
             Truncated Path -> Alias:
               /src [y]
-        Map 4 
+        Map 2 
             Map Operator Tree:
                 TableScan
                   alias: x
@@ -1427,7 +1400,7 @@ STAGE PLANS:
                   name: default.src1
             Truncated Path -> Alias:
               /src1 [x]
-        Map 5 
+        Map 3 
             Map Operator Tree:
                 TableScan
                   alias: z
@@ -1442,15 +1415,72 @@ STAGE PLANS:
                       expressions: value (type: string)
                       outputColumnNames: _col0
                       Statistics: Num rows: 500 Data size: 184500 Basic stats: COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: string)
-                        null sort order: a
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 500 Data size: 184500 Basic stats: COMPLETE Column stats: COMPLETE
-                        tag: 1
-                        auto parallelism: true
-            Execution mode: vectorized, llap
+                      Map Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        Estimated key counts: Map 1 => 39
+                        keys:
+                          0 _col2 (type: string)
+                          1 _col0 (type: string)
+                        outputColumnNames: _col1, _col2, _col3
+                        input vertices:
+                          0 Map 1
+                        Position of Big Table: 1
+                        Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE
+                        Select Operator
+                          expressions: _col1 (type: string), _col3 (type: string), _col2 (type: string)
+                          outputColumnNames: _col0, _col1, _col2
+                          Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE
+                          File Output Operator
+                            compressed: false
+                            GlobalTableId: 1
+#### A masked pattern was here ####
+                            NumFilesPerFileSink: 1
+                            Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE
+#### A masked pattern was here ####
+                            table:
+                                input format: org.apache.hadoop.mapred.TextInputFormat
+                                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                                properties:
+                                  COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}}
+                                  bucket_count -1
+                                  bucketing_version 2
+                                  column.name.delimiter ,
+                                  columns key,value,val2
+                                  columns.comments 
+                                  columns.types string:string:string
+#### A masked pattern was here ####
+                                  name default.dest_j2_n1
+                                  numFiles 0
+                                  numRows 0
+                                  rawDataSize 0
+                                  serialization.ddl struct dest_j2_n1 { string key, string value, string val2}
+                                  serialization.format 1
+                                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                                  totalSize 0
+#### A masked pattern was here ####
+                                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                                name: default.dest_j2_n1
+                            TotalFiles: 1
+                            GatherStats: true
+                            MultiFileSpray: false
+                          Select Operator
+                            expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                            outputColumnNames: key, value, val2
+                            Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE
+                            Group By Operator
+                              aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll')
+                              mode: hash
+                              outputColumnNames: _col0, _col1, _col2
+                              Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
+                              Reduce Output Operator
+                                null sort order: 
+                                sort order: 
+                                Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
+                                tag: -1
+                                value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+                                auto parallelism: false
+            Execution mode: llap
             LLAP IO: no inputs
             Path -> Alias:
 #### A masked pattern was here ####
@@ -1506,73 +1536,7 @@ STAGE PLANS:
                   name: default.srcpart
             Truncated Path -> Alias:
               /srcpart/ds=2008-04-08/hr=11 [z]
-        Reducer 2 
-            Execution mode: llap
-            Needs Tagging: false
-            Reduce Operator Tree:
-              Merge Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 _col2 (type: string)
-                  1 _col0 (type: string)
-                outputColumnNames: _col1, _col2, _col3
-                Position of Big Table: 1
-                Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE
-                Select Operator
-                  expressions: _col1 (type: string), _col3 (type: string), _col2 (type: string)
-                  outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE
-                  File Output Operator
-                    compressed: false
-                    GlobalTableId: 1
-#### A masked pattern was here ####
-                    NumFilesPerFileSink: 1
-                    Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE
-#### A masked pattern was here ####
-                    table:
-                        input format: org.apache.hadoop.mapred.TextInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                        properties:
-                          COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}}
-                          bucket_count -1
-                          bucketing_version 2
-                          column.name.delimiter ,
-                          columns key,value,val2
-                          columns.comments 
-                          columns.types string:string:string
-#### A masked pattern was here ####
-                          name default.dest_j2_n1
-                          numFiles 0
-                          numRows 0
-                          rawDataSize 0
-                          serialization.ddl struct dest_j2_n1 { string key, string value, string val2}
-                          serialization.format 1
-                          serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          totalSize 0
-#### A masked pattern was here ####
-                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                        name: default.dest_j2_n1
-                    TotalFiles: 1
-                    GatherStats: true
-                    MultiFileSpray: false
-                  Select Operator
-                    expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
-                    outputColumnNames: key, value, val2
-                    Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE
-                    Group By Operator
-                      aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll')
-                      mode: hash
-                      outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        null sort order: 
-                        sort order: 
-                        Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
-                        tag: -1
-                        value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
-                        auto parallelism: false
-        Reducer 3 
+        Reducer 4 
             Execution mode: llap
             Needs Tagging: false
             Reduce Operator Tree:
@@ -1804,9 +1768,9 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
-        Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
-        Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
+        Map 4 <- Reducer 2 (BROADCAST_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+        Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -1888,7 +1852,7 @@ STAGE PLANS:
                   name: default.src1
             Truncated Path -> Alias:
               /src1 [x]
-        Map 5 
+        Map 3 
             Map Operator Tree:
                 TableScan
                   alias: y
@@ -1961,7 +1925,7 @@ STAGE PLANS:
                   name: default.src
             Truncated Path -> Alias:
               /src [y]
-        Map 6 
+        Map 4 
             Map Operator Tree:
                 TableScan
                   alias: z
@@ -1976,15 +1940,72 @@ STAGE PLANS:
                       expressions: value (type: string)
                       outputColumnNames: _col0
                       Statistics: Num rows: 500 Data size: 184500 Basic stats: COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: string)
-                        null sort order: a
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 500 Data size: 184500 Basic stats: COMPLETE Column stats: COMPLETE
-                        tag: 1
-                        auto parallelism: true
-            Execution mode: vectorized, llap
+                      Map Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        Estimated key counts: Reducer 2 => 64
+                        keys:
+                          0 _col1 (type: string)
+                          1 _col0 (type: string)
+                        outputColumnNames: _col0, _col1, _col3
+                        input vertices:
+                          0 Reducer 2
+                        Position of Big Table: 1
+                        Statistics: Num rows: 104 Data size: 27664 Basic stats: COMPLETE Column stats: COMPLETE
+                        Select Operator
+                          expressions: _col0 (type: string), _col3 (type: string), _col1 (type: string)
+                          outputColumnNames: _col0, _col1, _col2
+                          Statistics: Num rows: 104 Data size: 27664 Basic stats: COMPLETE Column stats: COMPLETE
+                          File Output Operator
+                            compressed: false
+                            GlobalTableId: 1
+#### A masked pattern was here ####
+                            NumFilesPerFileSink: 1
+                            Statistics: Num rows: 104 Data size: 27664 Basic stats: COMPLETE Column stats: COMPLETE
+#### A masked pattern was here ####
+                            table:
+                                input format: org.apache.hadoop.mapred.TextInputFormat
+                                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                                properties:
+                                  COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}}
+                                  bucket_count -1
+                                  bucketing_version 2
+                                  column.name.delimiter ,
+                                  columns key,value,val2
+                                  columns.comments 
+                                  columns.types string:string:string
+#### A masked pattern was here ####
+                                  name default.dest_j2_n1
+                                  numFiles 1
+                                  numRows 85
+                                  rawDataSize 1600
+                                  serialization.ddl struct dest_j2_n1 { string key, string value, string val2}
+                                  serialization.format 1
+                                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                                  totalSize 1685
+#### A masked pattern was here ####
+                                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                                name: default.dest_j2_n1
+                            TotalFiles: 1
+                            GatherStats: true
+                            MultiFileSpray: false
+                          Select Operator
+                            expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                            outputColumnNames: key, value, val2
+                            Statistics: Num rows: 104 Data size: 27664 Basic stats: COMPLETE Column stats: COMPLETE
+                            Group By Operator
+                              aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll')
+                              mode: hash
+                              outputColumnNames: _col0, _col1, _col2
+                              Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
+                              Reduce Output Operator
+                                null sort order: 
+                                sort order: 
+                                Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
+                                tag: -1
+                                value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+                                auto parallelism: false
+            Execution mode: llap
             LLAP IO: no inputs
             Path -> Alias:
 #### A masked pattern was here ####
@@ -2062,73 +2083,7 @@ STAGE PLANS:
                   tag: 0
                   value expressions: _col0 (type: string)
                   auto parallelism: true
-        Reducer 3 
-            Execution mode: llap
-            Needs Tagging: false
-            Reduce Operator Tree:
-              Merge Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 _col1 (type: string)
-                  1 _col0 (type: string)
-                outputColumnNames: _col0, _col1, _col3
-                Position of Big Table: 0
-                Statistics: Num rows: 104 Data size: 27664 Basic stats: COMPLETE Column stats: COMPLETE
-                Select Operator
-                  expressions: _col0 (type: string), _col3 (type: string), _col1 (type: string)
-                  outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 104 Data size: 27664 Basic stats: COMPLETE Column stats: COMPLETE
-                  File Output Operator
-                    compressed: false
-                    GlobalTableId: 1
-#### A masked pattern was here ####
-                    NumFilesPerFileSink: 1
-                    Statistics: Num rows: 104 Data size: 27664 Basic stats: COMPLETE Column stats: COMPLETE
-#### A masked pattern was here ####
-                    table:
-                        input format: org.apache.hadoop.mapred.TextInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                        properties:
-                          COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}}
-                          bucket_count -1
-                          bucketing_version 2
-                          column.name.delimiter ,
-                          columns key,value,val2
-                          columns.comments 
-                          columns.types string:string:string
-#### A masked pattern was here ####
-                          name default.dest_j2_n1
-                          numFiles 1
-                          numRows 85
-                          rawDataSize 1600
-                          serialization.ddl struct dest_j2_n1 { string key, string value, string val2}
-                          serialization.format 1
-                          serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          totalSize 1685
-#### A masked pattern was here ####
-                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                        name: default.dest_j2_n1
-                    TotalFiles: 1
-                    GatherStats: true
-                    MultiFileSpray: false
-                  Select Operator
-                    expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
-                    outputColumnNames: key, value, val2
-                    Statistics: Num rows: 104 Data size: 27664 Basic stats: COMPLETE Column stats: COMPLETE
-                    Group By Operator
-                      aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll')
-                      mode: hash
-                      outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        null sort order: 
-                        sort order: 
-                        Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
-                        tag: -1
-                        value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
-                        auto parallelism: false
-        Reducer 4 
+        Reducer 5 
             Execution mode: llap
             Needs Tagging: false
             Reduce Operator Tree:
@@ -2363,9 +2318,9 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Map 1 <- Map 4 (BROADCAST_EDGE)
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
-        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+        Map 1 <- Map 2 (BROADCAST_EDGE)
+        Map 3 <- Map 1 (BROADCAST_EDGE)
+        Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -2389,7 +2344,7 @@ STAGE PLANS:
                           1 _col0 (type: string)
                         outputColumnNames: _col1, _col2
                         input vertices:
-                          1 Map 4
+                          1 Map 2
                         Statistics: Num rows: 39 Data size: 6825 Basic stats: COMPLETE Column stats: COMPLETE
                         Reduce Output Operator
                           key expressions: _col2 (type: string)
@@ -2399,7 +2354,7 @@ STAGE PLANS:
                           value expressions: _col1 (type: string)
             Execution mode: vectorized, llap
             LLAP IO: no inputs
-        Map 4 
+        Map 2 
             Map Operator Tree:
                 TableScan
                   alias: x
@@ -2420,7 +2375,7 @@ STAGE PLANS:
                         value expressions: _col1 (type: string)
             Execution mode: vectorized, llap
             LLAP IO: no inputs
-        Map 5 
+        Map 3 
             Map Operator Tree:
                 TableScan
                   alias: x
@@ -2433,50 +2388,44 @@ STAGE PLANS:
                       expressions: value (type: string)
                       outputColumnNames: _col0
                       Statistics: Num rows: 500 Data size: 184500 Basic stats: COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: string)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 500 Data size: 184500 Basic stats: COMPLETE Column stats: COMPLETE
-            Execution mode: vectorized, llap
-            LLAP IO: no inputs
-        Reducer 2 
+                      Map Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        keys:
+                          0 _col2 (type: string)
+                          1 _col0 (type: string)
+                        outputColumnNames: _col1, _col2, _col3
+                        input vertices:
+                          0 Map 1
+                        Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE
+                        Select Operator
+                          expressions: _col1 (type: string), _col3 (type: string), _col2 (type: string)
+                          outputColumnNames: _col0, _col1, _col2
+                          Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE
+                          File Output Operator
+                            compressed: false
+                            Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE
+                            table:
+                                input format: org.apache.hadoop.mapred.TextInputFormat
+                                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                                name: default.dest_j2_n1
+                          Select Operator
+                            expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                            outputColumnNames: key, value, val2
+                            Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE
+                            Group By Operator
+                              aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll')
+                              mode: hash
+                              outputColumnNames: _col0, _col1, _col2
+                              Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
+                              Reduce Output Operator
+                                sort order: 
+                                Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
+                                value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
             Execution mode: llap
-            Reduce Operator Tree:
-              Merge Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 _col2 (type: string)
-                  1 _col0 (type: string)
-                outputColumnNames: _col1, _col2, _col3
-                Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE
-                Select Operator
-                  expressions: _col1 (type: string), _col3 (type: string), _col2 (type: string)
-                  outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE
-                  File Output Operator
-                    compressed: false
-                    Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE
-                    table:
-                        input format: org.apache.hadoop.mapred.TextInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                        name: default.dest_j2_n1
-                  Select Operator
-                    expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
-                    outputColumnNames: key, value, val2
-                    Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE
-                    Group By Operator
-                      aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll')
-                      mode: hash
-                      outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        sort order: 
-                        Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
-                        value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
-        Reducer 3 
+            LLAP IO: no inputs
+        Reducer 4 
             Execution mode: llap
             Reduce Operator Tree:
               Group By Operator
@@ -2662,9 +2611,9 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Map 1 <- Map 4 (BROADCAST_EDGE)
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
-        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+        Map 1 <- Map 2 (BROADCAST_EDGE)
+        Map 3 <- Map 1 (BROADCAST_EDGE)
+        Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -2688,7 +2637,7 @@ STAGE PLANS:
                           1 _col0 (type: string)
                         outputColumnNames: _col1, _col2
                         input vertices:
-                          1 Map 4
+                          1 Map 2
                         Statistics: Num rows: 39 Data size: 6825 Basic stats: COMPLETE Column stats: COMPLETE
                         Reduce Output Operator
                           key expressions: _col2 (type: string)
@@ -2698,7 +2647,7 @@ STAGE PLANS:
                           value expressions: _col1 (type: string)
             Execution mode: vectorized, llap
             LLAP IO: no inputs
-        Map 4 
+        Map 2 
             Map Operator Tree:
                 TableScan
                   alias: x
@@ -2719,7 +2668,7 @@ STAGE PLANS:
                         value expressions: _col1 (type: string)
             Execution mode: vectorized, llap
             LLAP IO: no inputs
-        Map 5 
+        Map 3 
             Map Operator Tree:
                 TableScan
                   alias: y
@@ -2732,50 +2681,44 @@ STAGE PLANS:
                       expressions: value (type: string)
                       outputColumnNames: _col0
                       Statistics: Num rows: 500 Data size: 184500 Basic stats: COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: string)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 500 Data size: 184500 Basic stats: COMPLETE Column stats: COMPLETE
-            Execution mode: vectorized, llap
-            LLAP IO: no inputs
-        Reducer 2 
+                      Map Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        keys:
+                          0 _col2 (type: string)
+                          1 _col0 (type: string)
+                        outputColumnNames: _col1, _col2, _col3
+                        input vertices:
+                          0 Map 1
+                        Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE
+                        Select Operator
+                          expressions: _col1 (type: string), _col3 (type: string), _col2 (type: string)
+                          outputColumnNames: _col0, _col1, _col2
+                          Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE
+                          File Output Operator
+                            compressed: false
+                            Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE
+                            table:
+                                input format: org.apache.hadoop.mapred.TextInputFormat
+                                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                                name: default.dest_j2_n1
+                          Select Operator
+                            expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                            outputColumnNames: key, value, val2
+                            Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE
+                            Group By Operator
+                              aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll')
+                              mode: hash
+                              outputColumnNames: _col0, _col1, _col2
+                              Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
+                              Reduce Output Operator
+                                sort order: 
+                                Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
+                                value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
             Execution mode: llap
-            Reduce Operator Tree:
-              Merge Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 _col2 (type: string)
-                  1 _col0 (type: string)
-                outputColumnNames: _col1, _col2, _col3
-                Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE
-                Select Operator
-                  expressions: _col1 (type: string), _col3 (type: string), _col2 (type: string)
-                  outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE
-                  File Output Operator
-                    compressed: false
-                    Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE
-                    table:
-                        input format: org.apache.hadoop.mapred.TextInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                        name: default.dest_j2_n1
-                  Select Operator
-                    expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
-                    outputColumnNames: key, value, val2
-                    Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE
-                    Group By Operator
-                      aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll')
-                      mode: hash
-                      outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        sort order: 
-                        Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
-                        value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
-        Reducer 3 
+            LLAP IO: no inputs
+        Reducer 4 
             Execution mode: llap
             Reduce Operator Tree:
               Group By Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/87414f37/ql/src/test/results/clientpositive/llap/join_max_hashtable.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/join_max_hashtable.q.out b/ql/src/test/results/clientpositive/llap/join_max_hashtable.q.out
index cce6519..79e242d 100644
--- a/ql/src/test/results/clientpositive/llap/join_max_hashtable.q.out
+++ b/ql/src/test/results/clientpositive/llap/join_max_hashtable.q.out
@@ -559,7 +559,7 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Map 1 <- Map 2 (BROADCAST_EDGE)
+        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -575,26 +575,15 @@ STAGE PLANS:
                       expressions: key (type: string), value (type: string)
                       outputColumnNames: _col0, _col1
                       Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
-                      Map Join Operator
-                        condition map:
-                             Inner Join 0 to 1
-                        keys:
-                          0 _col0 (type: string)
-                          1 _col0 (type: string)
-                        outputColumnNames: _col0, _col1
-                        input vertices:
-                          1 Map 2
-                        Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE
-                        File Output Operator
-                          compressed: false
-                          Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE
-                          table:
-                              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: string)
             Execution mode: vectorized, llap
             LLAP IO: no inputs
-        Map 2 
+        Map 3 
             Map Operator Tree:
                 TableScan
                   alias: y
@@ -614,6 +603,28 @@ STAGE PLANS:
                         Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: vectorized, llap
             LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Map Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 KEY.reducesinkkey0 (type: string)
+                  1 KEY.reducesinkkey0 (type: string)
+                outputColumnNames: _col0, _col1
+                input vertices:
+                  1 Map 3
+                Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE
+                DynamicPartitionHashJoin: true
+                HybridGraceHashJoin: true
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Fetch Operator
@@ -642,7 +653,7 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Map 1 <- Map 2 (BROADCAST_EDGE)
+        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -658,26 +669,14 @@ STAGE PLANS:
                       expressions: key (type: string), value (type: string)
                       outputColumnNames: _col0, _col1
                       Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
-                      Map Join Operator
-                        condition map:
-                             Inner Join 0 to 1
-                        keys:
-                          0 _col0 (type: string), _col1 (type: string)
-                          1 _col0 (type: string), _col1 (type: string)
-                        outputColumnNames: _col0, _col1
-                        input vertices:
-                          1 Map 2
-                        Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE
-                        File Output Operator
-                          compressed: false
-                          Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE
-                          table:
-                              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: string)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                        Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: vectorized, llap
             LLAP IO: no inputs
-        Map 2 
+        Map 3 
             Map Operator Tree:
                 TableScan
                   alias: y
@@ -697,6 +696,28 @@ STAGE PLANS:
                         Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: vectorized, llap
             LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Map Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string)
+                  1 KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string)
+                outputColumnNames: _col0, _col1
+                input vertices:
+                  1 Map 3
+                Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE
+                DynamicPartitionHashJoin: true
+                HybridGraceHashJoin: true
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Fetch Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/87414f37/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out b/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out
index 71f3e2c..b1eec43 100644
--- a/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out
+++ b/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out
@@ -3824,9 +3824,9 @@ STAGE PLANS:
 #### A masked pattern was here ####
       Edges:
         Map 1 <- Union 2 (CONTAINS)
+        Map 5 <- Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS)
         Reducer 3 <- Union 2 (SIMPLE_EDGE)
         Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
-        Reducer 6 <- Map 5 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE), Union 2 (CONTAINS)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -3928,15 +3928,41 @@ STAGE PLANS:
                       expressions: k1 (type: string), k2 (type: string), ds (type: string)
                       outputColumnNames: _col0, _col1, _col2
                       Statistics: Num rows: 166 Data size: 60092 Basic stats: COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: string)
-                        null sort order: a
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 166 Data size: 60092 Basic stats: COMPLETE Column stats: COMPLETE
-                        tag: 0
-                        value expressions: _col1 (type: string), _col2 (type: string)
-                        auto parallelism: true
+                      Map Join Operator
+                        condition map:
+                             Left Outer Join 0 to 1
+                        filter mappings:
+                          0 [1, 1]
+                        filter predicates:
+                          0 {(_col2 = '1')}
+                          1 
+                        Estimated key counts: Map 6 => 55
+                        keys:
+                          0 _col0 (type: string)
+                          1 _col0 (type: string)
+                        outputColumnNames: _col0, _col1, _col4, _col5
+                        input vertices:
+                          1 Map 6
+                        Position of Big Table: 0
+                        Statistics: Num rows: 221 Data size: 49306 Basic stats: COMPLETE Column stats: COMPLETE
+                        HybridGraceHashJoin: true
+                        Select Operator
+                          expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string)
+                          outputColumnNames: _col0, _col1, _col2, _col3
+                          Statistics: Num rows: 221 Data size: 49306 Basic stats: COMPLETE Column stats: COMPLETE
+                          Group By Operator
+                            keys: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
+                            mode: hash
+                            outputColumnNames: _col0, _col1, _col2, _col3
+                            Statistics: Num rows: 387 Data size: 108402 Basic stats: COMPLETE Column stats: COMPLETE
+                            Reduce Output Operator
+                              key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
+                              null sort order: aaaa
+                              sort order: ++++
+                              Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
+                              Statistics: Num rows: 387 Data size: 108402 Basic stats: COMPLETE Column stats: COMPLETE
+                              tag: -1
+                              auto parallelism: true
             Execution mode: vectorized
             Path -> Alias:
               hdfs://### HDFS PATH ### [a]
@@ -3993,7 +4019,7 @@ STAGE PLANS:
                   name: default.dst_union22_n0
             Truncated Path -> Alias:
               /dst_union22_n0/ds=1 [a]
-        Map 7 
+        Map 6 
             Map Operator Tree:
                 TableScan
                   alias: dst_union22_delta_n0
@@ -4169,40 +4195,6 @@ STAGE PLANS:
                     TotalFiles: 1
                     GatherStats: false
                     MultiFileSpray: false
-        Reducer 6 
-            Needs Tagging: false
-            Reduce Operator Tree:
-              Merge Join Operator
-                condition map:
-                     Left Outer Join 0 to 1
-                filter mappings:
-                  0 [1, 1]
-                filter predicates:
-                  0 {(VALUE._col1 = '1')}
-                  1 
-                keys:
-                  0 _col0 (type: string)
-                  1 _col0 (type: string)
-                outputColumnNames: _col0, _col1, _col4, _col5
-                Position of Big Table: 0
-                Statistics: Num rows: 221 Data size: 49306 Basic stats: COMPLETE Column stats: COMPLETE
-                Select Operator
-                  expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string)
-                  outputColumnNames: _col0, _col1, _col2, _col3
-                  Statistics: Num rows: 221 Data size: 49306 Basic stats: COMPLETE Column stats: COMPLETE
-                  Group By Operator
-                    keys: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
-                    mode: hash
-                    outputColumnNames: _col0, _col1, _col2, _col3
-                    Statistics: Num rows: 387 Data size: 108402 Basic stats: COMPLETE Column stats: COMPLETE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
-                      null sort order: aaaa
-                      sort order: ++++
-                      Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
-                      Statistics: Num rows: 387 Data size: 108402 Basic stats: COMPLETE Column stats: COMPLETE
-                      tag: -1
-                      auto parallelism: true
         Union 2 
             Vertex: Union 2

[2/2] hive git commit: HIVE-20702: Account for overhead from datastructure aware estimations during mapjoin selection (Jesus Camacho Rodriguez, reviewed by Zoltan Haindrich)

Posted by jc...@apache.org.

HIVE-20702: Account for overhead from datastructure aware estimations during mapjoin selection (Jesus Camacho Rodriguez, reviewed by Zoltan Haindrich)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/87414f37
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/87414f37
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/87414f37

Branch: refs/heads/master
Commit: 87414f37eb6f2e2b437bf5df981335ce0b5a7a53
Parents: f0b76e2
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Sat Oct 13 12:44:38 2018 -0700
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Sat Oct 13 12:44:38 2018 -0700

----------------------------------------------------------------------
 .../hive/ql/optimizer/ConvertJoinMapJoin.java   |  41 +-
 .../llap/auto_sortmerge_join_13.q.out           | 117 ++-
 .../llap/bucket_map_join_tez2.q.out             | 155 ++--
 .../clientpositive/llap/join32_lessSize.q.out   | 863 +++++++++----------
 .../llap/join_max_hashtable.q.out               |  97 ++-
 .../clientpositive/llap/unionDistinct_1.q.out   |  82 +-
 6 files changed, 683 insertions(+), 672 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/87414f37/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
index 5217208..b4cc76a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
@@ -68,6 +68,8 @@ import org.apache.hadoop.hive.ql.plan.OpTraits;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.plan.Statistics;
 import org.apache.hadoop.hive.ql.stats.StatsUtils;
+import org.apache.hadoop.hive.ql.util.JavaDataModel;
+import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
 import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
@@ -348,19 +350,54 @@ public class ConvertJoinMapJoin implements NodeProcessor {
 
 
   public long computeOnlineDataSizeGeneric(Statistics statistics, long overHeadPerRow, long overHeadPerSlot) {
-
     long onlineDataSize = 0;
     long numRows = statistics.getNumRows();
     if (numRows <= 0) {
       numRows = 1;
     }
     long worstCaseNeededSlots = 1L << DoubleMath.log2(numRows / hashTableLoadFactor, RoundingMode.UP);
-    onlineDataSize += statistics.getDataSize();
+    onlineDataSize += statistics.getDataSize() - hashTableDataSizeAdjustment(numRows, statistics.getColumnStats());
     onlineDataSize += overHeadPerRow * statistics.getNumRows();
     onlineDataSize += overHeadPerSlot * worstCaseNeededSlots;
     return onlineDataSize;
   }
 
+  /**
+   * In data calculation logic, we include some overhead due to java object refs, etc.
+   * However, this overhead may be different when storing values in hashtable for mapjoin.
+   * Hence, we calculate a size adjustment to the original data size for a given input.
+   */
+  private static long hashTableDataSizeAdjustment(long numRows, List<ColStatistics> colStats) {
+    long result = 0;
+
+    if (numRows <= 0 || colStats == null || colStats.isEmpty()) {
+      return result;
+    }
+
+    for (ColStatistics cs : colStats) {
+      if (cs != null) {
+        String colTypeLowerCase = cs.getColumnType().toLowerCase();
+        long nonNullCount = cs.getNumNulls() > 0 ? numRows - cs.getNumNulls() + 1 : numRows;
+        double overhead = 0;
+        if (colTypeLowerCase.equals(serdeConstants.STRING_TYPE_NAME)
+            || colTypeLowerCase.startsWith(serdeConstants.VARCHAR_TYPE_NAME)
+            || colTypeLowerCase.startsWith(serdeConstants.CHAR_TYPE_NAME)) {
+          overhead = JavaDataModel.get().lengthForStringOfLength(0);
+        } else if (colTypeLowerCase.equals(serdeConstants.BINARY_TYPE_NAME)) {
+          overhead = JavaDataModel.get().lengthForByteArrayOfSize(0);
+        } else if (colTypeLowerCase.equals(serdeConstants.TIMESTAMP_TYPE_NAME) ||
+            colTypeLowerCase.equals(serdeConstants.TIMESTAMPLOCALTZ_TYPE_NAME) ||
+            colTypeLowerCase.startsWith(serdeConstants.DECIMAL_TYPE_NAME) ||
+            colTypeLowerCase.equals(serdeConstants.DATE_TYPE_NAME)) {
+          overhead = JavaDataModel.get().object();
+        }
+        result = StatsUtils.safeAdd(StatsUtils.safeMult(nonNullCount, overhead), result);
+      }
+    }
+
+    return result;
+  }
+
   @VisibleForTesting
   public MemoryMonitorInfo getMemoryMonitorInfo(
                                                 final HiveConf conf,

http://git-wip-us.apache.org/repos/asf/hive/blob/87414f37/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out
index 1a28ce8..a8bc8b8 100644
--- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out
+++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out
@@ -87,6 +87,7 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
+        Map 1 <- Map 4 (CUSTOM_EDGE)
         Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
         Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
@@ -94,18 +95,6 @@ STAGE PLANS:
         Map 1 
             Map Operator Tree:
                 TableScan
-                  alias: b
-                  filterExpr: key is not null (type: boolean)
-                  Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
-                    Select Operator
-                      expressions: key (type: int), value (type: string)
-                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
-            Map Operator Tree:
-                TableScan
                   alias: a
                   filterExpr: key is not null (type: boolean)
                   Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
@@ -116,13 +105,15 @@ STAGE PLANS:
                       expressions: key (type: int), value (type: string)
                       outputColumnNames: _col0, _col1
                       Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
-                      Merge Join Operator
+                      Map Join Operator
                         condition map:
                              Inner Join 0 to 1
                         keys:
                           0 _col0 (type: int)
                           1 _col0 (type: int)
                         outputColumnNames: _col0, _col1, _col2, _col3
+                        input vertices:
+                          1 Map 4
                         Statistics: Num rows: 16 Data size: 2976 Basic stats: COMPLETE Column stats: COMPLETE
                         Select Operator
                           expressions: _col0 (type: int), _col2 (type: int)
@@ -175,6 +166,28 @@ STAGE PLANS:
                                 Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
                                 value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
             Execution mode: llap
+            LLAP IO: no inputs
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  filterExpr: key is not null (type: boolean)
+                  Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: key (type: int), value (type: string)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: string)
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
         Reducer 2 
             Execution mode: llap
             Reduce Operator Tree:
@@ -368,6 +381,7 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
+        Map 1 <- Map 4 (CUSTOM_EDGE)
         Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
         Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
@@ -375,18 +389,6 @@ STAGE PLANS:
         Map 1 
             Map Operator Tree:
                 TableScan
-                  alias: b
-                  filterExpr: key is not null (type: boolean)
-                  Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
-                    Select Operator
-                      expressions: key (type: int), value (type: string)
-                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
-            Map Operator Tree:
-                TableScan
                   alias: a
                   filterExpr: key is not null (type: boolean)
                   Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
@@ -397,13 +399,15 @@ STAGE PLANS:
                       expressions: key (type: int), value (type: string)
                       outputColumnNames: _col0, _col1
                       Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
-                      Merge Join Operator
+                      Map Join Operator
                         condition map:
                              Inner Join 0 to 1
                         keys:
                           0 _col0 (type: int)
                           1 _col0 (type: int)
                         outputColumnNames: _col0, _col1, _col2, _col3
+                        input vertices:
+                          1 Map 4
                         Statistics: Num rows: 16 Data size: 2976 Basic stats: COMPLETE Column stats: COMPLETE
                         Select Operator
                           expressions: _col0 (type: int), _col2 (type: int)
@@ -456,6 +460,28 @@ STAGE PLANS:
                                 Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
                                 value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
             Execution mode: llap
+            LLAP IO: no inputs
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  filterExpr: key is not null (type: boolean)
+                  Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: key (type: int), value (type: string)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: string)
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
         Reducer 2 
             Execution mode: llap
             Reduce Operator Tree:
@@ -649,6 +675,7 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
+        Map 1 <- Map 4 (CUSTOM_EDGE)
         Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
         Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
@@ -656,18 +683,6 @@ STAGE PLANS:
         Map 1 
             Map Operator Tree:
                 TableScan
-                  alias: b
-                  filterExpr: key is not null (type: boolean)
-                  Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
-                    Select Operator
-                      expressions: key (type: int), value (type: string)
-                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
-            Map Operator Tree:
-                TableScan
                   alias: a
                   filterExpr: key is not null (type: boolean)
                   Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
@@ -678,13 +693,15 @@ STAGE PLANS:
                       expressions: key (type: int), value (type: string)
                       outputColumnNames: _col0, _col1
                       Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
-                      Merge Join Operator
+                      Map Join Operator
                         condition map:
                              Inner Join 0 to 1
                         keys:
                           0 _col0 (type: int)
                           1 _col0 (type: int)
                         outputColumnNames: _col0, _col1, _col2, _col3
+                        input vertices:
+                          1 Map 4
                         Statistics: Num rows: 16 Data size: 2976 Basic stats: COMPLETE Column stats: COMPLETE
                         Select Operator
                           expressions: _col0 (type: int), _col2 (type: int)
@@ -737,6 +754,28 @@ STAGE PLANS:
                                 Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
                                 value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
             Execution mode: llap
+            LLAP IO: no inputs
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  filterExpr: key is not null (type: boolean)
+                  Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: key (type: int), value (type: string)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: string)
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
         Reducer 2 
             Execution mode: llap
             Reduce Operator Tree:

http://git-wip-us.apache.org/repos/asf/hive/blob/87414f37/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out
index bbc06fa..8fe30f0 100644
--- a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out
+++ b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out
@@ -481,7 +481,7 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+        Map 2 <- Map 1 (BROADCAST_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -505,7 +505,7 @@ STAGE PLANS:
                         value expressions: _col0 (type: int), _col1 (type: string)
             Execution mode: vectorized, llap
             LLAP IO: no inputs
-        Map 3 
+        Map 2 
             Map Operator Tree:
                 TableScan
                   alias: b
@@ -518,36 +518,29 @@ STAGE PLANS:
                       expressions: key (type: string), value (type: string)
                       outputColumnNames: _col0, _col1
                       Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        key expressions: UDFToDouble(_col0) (type: double)
-                        sort order: +
-                        Map-reduce partition columns: UDFToDouble(_col0) (type: double)
-                        Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
-                        value expressions: _col1 (type: string)
+                      Map Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        keys:
+                          0 UDFToDouble(_col0) (type: double)
+                          1 UDFToDouble(_col0) (type: double)
+                        outputColumnNames: _col0, _col1, _col3
+                        input vertices:
+                          0 Map 1
+                        Statistics: Num rows: 382 Data size: 71052 Basic stats: COMPLETE Column stats: COMPLETE
+                        Select Operator
+                          expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string)
+                          outputColumnNames: _col0, _col1, _col2
+                          Statistics: Num rows: 382 Data size: 71052 Basic stats: COMPLETE Column stats: COMPLETE
+                          File Output Operator
+                            compressed: false
+                            Statistics: Num rows: 382 Data size: 71052 Basic stats: COMPLETE Column stats: COMPLETE
+                            table:
+                                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
             LLAP IO: no inputs
-        Reducer 2 
-            Execution mode: llap
-            Reduce Operator Tree:
-              Merge Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 UDFToDouble(_col0) (type: double)
-                  1 UDFToDouble(_col0) (type: double)
-                outputColumnNames: _col0, _col1, _col3
-                Statistics: Num rows: 382 Data size: 71052 Basic stats: COMPLETE Column stats: COMPLETE
-                Select Operator
-                  expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string)
-                  outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 382 Data size: 71052 Basic stats: COMPLETE Column stats: COMPLETE
-                  File Output Operator
-                    compressed: false
-                    Statistics: Num rows: 382 Data size: 71052 Basic stats: COMPLETE Column stats: COMPLETE
-                    table:
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Fetch Operator
@@ -578,7 +571,7 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+        Map 2 <- Map 1 (BROADCAST_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -602,7 +595,7 @@ STAGE PLANS:
                         value expressions: _col0 (type: int), _col1 (type: string)
             Execution mode: vectorized, llap
             LLAP IO: no inputs
-        Map 3 
+        Map 2 
             Map Operator Tree:
                 TableScan
                   alias: b
@@ -615,36 +608,29 @@ STAGE PLANS:
                       expressions: key (type: string), value (type: string)
                       outputColumnNames: _col0, _col1
                       Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        key expressions: UDFToDouble(_col0) (type: double)
-                        sort order: +
-                        Map-reduce partition columns: UDFToDouble(_col0) (type: double)
-                        Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
-                        value expressions: _col1 (type: string)
+                      Map Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        keys:
+                          0 UDFToDouble(_col0) (type: double)
+                          1 UDFToDouble(_col0) (type: double)
+                        outputColumnNames: _col0, _col1, _col3
+                        input vertices:
+                          0 Map 1
+                        Statistics: Num rows: 382 Data size: 71052 Basic stats: COMPLETE Column stats: COMPLETE
+                        Select Operator
+                          expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string)
+                          outputColumnNames: _col0, _col1, _col2
+                          Statistics: Num rows: 382 Data size: 71052 Basic stats: COMPLETE Column stats: COMPLETE
+                          File Output Operator
+                            compressed: false
+                            Statistics: Num rows: 382 Data size: 71052 Basic stats: COMPLETE Column stats: COMPLETE
+                            table:
+                                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
             LLAP IO: no inputs
-        Reducer 2 
-            Execution mode: llap
-            Reduce Operator Tree:
-              Merge Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 UDFToDouble(_col0) (type: double)
-                  1 UDFToDouble(_col0) (type: double)
-                outputColumnNames: _col0, _col1, _col3
-                Statistics: Num rows: 382 Data size: 71052 Basic stats: COMPLETE Column stats: COMPLETE
-                Select Operator
-                  expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string)
-                  outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 382 Data size: 71052 Basic stats: COMPLETE Column stats: COMPLETE
-                  File Output Operator
-                    compressed: false
-                    Statistics: Num rows: 382 Data size: 71052 Basic stats: COMPLETE Column stats: COMPLETE
-                    table:
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Fetch Operator
@@ -1636,9 +1622,8 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE)
-        Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE)
-        Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (BROADCAST_EDGE)
+        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -1662,7 +1647,7 @@ STAGE PLANS:
                         Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: vectorized, llap
             LLAP IO: no inputs
-        Map 5 
+        Map 4 
             Map Operator Tree:
                 TableScan
                   alias: b
@@ -1690,31 +1675,25 @@ STAGE PLANS:
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
-                Reduce Output Operator
-                  key expressions: _col0 (type: int), _col1 (type: string)
-                  sort order: ++
-                  Map-reduce partition columns: _col0 (type: int), _col1 (type: string)
-                  Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
-        Reducer 3 
-            Execution mode: llap
-            Reduce Operator Tree:
-              Merge Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 _col0 (type: int), _col1 (type: string)
-                  1 _col0 (type: int), _col1 (type: string)
-                Statistics: Num rows: 242 Data size: 1936 Basic stats: COMPLETE Column stats: COMPLETE
-                Group By Operator
-                  aggregations: count()
-                  mode: hash
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                  Reduce Output Operator
-                    sort order: 
+                Map Join Operator
+                  condition map:
+                       Inner Join 0 to 1
+                  keys:
+                    0 _col0 (type: int), _col1 (type: string)
+                    1 _col0 (type: int), _col1 (type: string)
+                  input vertices:
+                    1 Map 4
+                  Statistics: Num rows: 242 Data size: 1936 Basic stats: COMPLETE Column stats: COMPLETE
+                  Group By Operator
+                    aggregations: count()
+                    mode: hash
+                    outputColumnNames: _col0
                     Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                    value expressions: _col0 (type: bigint)
-        Reducer 4 
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                      value expressions: _col0 (type: bigint)
+        Reducer 3 
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator