Posted to commits@hive.apache.org by jd...@apache.org on 2017/12/12 20:51:34 UTC

[11/12] hive git commit: HIVE-18208: SMB Join : Fix the unit tests to run SMB Joins. (Deepak Jaiswal, reviewed by Jason Dere)
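
Context for the diff below: the fix adds SORTED BY to the bucketed test tables so they actually qualify for sort-merge-bucket (SMB) joins. With both tables clustered and sorted on the join key, the planner can replace the plain Map Join Operator with a Sorted Merge Bucket Map Join Operator, which is what the updated golden output reflects. A minimal HiveQL sketch of the pattern follows; the DDL and query are taken from the patch itself, while the session flags are the usual SMB-related settings and are an assumption about what the q-file enables, not something shown in this diff:

    -- Flags that typically allow SMB join conversion (assumed, not taken from this patch):
    set hive.optimize.bucketmapjoin=true;
    set hive.optimize.bucketmapjoin.sortedmerge=true;
    set hive.auto.convert.sortmerge.join=true;

    -- Tables must be bucketed AND sorted on the join key, with bucket counts
    -- that are multiples of each other (2 and 4 here):
    CREATE TABLE bucket_small (key string, value string)
      PARTITIONED BY (ds string)
      CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
      STORED AS TEXTFILE;

    CREATE TABLE bucket_big (key string, value string)
      PARTITIONED BY (ds string)
      CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS
      STORED AS TEXTFILE;

    -- A join on the bucketing/sort key can then run as an SMB map join:
    SELECT count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key;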

http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/results/clientpositive/auto_sortmerge_join_11.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/auto_sortmerge_join_11.q.out b/ql/src/test/results/clientpositive/auto_sortmerge_join_11.q.out
index 243a49b..25bac39 100644
--- a/ql/src/test/results/clientpositive/auto_sortmerge_join_11.q.out
+++ b/ql/src/test/results/clientpositive/auto_sortmerge_join_11.q.out
@@ -1,8 +1,8 @@
-PREHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+PREHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (KEY) INTO 2 BUCKETS STORED AS TEXTFILE
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
 PREHOOK: Output: default@bucket_small
-POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (KEY) INTO 2 BUCKETS STORED AS TEXTFILE
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@bucket_small
@@ -23,11 +23,11 @@ POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket2out
 POSTHOOK: type: LOAD
 #### A masked pattern was here ####
 POSTHOOK: Output: default@bucket_small@ds=2008-04-08
-PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE
+PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY(KEY) INTO 4 BUCKETS STORED AS TEXTFILE
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
 PREHOOK: Output: default@bucket_big
-POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE
+POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY(KEY) INTO 4 BUCKETS STORED AS TEXTFILE
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@bucket_big
@@ -102,15 +102,23 @@ PREHOOK: type: QUERY
 POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-5 is a root stage
-  Stage-2 depends on stages: Stage-5
+  Stage-6 is a root stage , consists of Stage-7, Stage-8, Stage-1
+  Stage-7 has a backup stage: Stage-1
+  Stage-4 depends on stages: Stage-7
+  Stage-2 depends on stages: Stage-1, Stage-4, Stage-5
+  Stage-8 has a backup stage: Stage-1
+  Stage-5 depends on stages: Stage-8
+  Stage-1
   Stage-0 depends on stages: Stage-2
 
 STAGE PLANS:
-  Stage: Stage-5
+  Stage: Stage-6
+    Conditional Operator
+
+  Stage: Stage-7
     Map Reduce Local Work
       Alias -> Map Local Tables:
-        $hdt$_0:a 
+        $hdt$_1:b 
           Fetch Operator
             limit: -1
             Partition Description:
@@ -121,101 +129,161 @@ STAGE PLANS:
                   partition values:
                     ds 2008-04-08
                   properties:
-                    bucket_count 2
+                    bucket_count 4
                     bucket_field_name key
                     column.name.delimiter ,
                     columns key,value
                     columns.comments 
                     columns.types string:string
 #### A masked pattern was here ####
-                    name default.bucket_small
-                    numFiles 2
+                    name default.bucket_big
+                    numFiles 4
                     numRows 0
                     partition_columns ds
                     partition_columns.types string
                     rawDataSize 0
-                    serialization.ddl struct bucket_small { string key, string value}
+                    serialization.ddl struct bucket_big { string key, string value}
                     serialization.format 1
                     serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    totalSize 114
+                    totalSize 5812
 #### A masked pattern was here ####
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      bucket_count 2
+                      SORTBUCKETCOLSPREFIX TRUE
+                      bucket_count 4
                       bucket_field_name key
                       column.name.delimiter ,
                       columns key,value
                       columns.comments 
                       columns.types string:string
 #### A masked pattern was here ####
-                      name default.bucket_small
+                      name default.bucket_big
                       partition_columns ds
                       partition_columns.types string
-                      serialization.ddl struct bucket_small { string key, string value}
+                      serialization.ddl struct bucket_big { string key, string value}
                       serialization.format 1
                       serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 #### A masked pattern was here ####
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    name: default.bucket_small
-                  name: default.bucket_small
+                    name: default.bucket_big
+                  name: default.bucket_big
+                Partition
+                  base file name: ds=2008-04-09
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  partition values:
+                    ds 2008-04-09
+                  properties:
+                    bucket_count 4
+                    bucket_field_name key
+                    column.name.delimiter ,
+                    columns key,value
+                    columns.comments 
+                    columns.types string:string
+#### A masked pattern was here ####
+                    name default.bucket_big
+                    numFiles 4
+                    numRows 0
+                    partition_columns ds
+                    partition_columns.types string
+                    rawDataSize 0
+                    serialization.ddl struct bucket_big { string key, string value}
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    totalSize 5812
+#### A masked pattern was here ####
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    properties:
+                      SORTBUCKETCOLSPREFIX TRUE
+                      bucket_count 4
+                      bucket_field_name key
+                      column.name.delimiter ,
+                      columns key,value
+                      columns.comments 
+                      columns.types string:string
+#### A masked pattern was here ####
+                      name default.bucket_big
+                      partition_columns ds
+                      partition_columns.types string
+                      serialization.ddl struct bucket_big { string key, string value}
+                      serialization.format 1
+                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.bucket_big
+                  name: default.bucket_big
       Alias -> Map Local Operator Tree:
-        $hdt$_0:a 
+        $hdt$_1:b 
           TableScan
-            alias: a
-            Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+            alias: b
+            Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
             GatherStats: false
             Filter Operator
               isSamplingPred: false
               predicate: key is not null (type: boolean)
-              Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: key (type: string)
                 outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
                 HashTable Sink Operator
                   keys:
                     0 _col0 (type: string)
                     1 _col0 (type: string)
-                  Position of Big Table: 1
+                  Position of Big Table: 0
 
-  Stage: Stage-2
+  Stage: Stage-4
     Map Reduce
       Map Operator Tree:
           TableScan
-            alias: b
-            Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+            alias: a
+            Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
             GatherStats: false
             Filter Operator
               isSamplingPred: false
               predicate: key is not null (type: boolean)
-              Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: key (type: string)
                 outputColumnNames: _col0
-                Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
                 Map Join Operator
                   condition map:
                        Inner Join 0 to 1
                   keys:
                     0 _col0 (type: string)
                     1 _col0 (type: string)
-                  Position of Big Table: 1
+                  Position of Big Table: 0
                   Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE
                   Group By Operator
                     aggregations: count()
                     mode: hash
                     outputColumnNames: _col0
                     Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      null sort order: 
-                      sort order: 
-                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                      tag: -1
-                      value expressions: _col0 (type: bigint)
-                      auto parallelism: false
+                    File Output Operator
+                      compressed: false
+                      GlobalTableId: 0
+#### A masked pattern was here ####
+                      NumFilesPerFileSink: 1
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          properties:
+                            column.name.delimiter ,
+                            columns _col0
+                            columns.types bigint
+                            escape.delim \
+                            serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                          serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                      TotalFiles: 1
+                      GatherStats: false
+                      MultiFileSpray: false
       Local Work:
         Map Reduce Local Work
       Path -> Alias:
@@ -252,6 +320,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
+                SORTBUCKETCOLSPREFIX TRUE
                 bucket_count 4
                 bucket_field_name key
                 column.name.delimiter ,
@@ -300,6 +369,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
+                SORTBUCKETCOLSPREFIX TRUE
                 bucket_count 4
                 bucket_field_name key
                 column.name.delimiter ,
@@ -348,6 +418,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
+                SORTBUCKETCOLSPREFIX TRUE
                 bucket_count 2
                 bucket_field_name key
                 column.name.delimiter ,
@@ -366,8 +437,47 @@ STAGE PLANS:
               name: default.bucket_small
             name: default.bucket_small
       Truncated Path -> Alias:
-        /bucket_big/ds=2008-04-08 [$hdt$_1:b]
-        /bucket_big/ds=2008-04-09 [$hdt$_1:b]
+        /bucket_small/ds=2008-04-08 [$hdt$_0:a]
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            GatherStats: false
+            Reduce Output Operator
+              null sort order: 
+              sort order: 
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+              tag: -1
+              value expressions: _col0 (type: bigint)
+              auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: -mr-10004
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              column.name.delimiter ,
+              columns _col0
+              columns.types bigint
+              escape.delim \
+              serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              properties:
+                column.name.delimiter ,
+                columns _col0
+                columns.types bigint
+                escape.delim \
+                serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Truncated Path -> Alias:
+#### A masked pattern was here ####
       Needs Tagging: false
       Reduce Operator Tree:
         Group By Operator
@@ -398,40 +508,7 @@ STAGE PLANS:
             GatherStats: false
             MultiFileSpray: false
 
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
-PREHOOK: query: select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key
-PREHOOK: type: QUERY
-PREHOOK: Input: default@bucket_big
-PREHOOK: Input: default@bucket_big@ds=2008-04-08
-PREHOOK: Input: default@bucket_big@ds=2008-04-09
-PREHOOK: Input: default@bucket_small
-PREHOOK: Input: default@bucket_small@ds=2008-04-08
-#### A masked pattern was here ####
-POSTHOOK: query: select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@bucket_big
-POSTHOOK: Input: default@bucket_big@ds=2008-04-08
-POSTHOOK: Input: default@bucket_big@ds=2008-04-09
-POSTHOOK: Input: default@bucket_small
-POSTHOOK: Input: default@bucket_small@ds=2008-04-08
-#### A masked pattern was here ####
-38
-PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key
-PREHOOK: type: QUERY
-POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-5 is a root stage
-  Stage-2 depends on stages: Stage-5
-  Stage-0 depends on stages: Stage-2
-
-STAGE PLANS:
-  Stage: Stage-5
+  Stage: Stage-8
     Map Reduce Local Work
       Alias -> Map Local Tables:
         $hdt$_0:a 
@@ -468,6 +545,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
+                      SORTBUCKETCOLSPREFIX TRUE
                       bucket_count 2
                       bucket_field_name key
                       column.name.delimiter ,
@@ -505,7 +583,7 @@ STAGE PLANS:
                     1 _col0 (type: string)
                   Position of Big Table: 1
 
-  Stage: Stage-2
+  Stage: Stage-5
     Map Reduce
       Map Operator Tree:
           TableScan
@@ -533,13 +611,24 @@ STAGE PLANS:
                     mode: hash
                     outputColumnNames: _col0
                     Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      null sort order: 
-                      sort order: 
-                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                      tag: -1
-                      value expressions: _col0 (type: bigint)
-                      auto parallelism: false
+                    File Output Operator
+                      compressed: false
+                      GlobalTableId: 0
+#### A masked pattern was here ####
+                      NumFilesPerFileSink: 1
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          properties:
+                            column.name.delimiter ,
+                            columns _col0
+                            columns.types bigint
+                            escape.delim \
+                            serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                          serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                      TotalFiles: 1
+                      GatherStats: false
+                      MultiFileSpray: false
       Local Work:
         Map Reduce Local Work
       Path -> Alias:
@@ -576,6 +665,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
+                SORTBUCKETCOLSPREFIX TRUE
                 bucket_count 4
                 bucket_field_name key
                 column.name.delimiter ,
@@ -624,6 +714,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
+                SORTBUCKETCOLSPREFIX TRUE
                 bucket_count 4
                 bucket_field_name key
                 column.name.delimiter ,
@@ -672,6 +763,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
+                SORTBUCKETCOLSPREFIX TRUE
                 bucket_count 2
                 bucket_field_name key
                 column.name.delimiter ,
@@ -692,145 +784,466 @@ STAGE PLANS:
       Truncated Path -> Alias:
         /bucket_big/ds=2008-04-08 [$hdt$_1:b]
         /bucket_big/ds=2008-04-09 [$hdt$_1:b]
-      Needs Tagging: false
-      Reduce Operator Tree:
-        Group By Operator
-          aggregations: count(VALUE._col0)
-          mode: mergepartial
-          outputColumnNames: _col0
-          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            GlobalTableId: 0
-#### A masked pattern was here ####
-            NumFilesPerFileSink: 1
-            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                properties:
-                  columns _col0
-                  columns.types bigint
-                  escape.delim \
-                  hive.serialization.extend.additional.nesting.levels true
-                  serialization.escape.crlf true
-                  serialization.format 1
-                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-            TotalFiles: 1
-            GatherStats: false
-            MultiFileSpray: false
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
 
-PREHOOK: query: select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key
-PREHOOK: type: QUERY
-PREHOOK: Input: default@bucket_big
-PREHOOK: Input: default@bucket_big@ds=2008-04-08
-PREHOOK: Input: default@bucket_big@ds=2008-04-09
-PREHOOK: Input: default@bucket_small
-PREHOOK: Input: default@bucket_small@ds=2008-04-08
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: a
+            Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+            GatherStats: false
+            Filter Operator
+              isSamplingPred: false
+              predicate: key is not null (type: boolean)
+              Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  null sort order: a
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+                  tag: 0
+                  auto parallelism: false
+          TableScan
+            alias: b
+            Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+            GatherStats: false
+            Filter Operator
+              isSamplingPred: false
+              predicate: key is not null (type: boolean)
+              Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  null sort order: a
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+                  tag: 1
+                  auto parallelism: false
+      Path -> Alias:
 #### A masked pattern was here ####
-POSTHOOK: query: select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@bucket_big
-POSTHOOK: Input: default@bucket_big@ds=2008-04-08
-POSTHOOK: Input: default@bucket_big@ds=2008-04-09
-POSTHOOK: Input: default@bucket_small
-POSTHOOK: Input: default@bucket_small@ds=2008-04-08
+      Path -> Partition:
 #### A masked pattern was here ####
-38
-PREHOOK: query: explain extended select /*+ mapjoin(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key
-PREHOOK: type: QUERY
-POSTHOOK: query: explain extended select /*+ mapjoin(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-3 is a root stage
-  Stage-1 depends on stages: Stage-3
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-3
-    Map Reduce Local Work
-      Alias -> Map Local Tables:
-        a 
-          Fetch Operator
-            limit: -1
-            Partition Description:
-                Partition
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  partition values:
-                    ds 2008-04-08
-                  properties:
-                    bucket_count 2
-                    bucket_field_name key
+          Partition
+            base file name: ds=2008-04-08
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2008-04-08
+            properties:
+              bucket_count 4
+              bucket_field_name key
+              column.name.delimiter ,
+              columns key,value
+              columns.comments 
+              columns.types string:string
+#### A masked pattern was here ####
+              name default.bucket_big
+              numFiles 4
+              numRows 0
+              partition_columns ds
+              partition_columns.types string
+              rawDataSize 0
+              serialization.ddl struct bucket_big { string key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 5812
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                SORTBUCKETCOLSPREFIX TRUE
+                bucket_count 4
+                bucket_field_name key
+                column.name.delimiter ,
+                columns key,value
+                columns.comments 
+                columns.types string:string
+#### A masked pattern was here ####
+                name default.bucket_big
+                partition_columns ds
+                partition_columns.types string
+                serialization.ddl struct bucket_big { string key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.bucket_big
+            name: default.bucket_big
+#### A masked pattern was here ####
+          Partition
+            base file name: ds=2008-04-09
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2008-04-09
+            properties:
+              bucket_count 4
+              bucket_field_name key
+              column.name.delimiter ,
+              columns key,value
+              columns.comments 
+              columns.types string:string
+#### A masked pattern was here ####
+              name default.bucket_big
+              numFiles 4
+              numRows 0
+              partition_columns ds
+              partition_columns.types string
+              rawDataSize 0
+              serialization.ddl struct bucket_big { string key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 5812
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                SORTBUCKETCOLSPREFIX TRUE
+                bucket_count 4
+                bucket_field_name key
+                column.name.delimiter ,
+                columns key,value
+                columns.comments 
+                columns.types string:string
+#### A masked pattern was here ####
+                name default.bucket_big
+                partition_columns ds
+                partition_columns.types string
+                serialization.ddl struct bucket_big { string key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.bucket_big
+            name: default.bucket_big
+#### A masked pattern was here ####
+          Partition
+            base file name: ds=2008-04-08
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2008-04-08
+            properties:
+              bucket_count 2
+              bucket_field_name key
+              column.name.delimiter ,
+              columns key,value
+              columns.comments 
+              columns.types string:string
+#### A masked pattern was here ####
+              name default.bucket_small
+              numFiles 2
+              numRows 0
+              partition_columns ds
+              partition_columns.types string
+              rawDataSize 0
+              serialization.ddl struct bucket_small { string key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 114
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                SORTBUCKETCOLSPREFIX TRUE
+                bucket_count 2
+                bucket_field_name key
+                column.name.delimiter ,
+                columns key,value
+                columns.comments 
+                columns.types string:string
+#### A masked pattern was here ####
+                name default.bucket_small
+                partition_columns ds
+                partition_columns.types string
+                serialization.ddl struct bucket_small { string key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.bucket_small
+            name: default.bucket_small
+      Truncated Path -> Alias:
+        /bucket_big/ds=2008-04-08 [$hdt$_1:b]
+        /bucket_big/ds=2008-04-09 [$hdt$_1:b]
+        /bucket_small/ds=2008-04-08 [$hdt$_0:a]
+      Needs Tagging: true
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          keys:
+            0 _col0 (type: string)
+            1 _col0 (type: string)
+          Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE
+          Group By Operator
+            aggregations: count()
+            mode: hash
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+#### A masked pattern was here ####
+              NumFilesPerFileSink: 1
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  properties:
                     column.name.delimiter ,
-                    columns key,value
-                    columns.comments 
-                    columns.types string:string
+                    columns _col0
+                    columns.types bigint
+                    escape.delim \
+                    serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              TotalFiles: 1
+              GatherStats: false
+              MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@bucket_big
+PREHOOK: Input: default@bucket_big@ds=2008-04-08
+PREHOOK: Input: default@bucket_big@ds=2008-04-09
+PREHOOK: Input: default@bucket_small
+PREHOOK: Input: default@bucket_small@ds=2008-04-08
 #### A masked pattern was here ####
-                    name default.bucket_small
-                    numFiles 2
-                    numRows 0
-                    partition_columns ds
-                    partition_columns.types string
-                    rawDataSize 0
-                    serialization.ddl struct bucket_small { string key, string value}
-                    serialization.format 1
-                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    totalSize 114
+POSTHOOK: query: select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@bucket_big
+POSTHOOK: Input: default@bucket_big@ds=2008-04-08
+POSTHOOK: Input: default@bucket_big@ds=2008-04-09
+POSTHOOK: Input: default@bucket_small
+POSTHOOK: Input: default@bucket_small@ds=2008-04-08
 #### A masked pattern was here ####
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    properties:
-                      bucket_count 2
-                      bucket_field_name key
-                      column.name.delimiter ,
-                      columns key,value
-                      columns.comments 
-                      columns.types string:string
+38
+PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: b
+            Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+            GatherStats: false
+            Filter Operator
+              isSamplingPred: false
+              predicate: key is not null (type: boolean)
+              Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+                Sorted Merge Bucket Map Join Operator
+                  condition map:
+                       Inner Join 0 to 1
+                  keys:
+                    0 _col0 (type: string)
+                    1 _col0 (type: string)
+                  Position of Big Table: 1
+                  BucketMapJoin: true
+                  Group By Operator
+                    aggregations: count()
+                    mode: hash
+                    outputColumnNames: _col0
+                    Reduce Output Operator
+                      null sort order: 
+                      sort order: 
+                      tag: -1
+                      value expressions: _col0 (type: bigint)
+                      auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: ds=2008-04-08
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2008-04-08
+            properties:
+              bucket_count 4
+              bucket_field_name key
+              column.name.delimiter ,
+              columns key,value
+              columns.comments 
+              columns.types string:string
+#### A masked pattern was here ####
+              name default.bucket_big
+              numFiles 4
+              numRows 0
+              partition_columns ds
+              partition_columns.types string
+              rawDataSize 0
+              serialization.ddl struct bucket_big { string key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 5812
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                SORTBUCKETCOLSPREFIX TRUE
+                bucket_count 4
+                bucket_field_name key
+                column.name.delimiter ,
+                columns key,value
+                columns.comments 
+                columns.types string:string
+#### A masked pattern was here ####
+                name default.bucket_big
+                partition_columns ds
+                partition_columns.types string
+                serialization.ddl struct bucket_big { string key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.bucket_big
+            name: default.bucket_big
+#### A masked pattern was here ####
+          Partition
+            base file name: ds=2008-04-09
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2008-04-09
+            properties:
+              bucket_count 4
+              bucket_field_name key
+              column.name.delimiter ,
+              columns key,value
+              columns.comments 
+              columns.types string:string
+#### A masked pattern was here ####
+              name default.bucket_big
+              numFiles 4
+              numRows 0
+              partition_columns ds
+              partition_columns.types string
+              rawDataSize 0
+              serialization.ddl struct bucket_big { string key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 5812
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                SORTBUCKETCOLSPREFIX TRUE
+                bucket_count 4
+                bucket_field_name key
+                column.name.delimiter ,
+                columns key,value
+                columns.comments 
+                columns.types string:string
+#### A masked pattern was here ####
+                name default.bucket_big
+                partition_columns ds
+                partition_columns.types string
+                serialization.ddl struct bucket_big { string key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.bucket_big
+            name: default.bucket_big
+      Truncated Path -> Alias:
+        /bucket_big/ds=2008-04-08 [$hdt$_1:b]
+        /bucket_big/ds=2008-04-09 [$hdt$_1:b]
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
 #### A masked pattern was here ####
-                      name default.bucket_small
-                      partition_columns ds
-                      partition_columns.types string
-                      serialization.ddl struct bucket_small { string key, string value}
-                      serialization.format 1
-                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            NumFilesPerFileSink: 1
 #### A masked pattern was here ####
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    name: default.bucket_small
-                  name: default.bucket_small
-      Alias -> Map Local Operator Tree:
-        a 
-          TableScan
-            alias: a
-            Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0
+                  columns.types bigint
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
             GatherStats: false
-            Filter Operator
-              isSamplingPred: false
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
-              HashTable Sink Operator
-                keys:
-                  0 key (type: string)
-                  1 key (type: string)
-                Position of Big Table: 1
-      Bucket Mapjoin Context:
-          Alias Bucket Base File Name Mapping:
-            a {ds=2008-04-08/srcsortbucket1outof4.txt=[ds=2008-04-08/smallsrcsortbucket1outof4.txt], ds=2008-04-08/srcsortbucket2outof4.txt=[ds=2008-04-08/smallsrcsortbucket2outof4.txt], ds=2008-04-08/srcsortbucket3outof4.txt=[ds=2008-04-08/smallsrcsortbucket1outof4.txt], ds=2008-04-08/srcsortbucket4outof4.txt=[ds=2008-04-08/smallsrcsortbucket2outof4.txt], ds=2008-04-09/srcsortbucket1outof4.txt=[ds=2008-04-08/smallsrcsortbucket1outof4.txt], ds=2008-04-09/srcsortbucket2outof4.txt=[ds=2008-04-08/smallsrcsortbucket2outof4.txt], ds=2008-04-09/srcsortbucket3outof4.txt=[ds=2008-04-08/smallsrcsortbucket1outof4.txt], ds=2008-04-09/srcsortbucket4outof4.txt=[ds=2008-04-08/smallsrcsortbucket2outof4.txt]}
-          Alias Bucket File Name Mapping:
+            MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@bucket_big
+PREHOOK: Input: default@bucket_big@ds=2008-04-08
+PREHOOK: Input: default@bucket_big@ds=2008-04-09
+PREHOOK: Input: default@bucket_small
+PREHOOK: Input: default@bucket_small@ds=2008-04-08
 #### A masked pattern was here ####
-          Alias Bucket Output File Name Mapping:
+POSTHOOK: query: select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@bucket_big
+POSTHOOK: Input: default@bucket_big@ds=2008-04-08
+POSTHOOK: Input: default@bucket_big@ds=2008-04-09
+POSTHOOK: Input: default@bucket_small
+POSTHOOK: Input: default@bucket_small@ds=2008-04-08
 #### A masked pattern was here ####
+38
+PREHOOK: query: explain extended select /*+ mapjoin(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended select /*+ mapjoin(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
 
+STAGE PLANS:
   Stage: Stage-1
     Map Reduce
       Map Operator Tree:
@@ -842,29 +1255,24 @@ STAGE PLANS:
               isSamplingPred: false
               predicate: key is not null (type: boolean)
               Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
-              Map Join Operator
+              Sorted Merge Bucket Map Join Operator
                 condition map:
                      Inner Join 0 to 1
                 keys:
                   0 key (type: string)
                   1 key (type: string)
                 Position of Big Table: 1
-                Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE
                 BucketMapJoin: true
                 Group By Operator
                   aggregations: count()
                   mode: hash
                   outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
                     null sort order: 
                     sort order: 
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                     tag: -1
                     value expressions: _col0 (type: bigint)
                     auto parallelism: false
-      Local Work:
-        Map Reduce Local Work
       Path -> Alias:
 #### A masked pattern was here ####
       Path -> Partition:
@@ -899,6 +1307,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
+                SORTBUCKETCOLSPREFIX TRUE
                 bucket_count 4
                 bucket_field_name key
                 column.name.delimiter ,
@@ -947,6 +1356,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
+                SORTBUCKETCOLSPREFIX TRUE
                 bucket_count 4
                 bucket_field_name key
                 column.name.delimiter ,
@@ -973,13 +1383,11 @@ STAGE PLANS:
           aggregations: count(VALUE._col0)
           mode: mergepartial
           outputColumnNames: _col0
-          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
             GlobalTableId: 0
 #### A masked pattern was here ####
             NumFilesPerFileSink: 1
-            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 #### A masked pattern was here ####
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -1025,200 +1433,10 @@ PREHOOK: type: QUERY
 POSTHOOK: query: explain extended select /*+ MAPJOIN(a,b) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key JOIN bucket_big c ON a.key = c.key
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-4 is a root stage
-  Stage-1 depends on stages: Stage-4
+  Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
-  Stage: Stage-4
-    Map Reduce Local Work
-      Alias -> Map Local Tables:
-        a 
-          Fetch Operator
-            limit: -1
-            Partition Description:
-                Partition
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  partition values:
-                    ds 2008-04-08
-                  properties:
-                    bucket_count 2
-                    bucket_field_name key
-                    column.name.delimiter ,
-                    columns key,value
-                    columns.comments 
-                    columns.types string:string
-#### A masked pattern was here ####
-                    name default.bucket_small
-                    numFiles 2
-                    numRows 0
-                    partition_columns ds
-                    partition_columns.types string
-                    rawDataSize 0
-                    serialization.ddl struct bucket_small { string key, string value}
-                    serialization.format 1
-                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    totalSize 114
-#### A masked pattern was here ####
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    properties:
-                      bucket_count 2
-                      bucket_field_name key
-                      column.name.delimiter ,
-                      columns key,value
-                      columns.comments 
-                      columns.types string:string
-#### A masked pattern was here ####
-                      name default.bucket_small
-                      partition_columns ds
-                      partition_columns.types string
-                      serialization.ddl struct bucket_small { string key, string value}
-                      serialization.format 1
-                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    name: default.bucket_small
-                  name: default.bucket_small
-        b 
-          Fetch Operator
-            limit: -1
-            Partition Description:
-                Partition
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  partition values:
-                    ds 2008-04-08
-                  properties:
-                    bucket_count 4
-                    bucket_field_name key
-                    column.name.delimiter ,
-                    columns key,value
-                    columns.comments 
-                    columns.types string:string
-#### A masked pattern was here ####
-                    name default.bucket_big
-                    numFiles 4
-                    numRows 0
-                    partition_columns ds
-                    partition_columns.types string
-                    rawDataSize 0
-                    serialization.ddl struct bucket_big { string key, string value}
-                    serialization.format 1
-                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    totalSize 5812
-#### A masked pattern was here ####
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    properties:
-                      bucket_count 4
-                      bucket_field_name key
-                      column.name.delimiter ,
-                      columns key,value
-                      columns.comments 
-                      columns.types string:string
-#### A masked pattern was here ####
-                      name default.bucket_big
-                      partition_columns ds
-                      partition_columns.types string
-                      serialization.ddl struct bucket_big { string key, string value}
-                      serialization.format 1
-                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    name: default.bucket_big
-                  name: default.bucket_big
-                Partition
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  partition values:
-                    ds 2008-04-09
-                  properties:
-                    bucket_count 4
-                    bucket_field_name key
-                    column.name.delimiter ,
-                    columns key,value
-                    columns.comments 
-                    columns.types string:string
-#### A masked pattern was here ####
-                    name default.bucket_big
-                    numFiles 4
-                    numRows 0
-                    partition_columns ds
-                    partition_columns.types string
-                    rawDataSize 0
-                    serialization.ddl struct bucket_big { string key, string value}
-                    serialization.format 1
-                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    totalSize 5812
-#### A masked pattern was here ####
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    properties:
-                      bucket_count 4
-                      bucket_field_name key
-                      column.name.delimiter ,
-                      columns key,value
-                      columns.comments 
-                      columns.types string:string
-#### A masked pattern was here ####
-                      name default.bucket_big
-                      partition_columns ds
-                      partition_columns.types string
-                      serialization.ddl struct bucket_big { string key, string value}
-                      serialization.format 1
-                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    name: default.bucket_big
-                  name: default.bucket_big
-      Alias -> Map Local Operator Tree:
-        a 
-          TableScan
-            alias: a
-            Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
-            GatherStats: false
-            Filter Operator
-              isSamplingPred: false
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
-              HashTable Sink Operator
-                keys:
-                  0 key (type: string)
-                  1 key (type: string)
-                  2 key (type: string)
-                Position of Big Table: 2
-        b 
-          TableScan
-            alias: b
-            Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
-            GatherStats: false
-            Filter Operator
-              isSamplingPred: false
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
-              HashTable Sink Operator
-                keys:
-                  0 key (type: string)
-                  1 key (type: string)
-                  2 key (type: string)
-                Position of Big Table: 2
-      Bucket Mapjoin Context:
-          Alias Bucket Base File Name Mapping:
-            a {ds=2008-04-08/srcsortbucket1outof4.txt=[ds=2008-04-08/smallsrcsortbucket1outof4.txt], ds=2008-04-08/srcsortbucket2outof4.txt=[ds=2008-04-08/smallsrcsortbucket2outof4.txt], ds=2008-04-08/srcsortbucket3outof4.txt=[ds=2008-04-08/smallsrcsortbucket1outof4.txt], ds=2008-04-08/srcsortbucket4outof4.txt=[ds=2008-04-08/smallsrcsortbucket2outof4.txt], ds=2008-04-09/srcsortbucket1outof4.txt=[ds=2008-04-08/smallsrcsortbucket1outof4.txt], ds=2008-04-09/srcsortbucket2outof4.txt=[ds=2008-04-08/smallsrcsortbucket2outof4.txt], ds=2008-04-09/srcsortbucket3outof4.txt=[ds=2008-04-08/smallsrcsortbucket1outof4.txt], ds=2008-04-09/srcsortbucket4outof4.txt=[ds=2008-04-08/smallsrcsortbucket2outof4.txt]}
-            b {ds=2008-04-08/srcsortbucket1outof4.txt=[ds=2008-04-08/srcsortbucket1outof4.txt, ds=2008-04-09/srcsortbucket1outof4.txt], ds=2008-04-08/srcsortbucket2outof4.txt=[ds=2008-04-08/srcsortbucket2outof4.txt, ds=2008-04-09/srcsortbucket2outof4.txt], ds=2008-04-08/srcsortbucket3outof4.txt=[ds=2008-04-08/srcsortbucket3outof4.txt, ds=2008-04-09/srcsortbucket3outof4.txt], ds=2008-04-08/srcsortbucket4outof4.txt=[ds=2008-04-08/srcsortbucket4outof4.txt, ds=2008-04-09/srcsortbucket4outof4.txt], ds=2008-04-09/srcsortbucket1outof4.txt=[ds=2008-04-08/srcsortbucket1outof4.txt, ds=2008-04-09/srcsortbucket1outof4.txt], ds=2008-04-09/srcsortbucket2outof4.txt=[ds=2008-04-08/srcsortbucket2outof4.txt, ds=2008-04-09/srcsortbucket2outof4.txt], ds=2008-04-09/srcsortbucket3outof4.txt=[ds=2008-04-08/srcsortbucket3outof4.txt, ds=2008-04-09/srcsortbucket3outof4.txt], ds=2008-04-09/srcsortbucket4outof4.txt=[ds=2008-04-08/srcsortbucket4outof4.txt, ds=2008-04-09/srcsortbucket4outof4.txt]}
-          Alias Bucket File Name Mapping:
-#### A masked pattern was here ####
-          Alias Bucket Output File Name Mapping:
-#### A masked pattern was here ####
-
   Stage: Stage-1
     Map Reduce
       Map Operator Tree:
@@ -1230,7 +1448,7 @@ STAGE PLANS:
               isSamplingPred: false
               predicate: key is not null (type: boolean)
               Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
-              Map Join Operator
+              Sorted Merge Bucket Map Join Operator
                 condition map:
                      Inner Join 0 to 1
                      Inner Join 0 to 2
@@ -1239,22 +1457,17 @@ STAGE PLANS:
                   1 key (type: string)
                   2 key (type: string)
                 Position of Big Table: 2
-                Statistics: Num rows: 255 Data size: 25572 Basic stats: COMPLETE Column stats: NONE
                 BucketMapJoin: true
                 Group By Operator
                   aggregations: count()
                   mode: hash
                   outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
                     null sort order: 
                     sort order: 
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                     tag: -1
                     value expressions: _col0 (type: bigint)
                     auto parallelism: false
-      Local Work:
-        Map Reduce Local Work
       Path -> Alias:
 #### A masked pattern was here ####
       Path -> Partition:
@@ -1289,6 +1502,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
+                SORTBUCKETCOLSPREFIX TRUE
                 bucket_count 4
                 bucket_field_name key
                 column.name.delimiter ,
@@ -1337,6 +1551,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
+                SORTBUCKETCOLSPREFIX TRUE
                 bucket_count 4
                 bucket_field_name key
                 column.name.delimiter ,
@@ -1363,13 +1578,11 @@ STAGE PLANS:
           aggregations: count(VALUE._col0)
           mode: mergepartial
           outputColumnNames: _col0
-          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
             GlobalTableId: 0
 #### A masked pattern was here ####
             NumFilesPerFileSink: 1
-            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 #### A masked pattern was here ####
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat