You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@hive.apache.org by jc...@apache.org on 2015/11/20 22:12:16 UTC

[1/2] hive git commit: HIVE-11684: Implement limit pushdown through outer join in CBO (Jesus Camacho Rodriguez, reviewed by Laljo John Pullokkaran)

Repository: hive
Updated Branches:
  refs/heads/master 97cb0c6e0 -> 0b9009770


http://git-wip-us.apache.org/repos/asf/hive/blob/0b900977/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out b/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out
index 937e8fc..e2ae796 100644
--- a/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out
+++ b/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out
@@ -1767,18 +1767,22 @@ STAGE PLANS:
         Map 1 
             Map Operator Tree:
                 TableScan
-                  alias: y
+                  alias: z
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   GatherStats: false
-                  Select Operator
-                    expressions: key (type: string)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                    Spark HashTable Sink Operator
-                      keys:
-                        0 _col0 (type: string)
-                        1 _col0 (type: string)
-                      Position of Big Table: 1
+                  Filter Operator
+                    isSamplingPred: false
+                    predicate: value is not null (type: boolean)
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: value (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                      Spark HashTable Sink Operator
+                        keys:
+                          0 _col0 (type: string)
+                          1 _col1 (type: string)
+                        Position of Big Table: 1
             Local Work:
               Map Reduce Local Work
             Path -> Alias:
@@ -1786,9 +1790,12 @@ STAGE PLANS:
             Path -> Partition:
 #### A masked pattern was here ####
                 Partition
-                  base file name: src
+                  base file name: hr=11
                   input format: org.apache.hadoop.mapred.TextInputFormat
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  partition values:
+                    ds 2008-04-08
+                    hr 11
                   properties:
                     COLUMN_STATS_ACCURATE true
                     bucket_count -1
@@ -1796,11 +1803,13 @@ STAGE PLANS:
                     columns.comments 'default','default'
                     columns.types string:string
 #### A masked pattern was here ####
-                    name default.src
+                    name default.srcpart
                     numFiles 1
                     numRows 500
+                    partition_columns ds/hr
+                    partition_columns.types string:string
                     rawDataSize 5312
-                    serialization.ddl struct src { string key, string value}
+                    serialization.ddl struct srcpart { string key, string value}
                     serialization.format 1
                     serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                     totalSize 5812
@@ -1810,45 +1819,38 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE true
                       bucket_count -1
                       columns key,value
                       columns.comments 'default','default'
                       columns.types string:string
 #### A masked pattern was here ####
-                      name default.src
-                      numFiles 1
-                      numRows 500
-                      rawDataSize 5312
-                      serialization.ddl struct src { string key, string value}
+                      name default.srcpart
+                      partition_columns ds/hr
+                      partition_columns.types string:string
+                      serialization.ddl struct srcpart { string key, string value}
                       serialization.format 1
                       serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                      totalSize 5812
 #### A masked pattern was here ####
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    name: default.src
-                  name: default.src
+                    name: default.srcpart
+                  name: default.srcpart
             Truncated Path -> Alias:
-              /src [y]
-        Map 3 
+              /srcpart/ds=2008-04-08/hr=11 [z]
+        Map 2 
             Map Operator Tree:
                 TableScan
-                  alias: z
+                  alias: y
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   GatherStats: false
-                  Filter Operator
-                    isSamplingPred: false
-                    predicate: value is not null (type: boolean)
-                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: value (type: string)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                      Spark HashTable Sink Operator
-                        keys:
-                          0 _col1 (type: string)
-                          1 _col0 (type: string)
-                        Position of Big Table: 0
+                  Select Operator
+                    expressions: key (type: string)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    Spark HashTable Sink Operator
+                      keys:
+                        0 _col0 (type: string)
+                        1 _col0 (type: string)
+                      Position of Big Table: 1
             Local Work:
               Map Reduce Local Work
             Path -> Alias:
@@ -1856,12 +1858,9 @@ STAGE PLANS:
             Path -> Partition:
 #### A masked pattern was here ####
                 Partition
-                  base file name: hr=11
+                  base file name: src
                   input format: org.apache.hadoop.mapred.TextInputFormat
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  partition values:
-                    ds 2008-04-08
-                    hr 11
                   properties:
                     COLUMN_STATS_ACCURATE true
                     bucket_count -1
@@ -1869,13 +1868,11 @@ STAGE PLANS:
                     columns.comments 'default','default'
                     columns.types string:string
 #### A masked pattern was here ####
-                    name default.srcpart
+                    name default.src
                     numFiles 1
                     numRows 500
-                    partition_columns ds/hr
-                    partition_columns.types string:string
                     rawDataSize 5312
-                    serialization.ddl struct srcpart { string key, string value}
+                    serialization.ddl struct src { string key, string value}
                     serialization.format 1
                     serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                     totalSize 5812
@@ -1885,29 +1882,32 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
+                      COLUMN_STATS_ACCURATE true
                       bucket_count -1
                       columns key,value
                       columns.comments 'default','default'
                       columns.types string:string
 #### A masked pattern was here ####
-                      name default.srcpart
-                      partition_columns ds/hr
-                      partition_columns.types string:string
-                      serialization.ddl struct srcpart { string key, string value}
+                      name default.src
+                      numFiles 1
+                      numRows 500
+                      rawDataSize 5312
+                      serialization.ddl struct src { string key, string value}
                       serialization.format 1
                       serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      totalSize 5812
 #### A masked pattern was here ####
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    name: default.srcpart
-                  name: default.srcpart
+                    name: default.src
+                  name: default.src
             Truncated Path -> Alias:
-              /srcpart/ds=2008-04-08/hr=11 [z]
+              /src [y]
 
   Stage: Stage-1
     Spark
 #### A masked pattern was here ####
       Vertices:
-        Map 2 
+        Map 3 
             Map Operator Tree:
                 TableScan
                   alias: x
@@ -1929,7 +1929,7 @@ STAGE PLANS:
                           1 _col0 (type: string)
                         outputColumnNames: _col1, _col2
                         input vertices:
-                          0 Map 1
+                          0 Map 2
                         Position of Big Table: 1
                         Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
                         Select Operator
@@ -1940,15 +1940,15 @@ STAGE PLANS:
                             condition map:
                                  Inner Join 0 to 1
                             keys:
-                              0 _col1 (type: string)
-                              1 _col0 (type: string)
-                            outputColumnNames: _col0, _col1, _col2
+                              0 _col0 (type: string)
+                              1 _col1 (type: string)
+                            outputColumnNames: _col0, _col3, _col4
                             input vertices:
-                              1 Map 3
-                            Position of Big Table: 0
+                              0 Map 1
+                            Position of Big Table: 1
                             Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
                             Select Operator
-                              expressions: _col0 (type: string), _col2 (type: string), _col1 (type: string)
+                              expressions: _col3 (type: string), _col0 (type: string), _col4 (type: string)
                               outputColumnNames: _col0, _col1, _col2
                               Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
                               File Output Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/0b900977/ql/src/test/results/clientpositive/spark/join_merging.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/join_merging.q.out b/ql/src/test/results/clientpositive/spark/join_merging.q.out
index 6c5089b..baed227 100644
--- a/ql/src/test/results/clientpositive/spark/join_merging.q.out
+++ b/ql/src/test/results/clientpositive/spark/join_merging.q.out
@@ -114,8 +114,8 @@ STAGE PLANS:
   Stage: Stage-1
     Spark
       Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2)
-        Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2)
+        Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2)
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -123,6 +123,20 @@ STAGE PLANS:
                 TableScan
                   alias: p1
                   Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: p_partkey (type: int)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: int)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: int)
+                      Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: p1
+                  Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: (p_size > 10) (type: boolean)
                     Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE
@@ -136,7 +150,7 @@ STAGE PLANS:
                         Map-reduce partition columns: _col0 (type: int)
                         Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col1 (type: int)
-        Map 4 
+        Map 5 
             Map Operator Tree:
                 TableScan
                   alias: p1
@@ -151,20 +165,6 @@ STAGE PLANS:
                       Map-reduce partition columns: _col0 (type: int)
                       Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col1 (type: int)
-        Map 5 
-            Map Operator Tree:
-                TableScan
-                  alias: p1
-                  Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: p_partkey (type: int)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: int)
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: int)
-                      Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
         Reducer 2 
             Reduce Operator Tree:
               Join Operator
@@ -172,6 +172,27 @@ STAGE PLANS:
                      Left Outer Join0 to 1
                 keys:
                   0 _col0 (type: int)
+                  1 _col2 (type: int)
+                outputColumnNames: _col2, _col4
+                Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col2 (type: int), _col4 (type: int)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4 
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Left Outer Join0 to 1
+                keys:
+                  0 _col0 (type: int)
                   1 _col0 (type: int)
                 outputColumnNames: _col1, _col2, _col3
                 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE
@@ -184,27 +205,6 @@ STAGE PLANS:
                     Map-reduce partition columns: _col2 (type: int)
                     Statistics: Num rows: 9 Data size: 1112 Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col1 (type: int), _col3 (type: int)
-        Reducer 3 
-            Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Right Outer Join0 to 1
-                keys:
-                  0 _col2 (type: int)
-                  1 _col0 (type: int)
-                outputColumnNames: _col1, _col3
-                Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col1 (type: int), _col3 (type: int)
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE
-                  File Output Operator
-                    compressed: false
-                    Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE
-                    table:
-                        input format: org.apache.hadoop.mapred.TextInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Fetch Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/0b900977/ql/src/test/results/clientpositive/subquery_notin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/subquery_notin.q.out b/ql/src/test/results/clientpositive/subquery_notin.q.out
index 5563794..12bd35e 100644
--- a/ql/src/test/results/clientpositive/subquery_notin.q.out
+++ b/ql/src/test/results/clientpositive/subquery_notin.q.out
@@ -1,4 +1,4 @@
-Warning: Shuffle Join JOIN[20][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[23][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product
 PREHOOK: query: -- non agg, non corr
 explain
 select * 
@@ -18,64 +18,70 @@ where src.key not in
   )
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-3 is a root stage
-  Stage-1 depends on stages: Stage-3
-  Stage-2 depends on stages: Stage-1
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1, Stage-4
+  Stage-4 is a root stage
   Stage-0 depends on stages: Stage-2
 
 STAGE PLANS:
-  Stage: Stage-3
+  Stage: Stage-1
     Map Reduce
       Map Operator Tree:
           TableScan
             alias: src
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: string)
+                sort order: +
+                Map-reduce partition columns: _col0 (type: string)
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col1 (type: string)
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: ((key > '2') and key is null) (type: boolean)
-              Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+              predicate: (key > '2') (type: boolean)
+              Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
               Select Operator
-                Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  aggregations: count()
-                  mode: hash
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    sort order: 
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col0 (type: bigint)
+                expressions: key (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
       Reduce Operator Tree:
-        Group By Operator
-          aggregations: count(VALUE._col0)
-          mode: mergepartial
-          outputColumnNames: _col0
-          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+        Join Operator
+          condition map:
+               Left Outer Join0 to 1
+          keys:
+            0 _col0 (type: string)
+            1 _col0 (type: string)
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
           Filter Operator
-            predicate: (_col0 = 0) (type: boolean)
-            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-              File Output Operator
-                compressed: false
-                table:
-                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            predicate: _col2 is null (type: boolean)
+            Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
-  Stage: Stage-1
+  Stage: Stage-2
     Map Reduce
       Map Operator Tree:
           TableScan
-            alias: src
-            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: key (type: string), value (type: string)
-              outputColumnNames: _col0, _col1
-              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-              Reduce Output Operator
-                sort order: 
-                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                value expressions: _col0 (type: string), _col1 (type: string)
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: string), _col1 (type: string)
           TableScan
             Reduce Output Operator
               sort order: 
@@ -88,62 +94,52 @@ STAGE PLANS:
             0 
             1 
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
+            Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
             table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
-  Stage: Stage-2
+  Stage: Stage-4
     Map Reduce
       Map Operator Tree:
           TableScan
-            Reduce Output Operator
-              key expressions: _col0 (type: string)
-              sort order: +
-              Map-reduce partition columns: _col0 (type: string)
-              Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col1 (type: string)
-          TableScan
             alias: src
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: (key > '2') (type: boolean)
-              Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+              predicate: ((key > '2') and key is null) (type: boolean)
+              Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
               Select Operator
-                expressions: key (type: string)
-                outputColumnNames: _col0
-                Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: bigint)
       Reduce Operator Tree:
-        Join Operator
-          condition map:
-               Left Outer Join0 to 1
-          keys:
-            0 _col0 (type: string)
-            1 _col0 (type: string)
-          outputColumnNames: _col0, _col1, _col3
-          Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
           Filter Operator
-            predicate: _col3 is null (type: boolean)
-            Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE
+            predicate: (_col0 = 0) (type: boolean)
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
             Select Operator
-              expressions: _col0 (type: string), _col1 (type: string)
-              outputColumnNames: _col0, _col1
-              Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
               File Output Operator
                 compressed: false
-                Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE
                 table:
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Fetch Operator
@@ -151,7 +147,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-Warning: Shuffle Join JOIN[20][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[23][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product
 PREHOOK: query: select * 
 from src 
 where src.key not in  ( select key from src s1 where s1.key > '2')
@@ -1243,7 +1239,7 @@ Manufacturer#5	almond antique medium spring khaki	6
 Manufacturer#5	almond azure blanched chiffon midnight	23
 Manufacturer#5	almond antique blue firebrick mint	31
 Manufacturer#5	almond aquamarine dodger light gainsboro	46
-Warning: Shuffle Join JOIN[20][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[23][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product
 PREHOOK: query: -- non agg, non corr, Group By in Parent Query
 select li.l_partkey, count(*) 
 from lineitem li 
@@ -1278,7 +1274,7 @@ POSTHOOK: Input: default@lineitem
 139636	1
 175839	1
 182052	1
-Warning: Shuffle Join JOIN[20][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[23][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product
 PREHOOK: query: -- alternate not in syntax
 select * 
 from src 
@@ -1442,7 +1438,7 @@ POSTHOOK: Input: default@src
 POSTHOOK: Input: default@t1_v
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@T2_v
-Warning: Shuffle Join JOIN[24][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[27][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product
 PREHOOK: query: explain
 select * 
 from T1_v where T1_v.key not in (select T2_v.key from T2_v)
@@ -1452,50 +1448,12 @@ select *
 from T1_v where T1_v.key not in (select T2_v.key from T2_v)
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-3 is a root stage
-  Stage-1 depends on stages: Stage-3
-  Stage-2 depends on stages: Stage-1
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1, Stage-4
+  Stage-4 is a root stage
   Stage-0 depends on stages: Stage-2
 
 STAGE PLANS:
-  Stage: Stage-3
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: src
-            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: ((key < '11') and CASE WHEN ((key > '104')) THEN (null) ELSE (key) END is null) (type: boolean)
-              Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  aggregations: count()
-                  mode: hash
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    sort order: 
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col0 (type: bigint)
-      Reduce Operator Tree:
-        Group By Operator
-          aggregations: count(VALUE._col0)
-          mode: mergepartial
-          outputColumnNames: _col0
-          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-          Filter Operator
-            predicate: (_col0 = 0) (type: boolean)
-            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-              File Output Operator
-                compressed: false
-                table:
-                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
   Stage: Stage-1
     Map Reduce
       Map Operator Tree:
@@ -1510,9 +1468,52 @@ STAGE PLANS:
                 outputColumnNames: _col0
                 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
-                  sort order: 
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
                   Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col0 (type: string)
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: ((key < '11') and (CASE WHEN ((key > '104')) THEN (null) ELSE (key) END < '11')) (type: boolean)
+              Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: CASE WHEN ((key > '104')) THEN (null) ELSE (key) END (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Left Outer Join0 to 1
+          keys:
+            0 _col0 (type: string)
+            1 _col0 (type: string)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
+          Filter Operator
+            predicate: _col1 is null (type: boolean)
+            Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: string)
           TableScan
             Reduce Output Operator
               sort order: 
@@ -1525,61 +1526,52 @@ STAGE PLANS:
             0 
             1 
           outputColumnNames: _col0
-          Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
+            Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE
             table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
-  Stage: Stage-2
+  Stage: Stage-4
     Map Reduce
       Map Operator Tree:
           TableScan
-            Reduce Output Operator
-              key expressions: _col0 (type: string)
-              sort order: +
-              Map-reduce partition columns: _col0 (type: string)
-              Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
-          TableScan
             alias: src
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: ((key < '11') and (CASE WHEN ((key > '104')) THEN (null) ELSE (key) END < '11')) (type: boolean)
-              Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+              predicate: ((key < '11') and CASE WHEN ((key > '104')) THEN (null) ELSE (key) END is null) (type: boolean)
+              Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
               Select Operator
-                expressions: CASE WHEN ((key > '104')) THEN (null) ELSE (key) END (type: string)
-                outputColumnNames: _col0
-                Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: bigint)
       Reduce Operator Tree:
-        Join Operator
-          condition map:
-               Left Outer Join0 to 1
-          keys:
-            0 _col0 (type: string)
-            1 _col0 (type: string)
-          outputColumnNames: _col0, _col2
-          Statistics: Num rows: 200 Data size: 2132 Basic stats: COMPLETE Column stats: NONE
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
           Filter Operator
-            predicate: _col2 is null (type: boolean)
-            Statistics: Num rows: 100 Data size: 1066 Basic stats: COMPLETE Column stats: NONE
+            predicate: (_col0 = 0) (type: boolean)
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
             Select Operator
-              expressions: _col0 (type: string)
-              outputColumnNames: _col0
-              Statistics: Num rows: 100 Data size: 1066 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
               File Output Operator
                 compressed: false
-                Statistics: Num rows: 100 Data size: 1066 Basic stats: COMPLETE Column stats: NONE
                 table:
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Fetch Operator
@@ -1587,7 +1579,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-Warning: Shuffle Join JOIN[24][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[27][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product
 PREHOOK: query: select * 
 from T1_v where T1_v.key not in (select T2_v.key from T2_v)
 PREHOOK: type: QUERY

http://git-wip-us.apache.org/repos/asf/hive/blob/0b900977/ql/src/test/results/clientpositive/subquery_notin_having.q.java1.7.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/subquery_notin_having.q.java1.7.out b/ql/src/test/results/clientpositive/subquery_notin_having.q.java1.7.out
index 9689ae3..150b527 100644
--- a/ql/src/test/results/clientpositive/subquery_notin_having.q.java1.7.out
+++ b/ql/src/test/results/clientpositive/subquery_notin_having.q.java1.7.out
@@ -1,4 +1,4 @@
-Warning: Shuffle Join JOIN[24][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[27][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-3:MAPRED' is a cross product
 PREHOOK: query: -- non agg, non corr
 -- JAVA_VERSION_SPECIFIC_OUTPUT
 
@@ -25,9 +25,9 @@ having key not in
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
-  Stage-2 depends on stages: Stage-1, Stage-4
-  Stage-3 depends on stages: Stage-2
-  Stage-4 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-3 depends on stages: Stage-2, Stage-5
+  Stage-5 is a root stage
   Stage-0 depends on stages: Stage-3
 
 STAGE PLANS:
@@ -72,38 +72,10 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             Reduce Output Operator
-              sort order: 
-              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col0 (type: string), _col1 (type: bigint)
-          TableScan
-            Reduce Output Operator
-              sort order: 
-              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-      Reduce Operator Tree:
-        Join Operator
-          condition map:
-               Inner Join 0 to 1
-          keys:
-            0 
-            1 
-          outputColumnNames: _col0, _col1
-          Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
-  Stage: Stage-3
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            Reduce Output Operator
               key expressions: _col0 (type: string)
               sort order: +
               Map-reduce partition columns: _col0 (type: string)
-              Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
               value expressions: _col1 (type: bigint)
           TableScan
             alias: src
@@ -127,24 +99,48 @@ STAGE PLANS:
           keys:
             0 _col0 (type: string)
             1 _col0 (type: string)
-          outputColumnNames: _col0, _col1, _col3
-          Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
           Filter Operator
-            predicate: _col3 is null (type: boolean)
-            Statistics: Num rows: 151 Data size: 1606 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: _col0 (type: string), _col1 (type: bigint)
-              outputColumnNames: _col0, _col1
-              Statistics: Num rows: 151 Data size: 1606 Basic stats: COMPLETE Column stats: NONE
-              File Output Operator
-                compressed: false
-                Statistics: Num rows: 151 Data size: 1606 Basic stats: COMPLETE Column stats: NONE
-                table:
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            predicate: _col2 is null (type: boolean)
+            Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
-  Stage: Stage-4
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: string), _col1 (type: bigint)
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          keys:
+            0 
+            1 
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-5
     Map Reduce
       Map Operator Tree:
           TableScan

http://git-wip-us.apache.org/repos/asf/hive/blob/0b900977/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
index fa29dfe..871fa4e 100644
--- a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
+++ b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
@@ -3732,108 +3732,105 @@ POSTHOOK: type: QUERY
 Plan optimized by CBO.
 
 Vertex dependency in root stage
-Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
-Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
+Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE)
 Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
-Reducer 6 <- Map 5 (SIMPLE_EDGE)
+Reducer 7 <- Map 6 (SIMPLE_EDGE)
 
 Stage-0
    Fetch Operator
       limit:-1
       Stage-1
          Reducer 4
-         File Output Operator [FS_28]
+         File Output Operator [FS_29]
             compressed:false
-            Statistics:Num rows: 302 Data size: 53756 Basic stats: COMPLETE Column stats: NONE
+            Statistics:Num rows: 1 Data size: 291 Basic stats: COMPLETE Column stats: NONE
             table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}
-            Select Operator [SEL_27]
+            Select Operator [SEL_28]
             |  outputColumnNames:["_col0","_col1"]
-            |  Statistics:Num rows: 302 Data size: 53756 Basic stats: COMPLETE Column stats: NONE
+            |  Statistics:Num rows: 1 Data size: 291 Basic stats: COMPLETE Column stats: NONE
             |<-Reducer 3 [SIMPLE_EDGE]
-               Reduce Output Operator [RS_26]
+               Reduce Output Operator [RS_27]
                   key expressions:_col0 (type: string)
                   sort order:+
-                  Statistics:Num rows: 302 Data size: 53756 Basic stats: COMPLETE Column stats: NONE
+                  Statistics:Num rows: 1 Data size: 291 Basic stats: COMPLETE Column stats: NONE
                   value expressions:_col1 (type: string)
-                  Select Operator [SEL_25]
-                     outputColumnNames:["_col0","_col1"]
-                     Statistics:Num rows: 302 Data size: 53756 Basic stats: COMPLETE Column stats: NONE
-                     Filter Operator [FIL_31]
-                        predicate:_col3 is null (type: boolean)
-                        Statistics:Num rows: 302 Data size: 53756 Basic stats: COMPLETE Column stats: NONE
-                        Merge Join Operator [MERGEJOIN_36]
-                        |  condition map:[{"":"Left Outer Join0 to 1"}]
-                        |  keys:{"0":"_col0 (type: string)","1":"_col0 (type: string)"}
-                        |  outputColumnNames:["_col0","_col1","_col3"]
-                        |  Statistics:Num rows: 605 Data size: 107690 Basic stats: COMPLETE Column stats: NONE
-                        |<-Map 7 [SIMPLE_EDGE]
-                        |  Reduce Output Operator [RS_22]
-                        |     key expressions:_col0 (type: string)
-                        |     Map-reduce partition columns:_col0 (type: string)
-                        |     sort order:+
-                        |     Statistics:Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
-                        |     Select Operator [SEL_16]
-                        |        outputColumnNames:["_col0"]
-                        |        Statistics:Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
-                        |        Filter Operator [FIL_34]
-                        |           predicate:(key > '2') (type: boolean)
-                        |           Statistics:Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
-                        |           TableScan [TS_14]
-                        |              alias:src_cbo
-                        |              Statistics:Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
-                        |<-Reducer 2 [SIMPLE_EDGE]
-                           Reduce Output Operator [RS_21]
-                              key expressions:_col0 (type: string)
-                              Map-reduce partition columns:_col0 (type: string)
-                              sort order:+
-                              Statistics:Num rows: 550 Data size: 97900 Basic stats: COMPLETE Column stats: NONE
-                              value expressions:_col1 (type: string)
-                              Merge Join Operator [MERGEJOIN_35]
-                              |  condition map:[{"":"Inner Join 0 to 1"}]
-                              |  keys:{}
-                              |  outputColumnNames:["_col0","_col1"]
-                              |  Statistics:Num rows: 550 Data size: 97900 Basic stats: COMPLETE Column stats: NONE
-                              |<-Map 1 [SIMPLE_EDGE]
-                              |  Reduce Output Operator [RS_18]
-                              |     sort order:
-                              |     Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
-                              |     value expressions:_col0 (type: string), _col1 (type: string)
-                              |     Select Operator [SEL_2]
-                              |        outputColumnNames:["_col0","_col1"]
-                              |        Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
-                              |        TableScan [TS_0]
-                              |           alias:src_cbo
-                              |           Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
-                              |<-Reducer 6 [SIMPLE_EDGE]
-                                 Reduce Output Operator [RS_19]
+                  Merge Join Operator [MERGEJOIN_37]
+                  |  condition map:[{"":"Inner Join 0 to 1"}]
+                  |  keys:{}
+                  |  outputColumnNames:["_col0","_col1"]
+                  |  Statistics:Num rows: 1 Data size: 291 Basic stats: COMPLETE Column stats: NONE
+                  |<-Reducer 2 [SIMPLE_EDGE]
+                  |  Reduce Output Operator [RS_21]
+                  |     sort order:
+                  |     Statistics:Num rows: 1 Data size: 265 Basic stats: COMPLETE Column stats: COMPLETE
+                  |     value expressions:_col0 (type: string), _col1 (type: string)
+                  |     Filter Operator [FIL_32]
+                  |        predicate:_col2 is null (type: boolean)
+                  |        Statistics:Num rows: 1 Data size: 265 Basic stats: COMPLETE Column stats: COMPLETE
+                  |        Merge Join Operator [MERGEJOIN_36]
+                  |        |  condition map:[{"":"Left Outer Join0 to 1"}]
+                  |        |  keys:{"0":"_col0 (type: string)","1":"_col0 (type: string)"}
+                  |        |  outputColumnNames:["_col0","_col1","_col2"]
+                  |        |  Statistics:Num rows: 404 Data size: 107060 Basic stats: COMPLETE Column stats: COMPLETE
+                  |        |<-Map 1 [SIMPLE_EDGE]
+                  |        |  Reduce Output Operator [RS_18]
+                  |        |     key expressions:_col0 (type: string)
+                  |        |     Map-reduce partition columns:_col0 (type: string)
+                  |        |     sort order:+
+                  |        |     Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+                  |        |     value expressions:_col1 (type: string)
+                  |        |     Select Operator [SEL_2]
+                  |        |        outputColumnNames:["_col0","_col1"]
+                  |        |        Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+                  |        |        TableScan [TS_0]
+                  |        |           alias:src_cbo
+                  |        |           Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+                  |        |<-Map 5 [SIMPLE_EDGE]
+                  |           Reduce Output Operator [RS_19]
+                  |              key expressions:_col0 (type: string)
+                  |              Map-reduce partition columns:_col0 (type: string)
+                  |              sort order:+
+                  |              Statistics:Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
+                  |              Select Operator [SEL_5]
+                  |                 outputColumnNames:["_col0"]
+                  |                 Statistics:Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
+                  |                 Filter Operator [FIL_33]
+                  |                    predicate:(key > '2') (type: boolean)
+                  |                    Statistics:Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
+                  |                    TableScan [TS_3]
+                  |                       alias:src_cbo
+                  |                       Statistics:Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+                  |<-Reducer 7 [SIMPLE_EDGE]
+                     Reduce Output Operator [RS_22]
+                        sort order:
+                        Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+                        Select Operator [SEL_14]
+                           Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+                           Filter Operator [FIL_34]
+                              predicate:(_col0 = 0) (type: boolean)
+                              Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                              Group By Operator [GBY_13]
+                              |  aggregations:["count(VALUE._col0)"]
+                              |  outputColumnNames:["_col0"]
+                              |  Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                              |<-Map 6 [SIMPLE_EDGE]
+                                 Reduce Output Operator [RS_12]
                                     sort order:
-                                    Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
-                                    Select Operator [SEL_11]
-                                       Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
-                                       Filter Operator [FIL_32]
-                                          predicate:(_col0 = 0) (type: boolean)
-                                          Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                                          Group By Operator [GBY_10]
-                                          |  aggregations:["count(VALUE._col0)"]
-                                          |  outputColumnNames:["_col0"]
-                                          |  Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                                          |<-Map 5 [SIMPLE_EDGE]
-                                             Reduce Output Operator [RS_9]
-                                                sort order:
-                                                Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                                                value expressions:_col0 (type: bigint)
-                                                Group By Operator [GBY_8]
-                                                   aggregations:["count()"]
-                                                   outputColumnNames:["_col0"]
-                                                   Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                                                   Select Operator [SEL_5]
-                                                      Statistics:Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
-                                                      Filter Operator [FIL_33]
-                                                         predicate:((key > '2') and key is null) (type: boolean)
-                                                         Statistics:Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
-                                                         TableScan [TS_3]
-                                                            alias:src_cbo
-                                                            Statistics:Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+                                    Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                                    value expressions:_col0 (type: bigint)
+                                    Group By Operator [GBY_11]
+                                       aggregations:["count()"]
+                                       outputColumnNames:["_col0"]
+                                       Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                                       Select Operator [SEL_8]
+                                          Statistics:Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
+                                          Filter Operator [FIL_35]
+                                             predicate:((key > '2') and key is null) (type: boolean)
+                                             Statistics:Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
+                                             TableScan [TS_6]
+                                                alias:src_cbo
+                                                Statistics:Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
 
 PREHOOK: query: explain select p_mfgr, b.p_name, p_size 
 from part b 

http://git-wip-us.apache.org/repos/asf/hive/blob/0b900977/ql/src/test/results/clientpositive/tez/tez_dynpart_hashjoin_3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/tez_dynpart_hashjoin_3.q.out b/ql/src/test/results/clientpositive/tez/tez_dynpart_hashjoin_3.q.out
new file mode 100644
index 0000000..52b4288
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/tez_dynpart_hashjoin_3.q.out
@@ -0,0 +1,200 @@
+PREHOOK: query: explain
+select a.*
+from alltypesorc a left outer join src b
+on a.cint = cast(b.key as int) and (a.cint < 100)
+limit 1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select a.*
+from alltypesorc a left outer join src b
+on a.cint = cast(b.key as int) and (a.cint < 100)
+limit 1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
+        Reducer 4 <- Map 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: UDFToInteger(_col0) (type: int)
+                      sort order: +
+                      Map-reduce partition columns: UDFToInteger(_col0) (type: int)
+                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
+                    Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+                    Limit
+                      Number of rows: 1
+                      Statistics: Num rows: 1 Data size: 215 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 215 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean)
+        Reducer 2 
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Right Outer Join0 to 1
+                filter predicates:
+                  0 
+                  1 {(KEY.reducesinkkey0 < 100)}
+                keys:
+                  0 UDFToInteger(_col0) (type: int)
+                  1 _col2 (type: int)
+                outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+                Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col1 (type: tinyint), _col2 (type: smallint), _col3 (type: int), _col4 (type: bigint), _col5 (type: float), _col6 (type: double), _col7 (type: string), _col8 (type: string), _col9 (type: timestamp), _col10 (type: timestamp), _col11 (type: boolean), _col12 (type: boolean)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
+                  Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+                  Limit
+                    Number of rows: 1
+                    Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.TextInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: int), VALUE._col3 (type: bigint), VALUE._col4 (type: float), VALUE._col5 (type: double), VALUE._col6 (type: string), VALUE._col7 (type: string), VALUE._col8 (type: timestamp), VALUE._col9 (type: timestamp), VALUE._col10 (type: boolean), VALUE._col11 (type: boolean)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
+                Statistics: Num rows: 1 Data size: 215 Basic stats: COMPLETE Column stats: NONE
+                Limit
+                  Number of rows: 1
+                  Statistics: Num rows: 1 Data size: 215 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col2 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col2 (type: int)
+                    Statistics: Num rows: 1 Data size: 215 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean)
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain
+select a.*
+from alltypesorc a left outer join src b
+on a.cint = cast(b.key as int) and (a.cint < 100)
+limit 1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select a.*
+from alltypesorc a left outer join src b
+on a.cint = cast(b.key as int) and (a.cint < 100)
+limit 1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Reducer 3 <- Map 1 (BROADCAST_EDGE), Map 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: UDFToInteger(_col0) (type: int)
+                      sort order: +
+                      Map-reduce partition columns: UDFToInteger(_col0) (type: int)
+                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+        Map 2 
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
+                    Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+                    Limit
+                      Number of rows: 1
+                      Statistics: Num rows: 1 Data size: 215 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 215 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean)
+        Reducer 3 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: int), VALUE._col3 (type: bigint), VALUE._col4 (type: float), VALUE._col5 (type: double), VALUE._col6 (type: string), VALUE._col7 (type: string), VALUE._col8 (type: timestamp), VALUE._col9 (type: timestamp), VALUE._col10 (type: boolean), VALUE._col11 (type: boolean)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
+                Statistics: Num rows: 1 Data size: 215 Basic stats: COMPLETE Column stats: NONE
+                Limit
+                  Number of rows: 1
+                  Statistics: Num rows: 1 Data size: 215 Basic stats: COMPLETE Column stats: NONE
+                  Map Join Operator
+                    condition map:
+                         Right Outer Join0 to 1
+                    filter predicates:
+                      0 
+                      1 {(_col2 < 100)}
+                    keys:
+                      0 UDFToInteger(_col0) (type: int)
+                      1 _col2 (type: int)
+                    outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+                    input vertices:
+                      0 Map 1
+                    Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+                    HybridGraceHashJoin: true
+                    Select Operator
+                      expressions: _col1 (type: tinyint), _col2 (type: smallint), _col3 (type: int), _col4 (type: bigint), _col5 (type: float), _col6 (type: double), _col7 (type: string), _col8 (type: string), _col9 (type: timestamp), _col10 (type: timestamp), _col11 (type: boolean), _col12 (type: boolean)
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
+                      Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+                      Limit
+                        Number of rows: 1
+                        Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                        File Output Operator
+                          compressed: false
+                          Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                          table:
+                              input format: org.apache.hadoop.mapred.TextInputFormat
+                              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+      Processor Tree:
+        ListSink
+

[2/2] hive git commit: HIVE-11684: Implement limit pushdown through outer join in CBO (Jesus Camacho Rodriguez, reviewed by Laljo John Pullokkaran)

Posted by jc...@apache.org.

HIVE-11684: Implement limit pushdown through outer join in CBO (Jesus Camacho Rodriguez, reviewed by Laljo John Pullokkaran)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0b900977
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0b900977
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0b900977

Branch: refs/heads/master
Commit: 0b90097703eb57dc8449af8d7d7882b8d5ca2b40
Parents: 97cb0c6
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Mon Nov 16 11:32:21 2015 +0100
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Fri Nov 20 22:11:27 2015 +0100

----------------------------------------------------------------------
 .../org/apache/hadoop/hive/conf/HiveConf.java   |  11 +
 .../test/resources/testconfiguration.properties |   1 +
 .../calcite/reloperators/HiveSortLimit.java     |  15 +-
 .../rules/HiveProjectSortTransposeRule.java     |  66 ++
 .../calcite/rules/HiveSortJoinReduceRule.java   | 135 +++
 .../calcite/rules/HiveSortMergeRule.java        |  78 ++
 .../rules/HiveSortProjectTransposeRule.java     |  79 ++
 .../calcite/rules/HiveSortRemoveRule.java       |  79 ++
 .../calcite/stats/HiveRelMdRowCount.java        |  18 +
 .../calcite/stats/HiveRelMdSelectivity.java     |  10 +
 .../hadoop/hive/ql/parse/CalcitePlanner.java    |  32 +-
 .../clientpositive/limit_join_transpose.q       |  99 +++
 .../clientpositive/tez_dynpart_hashjoin_3.q     |  24 +
 .../clientpositive/join32_lessSize.q.out        |  46 +-
 .../results/clientpositive/join_merging.q.out   |  34 +-
 .../clientpositive/limit_join_transpose.q.out   | 869 +++++++++++++++++++
 .../clientpositive/spark/join32_lessSize.q.out  | 122 +--
 .../clientpositive/spark/join_merging.q.out     |  76 +-
 .../results/clientpositive/subquery_notin.q.out | 328 ++++---
 .../subquery_notin_having.q.java1.7.out         |  94 +-
 .../clientpositive/tez/explainuser_1.q.out      | 171 ++--
 .../tez/tez_dynpart_hashjoin_3.q.out            | 200 +++++
 22 files changed, 2137 insertions(+), 450 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/0b900977/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 4f8209a..09f3c3e 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1311,6 +1311,17 @@ public class HiveConf extends Configuration {
         "we are increasing the number of files possibly by a big margin. So, we merge aggressively."),
     HIVEOPTCORRELATION("hive.optimize.correlation", false, "exploit intra-query correlations."),
 
+    HIVE_OPTIMIZE_LIMIT_JOIN_TRANSPOSE("hive.optimize.limitjointranspose", false,
+        "Whether to push a limit through left/right outer join. If the value is true and the size of the outer\n" +
+        "input is reduced enough (as specified in hive.optimize.limitjointranspose.reduction), the limit is pushed\n" +
+        "to the outer input; to remain semantically correct, the limit is kept on top of the join too."),
+    HIVE_OPTIMIZE_LIMIT_JOIN_TRANSPOSE_REDUCTION_PERCENTAGE("hive.optimize.limitjointranspose.reductionpercentage", 1.0f,
+        "When hive.optimize.limitjointranspose is true, this variable specifies the minimal reduction of the\n" +
+        "size of the outer input of the join that we should get in order to apply the rule."),
+    HIVE_OPTIMIZE_LIMIT_JOIN_TRANSPOSE_REDUCTION_TUPLES("hive.optimize.limitjointranspose.reductiontuples", (long) 0,
+        "When hive.optimize.limitjointranspose is true, this variable specifies the minimal reduction in the\n" +
+        "number of tuples of the outer input of the join that you should get in order to apply the rule."),
+
     HIVE_OPTIMIZE_SKEWJOIN_COMPILETIME("hive.optimize.skewjoin.compiletime", false,
         "Whether to create a separate plan for skewed keys for the tables in the join.\n" +
         "This is based on the skewed keys stored in the metadata. At compile time, the plan is broken\n" +

http://git-wip-us.apache.org/repos/asf/hive/blob/0b900977/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index b86dd19..0c5140c 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -388,6 +388,7 @@ minitez.query.files=bucket_map_join_tez1.q,\
   tez_insert_overwrite_local_directory_1.q,\
   tez_dynpart_hashjoin_1.q,\
   tez_dynpart_hashjoin_2.q,\
+  tez_dynpart_hashjoin_3.q,\
   tez_vector_dynpart_hashjoin_1.q,\
   tez_vector_dynpart_hashjoin_2.q,\
   tez_join_hash.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/0b900977/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSortLimit.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSortLimit.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSortLimit.java
index 82fdc0e..3077177 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSortLimit.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSortLimit.java
@@ -44,6 +44,8 @@ public class HiveSortLimit extends Sort implements HiveRelNode {
   // 4. This is used by ASTConverter after we are done with Calcite Planning
   private ImmutableMap<Integer, RexNode> mapOfInputRefToRexCall;
 
+  private boolean ruleCreated;
+
   public HiveSortLimit(RelOptCluster cluster, RelTraitSet traitSet, RelNode child,
       RelCollation collation, RexNode offset, RexNode fetch) {
     super(cluster, TraitsUtil.getSortTraitSet(cluster, traitSet, collation), child, collation,
@@ -74,7 +76,10 @@ public class HiveSortLimit extends Sort implements HiveRelNode {
     // TODO: can we blindly copy sort trait? What if inputs changed and we
     // are now sorting by different cols
     RelCollation canonizedCollation = traitSet.canonize(newCollation);
-    return new HiveSortLimit(getCluster(), traitSet, newInput, canonizedCollation, offset, fetch);
+    HiveSortLimit sortLimit =
+            new HiveSortLimit(getCluster(), traitSet, newInput, canonizedCollation, offset, fetch);
+    sortLimit.setRuleCreated(ruleCreated);
+    return sortLimit;
   }
 
   public RexNode getFetchExpr() {
@@ -93,6 +98,14 @@ public class HiveSortLimit extends Sort implements HiveRelNode {
   public void implement(Implementor implementor) {
   }
 
+  public boolean isRuleCreated() {
+    return ruleCreated;
+  }
+
+  public void setRuleCreated(boolean ruleCreated) {
+    this.ruleCreated = ruleCreated;
+  }
+
   private static class HiveSortRelFactory implements RelFactories.SortFactory {
 
     @Override

http://git-wip-us.apache.org/repos/asf/hive/blob/0b900977/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectSortTransposeRule.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectSortTransposeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectSortTransposeRule.java
new file mode 100644
index 0000000..9d74b19
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectSortTransposeRule.java
@@ -0,0 +1,66 @@
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.rel.RelCollation;
+import org.apache.calcite.rel.RelCollationTraitDef;
+import org.apache.calcite.rel.RelFieldCollation;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rex.RexUtil;
+import org.apache.calcite.util.mapping.Mappings;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit;
+
+import com.google.common.collect.ImmutableList;
+
+public class HiveProjectSortTransposeRule extends RelOptRule {
+
+  public static final HiveProjectSortTransposeRule INSTANCE =
+      new HiveProjectSortTransposeRule();
+
+  //~ Constructors -----------------------------------------------------------
+
+  /**
+   * Creates a HiveProjectSortTransposeRule.
+   */
+  private HiveProjectSortTransposeRule() {
+    super(
+        operand(
+            HiveProject.class,
+            operand(HiveSortLimit.class, any())));
+  }
+
+  //~ Methods ----------------------------------------------------------------
+
+  // implement RelOptRule
+  public void onMatch(RelOptRuleCall call) {
+    final HiveProject project = call.rel(0);
+    final HiveSortLimit sort = call.rel(1);
+
+    // Determine mapping between project input and output fields. If sort
+    // relies on non-trivial expressions, we can't push.
+    final Mappings.TargetMapping map =
+        RelOptUtil.permutation(
+            project.getProjects(), project.getInput().getRowType()).inverse();
+    for (RelFieldCollation fc : sort.getCollation().getFieldCollations()) {
+      if (map.getTarget(fc.getFieldIndex()) < 0) {
+        return;
+      }
+    }
+
+    // Create new collation
+    final RelCollation newCollation =
+        RelCollationTraitDef.INSTANCE.canonize(
+            RexUtil.apply(map, sort.getCollation()));
+
+    // New operators
+    final RelNode newProject = project.copy(sort.getInput().getTraitSet(),
+            ImmutableList.<RelNode>of(sort.getInput()));
+    final HiveSortLimit newSort = sort.copy(newProject.getTraitSet(),
+            newProject, newCollation, sort.offset, sort.fetch);
+
+    call.transformTo(newSort);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/0b900977/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortJoinReduceRule.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortJoinReduceRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortJoinReduceRule.java
new file mode 100644
index 0000000..d8b9c54
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortJoinReduceRule.java
@@ -0,0 +1,135 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.rel.RelCollation;
+import org.apache.calcite.rel.RelCollationTraitDef;
+import org.apache.calcite.rel.RelCollations;
+import org.apache.calcite.rel.RelFieldCollation;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.JoinRelType;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.rex.RexLiteral;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit;
+
+/**
+ * Planner rule that pushes
+ * a {@link org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit}
+ * past a {@link org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin}.
+ */
+public class HiveSortJoinReduceRule extends RelOptRule {
+
+  public static final HiveSortJoinReduceRule INSTANCE =
+          new HiveSortJoinReduceRule();
+
+  //~ Constructors -----------------------------------------------------------
+
+  private HiveSortJoinReduceRule() {
+    super(
+        operand(
+            HiveSortLimit.class,
+            operand(HiveJoin.class, any())));
+  }
+
+  //~ Methods ----------------------------------------------------------------
+
+  @Override
+  public boolean matches(RelOptRuleCall call) {
+    final HiveSortLimit sortLimit = call.rel(0);
+    final HiveJoin join = call.rel(1);
+
+    // If sort does not contain a limit operation, we bail out
+    if (!HiveCalciteUtil.limitRelNode(sortLimit)) {
+      return false;
+    }
+
+    // 1) If join is not a left or right outer, we bail out
+    // 2) If any sort column is not part of the input where the
+    // sort is pushed, we bail out
+    RelNode reducedInput;
+    if (join.getJoinType() == JoinRelType.LEFT) {
+      reducedInput = join.getLeft();
+      if (sortLimit.getCollation() != RelCollations.EMPTY) {
+        for (RelFieldCollation relFieldCollation
+            : sortLimit.getCollation().getFieldCollations()) {
+          if (relFieldCollation.getFieldIndex()
+              >= join.getLeft().getRowType().getFieldCount()) {
+            return false;
+          }
+        }
+      }
+    } else if (join.getJoinType() == JoinRelType.RIGHT) {
+      reducedInput = join.getRight();
+      if (sortLimit.getCollation() != RelCollations.EMPTY) {
+        for (RelFieldCollation relFieldCollation
+            : sortLimit.getCollation().getFieldCollations()) {
+          if (relFieldCollation.getFieldIndex()
+              < join.getLeft().getRowType().getFieldCount()) {
+            return false;
+          }
+        }
+      }
+    } else {
+      return false;
+    }
+
+    // Finally, if we do not reduce the input size, we bail out
+    if (RexLiteral.intValue(sortLimit.fetch)
+            >= RelMetadataQuery.getRowCount(reducedInput)) {
+      return false;
+    }
+
+    return true;
+  }
+
+  @Override
+  public void onMatch(RelOptRuleCall call) {
+    final HiveSortLimit sortLimit = call.rel(0);
+    final HiveJoin join = call.rel(1);
+    RelNode inputLeft = join.getLeft();
+    RelNode inputRight = join.getRight();
+
+    // We create a new sort operator on the corresponding input
+    if (join.getJoinType() == JoinRelType.LEFT) {
+      inputLeft = sortLimit.copy(sortLimit.getTraitSet(), inputLeft,
+              sortLimit.getCollation(), sortLimit.offset, sortLimit.fetch);
+      ((HiveSortLimit) inputLeft).setRuleCreated(true);
+    } else {
+      // Adjust right collation
+      final RelCollation rightCollation =
+              RelCollationTraitDef.INSTANCE.canonize(
+                  RelCollations.shift(sortLimit.getCollation(),
+                      -join.getLeft().getRowType().getFieldCount()));
+      inputRight = sortLimit.copy(sortLimit.getTraitSet().replace(rightCollation), inputRight,
+              rightCollation, sortLimit.offset, sortLimit.fetch);
+      ((HiveSortLimit) inputRight).setRuleCreated(true);
+    }
+    // We copy the join and the top sort operator
+    RelNode result = join.copy(join.getTraitSet(), join.getCondition(), inputLeft,
+            inputRight, join.getJoinType(), join.isSemiJoinDone());
+    result = sortLimit.copy(sortLimit.getTraitSet(), result, sortLimit.getCollation(),
+            sortLimit.offset, sortLimit.fetch);
+
+    call.transformTo(result);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/0b900977/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortMergeRule.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortMergeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortMergeRule.java
new file mode 100644
index 0000000..ebb20c2
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortMergeRule.java
@@ -0,0 +1,78 @@
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.rex.RexLiteral;
+import org.apache.calcite.rex.RexNode;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit;
+
+/**
+ * This rule will merge two HiveSortLimit operators.
+ * 
+ * It is applied when the top match is a pure limit operation (no sorting).
+ * 
+ * If the bottom operator is not synthetic and does not contain a limit,
+ * we currently bail out. Thus, we avoid a lot of unnecessary limit operations
+ * in the middle of the execution plan that could create performance regressions.
+ */
+public class HiveSortMergeRule extends RelOptRule {
+
+  public static final HiveSortMergeRule INSTANCE =
+      new HiveSortMergeRule();
+
+  //~ Constructors -----------------------------------------------------------
+
+  /**
+   * Creates a HiveSortProjectTransposeRule.
+   */
+  private HiveSortMergeRule() {
+    super(
+        operand(
+            HiveSortLimit.class,
+            operand(HiveSortLimit.class, any())));
+  }
+
+  //~ Methods ----------------------------------------------------------------
+
+  @Override
+  public boolean matches(RelOptRuleCall call) {
+    final HiveSortLimit topSortLimit = call.rel(0);
+    final HiveSortLimit bottomSortLimit = call.rel(1);
+
+    // If top operator is not a pure limit, we bail out
+    if (!HiveCalciteUtil.pureLimitRelNode(topSortLimit)) {
+      return false;
+    }
+
+    // If the bottom operator is not synthetic and it does not contain a limit,
+    // we will bail out; we do not want to end up with limits all over the tree
+    if (topSortLimit.isRuleCreated() && !bottomSortLimit.isRuleCreated() &&
+            bottomSortLimit.fetch == null) {
+      return false;
+    }
+
+    return true;
+  }
+
+  // implement RelOptRule
+  public void onMatch(RelOptRuleCall call) {
+    final HiveSortLimit topSortLimit = call.rel(0);
+    final HiveSortLimit bottomSortLimit = call.rel(1);
+
+    // Lowest limit
+    final RexNode newLimit;
+    if (bottomSortLimit.fetch != null && RexLiteral.intValue(topSortLimit.fetch)
+            >= RexLiteral.intValue(bottomSortLimit.fetch)) {
+      newLimit = bottomSortLimit.fetch;
+    } else {
+      newLimit = topSortLimit.fetch;
+    }
+
+    final HiveSortLimit newSort = bottomSortLimit.copy(bottomSortLimit.getTraitSet(),
+            bottomSortLimit.getInput(), bottomSortLimit.collation, null, newLimit);
+
+    call.transformTo(newSort);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/0b900977/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortProjectTransposeRule.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortProjectTransposeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortProjectTransposeRule.java
new file mode 100644
index 0000000..debfbe0
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortProjectTransposeRule.java
@@ -0,0 +1,79 @@
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.rel.RelCollation;
+import org.apache.calcite.rel.RelCollationTraitDef;
+import org.apache.calcite.rel.RelFieldCollation;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rex.RexUtil;
+import org.apache.calcite.util.mapping.Mappings;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit;
+
+import com.google.common.collect.ImmutableList;
+
+public class HiveSortProjectTransposeRule extends RelOptRule {
+
+  public static final HiveSortProjectTransposeRule INSTANCE =
+      new HiveSortProjectTransposeRule();
+
+  //~ Constructors -----------------------------------------------------------
+
+  /**
+   * Creates a HiveSortProjectTransposeRule.
+   */
+  private HiveSortProjectTransposeRule() {
+    super(
+        operand(
+            HiveSortLimit.class,
+            operand(HiveProject.class, any())));
+  }
+
+  //~ Methods ----------------------------------------------------------------
+
+  @Override
+  public boolean matches(RelOptRuleCall call) {
+    final HiveSortLimit sortLimit = call.rel(0);
+
+    // If does not contain a limit operation, we bail out
+    if (!HiveCalciteUtil.limitRelNode(sortLimit)) {
+      return false;
+    }
+
+    return true;
+  }
+
+  // implement RelOptRule
+  public void onMatch(RelOptRuleCall call) {
+    final HiveSortLimit sort = call.rel(0);
+    final HiveProject project = call.rel(1);
+
+    // Determine mapping between project input and output fields. If sort
+    // relies on non-trivial expressions, we can't push.
+    final Mappings.TargetMapping map =
+        RelOptUtil.permutation(
+            project.getProjects(), project.getInput().getRowType());
+    for (RelFieldCollation fc : sort.getCollation().getFieldCollations()) {
+      if (map.getTargetOpt(fc.getFieldIndex()) < 0) {
+        return;
+      }
+    }
+
+    // Create new collation
+    final RelCollation newCollation =
+        RelCollationTraitDef.INSTANCE.canonize(
+            RexUtil.apply(map, sort.getCollation()));
+
+    // New operators
+    final HiveSortLimit newSort = sort.copy(sort.getTraitSet().replace(newCollation),
+            project.getInput(), newCollation, sort.offset, sort.fetch);
+    final RelNode newProject = project.copy(sort.getTraitSet(),
+            ImmutableList.<RelNode>of(newSort));
+
+    call.transformTo(newProject);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/0b900977/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortRemoveRule.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortRemoveRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortRemoveRule.java
new file mode 100644
index 0000000..618c717
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortRemoveRule.java
@@ -0,0 +1,79 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelOptRuleOperand;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.rex.RexLiteral;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit;
+
+/**
+ * Planner rule that removes
+ * a {@link org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit}.
+ */
+public class HiveSortRemoveRule extends RelOptRule {
+
+  protected final float reductionProportion;
+  protected final float reductionTuples;
+
+  //~ Constructors -----------------------------------------------------------
+
+  public HiveSortRemoveRule(float reductionProportion, long reductionTuples) {
+    this(operand(HiveSortLimit.class, any()), reductionProportion, reductionTuples);
+  }
+
+  private HiveSortRemoveRule(RelOptRuleOperand operand, float reductionProportion,
+          long reductionTuples) {
+    super(operand);
+    this.reductionProportion = reductionProportion;
+    this.reductionTuples = reductionTuples;
+  }
+
+  //~ Methods ----------------------------------------------------------------
+
+  @Override
+  public boolean matches(RelOptRuleCall call) {
+    final HiveSortLimit sortLimit = call.rel(0);
+
+    // If it is not created by HiveSortJoinReduceRule, we cannot remove it
+    if (!sortLimit.isRuleCreated()) {
+      return false;
+    }
+
+    // Finally, if we do not reduce the size input enough, we bail out
+    int limit = RexLiteral.intValue(sortLimit.fetch);
+    Double rowCount = RelMetadataQuery.getRowCount(sortLimit.getInput());
+    if (rowCount != null && limit <= reductionProportion * rowCount &&
+            rowCount - limit >= reductionTuples) {
+      return false;
+    }
+
+    return true;
+  }
+
+  @Override
+  public void onMatch(RelOptRuleCall call) {
+    final HiveSortLimit sortLimit = call.rel(0);
+
+    // We remove the limit operator
+    call.transformTo(sortLimit.getInput());
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/0b900977/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java
index 728c5aa..caf8978 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java
@@ -30,6 +30,7 @@ import org.apache.calcite.rel.core.Join;
 import org.apache.calcite.rel.core.JoinRelType;
 import org.apache.calcite.rel.core.Project;
 import org.apache.calcite.rel.core.SemiJoin;
+import org.apache.calcite.rel.core.Sort;
 import org.apache.calcite.rel.core.TableScan;
 import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider;
 import org.apache.calcite.rel.metadata.RelMdRowCount;
@@ -38,6 +39,7 @@ import org.apache.calcite.rel.metadata.RelMetadataQuery;
 import org.apache.calcite.rex.RexBuilder;
 import org.apache.calcite.rex.RexCall;
 import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexLiteral;
 import org.apache.calcite.rex.RexNode;
 import org.apache.calcite.rex.RexUtil;
 import org.apache.calcite.sql.fun.SqlStdOperatorTable;
@@ -87,6 +89,22 @@ public class HiveRelMdRowCount extends RelMdRowCount {
     return super.getRowCount(rel);
   }
 
+  @Override
+  public Double getRowCount(Sort rel) {
+    final Double rowCount = RelMetadataQuery.getRowCount(rel.getInput());
+    if (rowCount != null && rel.fetch != null) {
+      final int offset = rel.offset == null ? 0 : RexLiteral.intValue(rel.offset);
+      final int limit = RexLiteral.intValue(rel.fetch);
+      final Double offsetLimit = new Double(offset + limit);
+      // offsetLimit is smaller than rowCount of the input operator
+      // thus, we return the offsetLimit
+      if (offsetLimit < rowCount) {
+        return offsetLimit;
+      }
+    }
+    return rowCount;
+  }
+
   static class PKFKRelationInfo {
     public final int fkSide;
     public final double ndvScalingFactor;

http://git-wip-us.apache.org/repos/asf/hive/blob/0b900977/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java
index 715f24f..a0eb83d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java
@@ -61,6 +61,16 @@ public class HiveRelMdSelectivity extends RelMdSelectivity {
   public Double getSelectivity(HiveJoin j, RexNode predicate) throws CalciteSemanticException {
     if (j.getJoinType().equals(JoinRelType.INNER)) {
       return computeInnerJoinSelectivity(j, predicate);
+    } else if (j.getJoinType().equals(JoinRelType.LEFT) ||
+            j.getJoinType().equals(JoinRelType.RIGHT)) {
+      double left = RelMetadataQuery.getRowCount(j.getLeft());
+      double right = RelMetadataQuery.getRowCount(j.getRight());
+      double product = left * right;
+      double innerJoinSelectivity = computeInnerJoinSelectivity(j, predicate);
+      if (j.getJoinType().equals(JoinRelType.LEFT)) {
+        return Math.max(innerJoinSelectivity, left/product);
+      }
+      return Math.max(innerJoinSelectivity, right/product);
     }
     return 1.0;
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/0b900977/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index de67b54..bfe4633 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -148,8 +148,13 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinToMultiJoinRule
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePartitionPruneRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePreFilteringRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveProjectMergeRule;
+import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveProjectSortTransposeRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelFieldTrimmer;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRulesRegistry;
+import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortJoinReduceRule;
+import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortMergeRule;
+import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortProjectTransposeRule;
+import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortRemoveRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveWindowingFixRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTConverter;
 import org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverter;
@@ -953,16 +958,31 @@ public class CalcitePlanner extends SemanticAnalyzer {
         basePlan = hepPlan(basePlan, true, mdProvider, HiveExpandDistinctAggregatesRule.INSTANCE);
       }
 
-      // 1. Push Down Semi Joins
+      // 1. Push down limit through outer join
+      if (conf.getBoolVar(HiveConf.ConfVars.HIVE_OPTIMIZE_LIMIT_JOIN_TRANSPOSE)) {
+        // This should be a cost based decision, but till we enable the extended cost
+        // model, we will use the given value for the variable
+        final float reductionProportion = HiveConf.getFloatVar(conf,
+            HiveConf.ConfVars.HIVE_OPTIMIZE_LIMIT_JOIN_TRANSPOSE_REDUCTION_PERCENTAGE);
+        final long reductionTuples = HiveConf.getLongVar(conf,
+            HiveConf.ConfVars.HIVE_OPTIMIZE_LIMIT_JOIN_TRANSPOSE_REDUCTION_TUPLES);
+        basePlan = hepPlan(basePlan, true, mdProvider, HiveSortMergeRule.INSTANCE,
+            HiveSortProjectTransposeRule.INSTANCE, HiveSortJoinReduceRule.INSTANCE);
+        basePlan = hepPlan(basePlan, true, mdProvider, HepMatchOrder.BOTTOM_UP,
+            new HiveSortRemoveRule(reductionProportion, reductionTuples),
+            HiveProjectSortTransposeRule.INSTANCE);
+      }
+
+      // 2. Push Down Semi Joins
       basePlan = hepPlan(basePlan, true, mdProvider, SemiJoinJoinTransposeRule.INSTANCE,
           SemiJoinFilterTransposeRule.INSTANCE, SemiJoinProjectTransposeRule.INSTANCE);
 
-      // 2. Add not null filters
+      // 3. Add not null filters
       if (conf.getBoolVar(HiveConf.ConfVars.HIVE_CBO_RETPATH_HIVEOP)) {
         basePlan = hepPlan(basePlan, true, mdProvider, HiveJoinAddNotNullRule.INSTANCE);
       }
 
-      // 3. Constant propagation, common filter extraction, and PPD
+      // 4. Constant propagation, common filter extraction, and PPD
       basePlan = hepPlan(basePlan, true, mdProvider,
           ReduceExpressionsRule.PROJECT_INSTANCE,
           ReduceExpressionsRule.FILTER_INSTANCE,
@@ -976,12 +996,12 @@ public class CalcitePlanner extends SemanticAnalyzer {
           new FilterAggregateTransposeRule(Filter.class,
               HiveFilter.DEFAULT_FILTER_FACTORY, Aggregate.class));
 
-      // 4. Transitive inference & Partition Pruning
+      // 5. Transitive inference & Partition Pruning
       basePlan = hepPlan(basePlan, false, mdProvider, new HiveJoinPushTransitivePredicatesRule(
           Join.class, HiveFilter.DEFAULT_FILTER_FACTORY),
           new HivePartitionPruneRule(conf));
 
-      // 5. Projection Pruning
+      // 6. Projection Pruning
       HiveRelFieldTrimmer fieldTrimmer = new HiveRelFieldTrimmer(null,
           cluster, HiveProject.DEFAULT_PROJECT_FACTORY,
           HiveFilter.DEFAULT_FILTER_FACTORY, HiveJoin.HIVE_JOIN_FACTORY,
@@ -989,7 +1009,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
           HiveAggregate.HIVE_AGGR_REL_FACTORY, HiveUnion.UNION_REL_FACTORY);
       basePlan = fieldTrimmer.trim(basePlan);
 
-      // 6. Rerun PPD through Project as column pruning would have introduced DT
+      // 7. Rerun PPD through Project as column pruning would have introduced DT
       // above scans
       basePlan = hepPlan(basePlan, true, mdProvider,
           new FilterProjectTransposeRule(Filter.class, HiveFilter.DEFAULT_FILTER_FACTORY,

http://git-wip-us.apache.org/repos/asf/hive/blob/0b900977/ql/src/test/queries/clientpositive/limit_join_transpose.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/limit_join_transpose.q b/ql/src/test/queries/clientpositive/limit_join_transpose.q
new file mode 100644
index 0000000..bfc2378
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/limit_join_transpose.q
@@ -0,0 +1,99 @@
+set hive.optimize.limitjointranspose=false;
+
+explain
+select *
+from src src1 left outer join src src2
+on src1.key = src2.key
+limit 1;
+
+select *
+from src src1 left outer join src src2
+on src1.key = src2.key
+limit 1;
+
+
+set hive.optimize.limitjointranspose=true;
+set hive.optimize.limitjointranspose.reductionpercentage=0.0001f;
+set hive.optimize.limitjointranspose.reductiontuples=10;
+
+explain
+select *
+from src src1 left outer join src src2
+on src1.key = src2.key
+limit 1;
+
+select *
+from src src1 left outer join src src2
+on src1.key = src2.key
+limit 1;
+
+
+set hive.optimize.limitjointranspose.reductionpercentage=0.1f;
+set hive.optimize.limitjointranspose.reductiontuples=10;
+
+explain
+select *
+from src src1 left outer join src src2
+on src1.key = src2.key
+limit 1;
+
+select *
+from src src1 left outer join src src2
+on src1.key = src2.key
+limit 1;
+
+explain
+select *
+from src src1 right outer join (
+  select *
+  from src src2 left outer join src src3
+  on src2.value = src3.value) src2
+on src1.key = src2.key
+limit 1;
+
+select *
+from src src1 right outer join (
+  select *
+  from src src2 left outer join src src3
+  on src2.value = src3.value) src2
+on src1.key = src2.key
+limit 1;
+
+set hive.optimize.limitjointranspose.reductionpercentage=1f;
+set hive.optimize.limitjointranspose.reductiontuples=0;
+
+explain
+select *
+from src src1 right outer join (
+  select *
+  from src src2 left outer join src src3
+  on src2.value = src3.value) src2
+on src1.key = src2.key
+limit 1;
+
+select *
+from src src1 right outer join (
+  select *
+  from src src2 left outer join src src3
+  on src2.value = src3.value) src2
+on src1.key = src2.key
+limit 1;
+
+explain
+select *
+from src src1 right outer join (
+  select src2.key, src2.value
+  from src src2 left outer join src src3
+  on src2.value = src3.value) src2
+on src1.key = src2.key
+order by src2.key
+limit 1;
+
+select *
+from src src1 right outer join (
+  select src2.key, src2.value
+  from src src2 left outer join src src3
+  on src2.value = src3.value) src2
+on src1.key = src2.key
+order by src2.key
+limit 1;

http://git-wip-us.apache.org/repos/asf/hive/blob/0b900977/ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_3.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_3.q b/ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_3.q
new file mode 100644
index 0000000..1994b40
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_3.q
@@ -0,0 +1,24 @@
+set hive.optimize.limitjointranspose=true;
+set hive.optimize.limitjointranspose.reductionpercentage=0.1f;
+set hive.optimize.limitjointranspose.reductiontuples=100;
+set hive.explain.user=false;
+set hive.auto.convert.join=false;
+set hive.optimize.dynamic.partition.hashjoin=false;
+
+explain
+select a.*
+from alltypesorc a left outer join src b
+on a.cint = cast(b.key as int) and (a.cint < 100)
+limit 1;
+
+
+set hive.auto.convert.join=true;
+set hive.optimize.dynamic.partition.hashjoin=true;
+set hive.auto.convert.join.noconditionaltask.size=200000;
+set hive.exec.reducers.bytes.per.reducer=200000;
+
+explain
+select a.*
+from alltypesorc a left outer join src b
+on a.cint = cast(b.key as int) and (a.cint < 100)
+limit 1;

http://git-wip-us.apache.org/repos/asf/hive/blob/0b900977/ql/src/test/results/clientpositive/join32_lessSize.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/join32_lessSize.q.out b/ql/src/test/results/clientpositive/join32_lessSize.q.out
index 5ea4024..8e71710 100644
--- a/ql/src/test/results/clientpositive/join32_lessSize.q.out
+++ b/ql/src/test/results/clientpositive/join32_lessSize.q.out
@@ -1920,22 +1920,22 @@ TOK_QUERY
 
 
 STAGE DEPENDENCIES:
-  Stage-9 is a root stage
-  Stage-7 depends on stages: Stage-9
-  Stage-8 depends on stages: Stage-7
+  Stage-8 is a root stage
   Stage-6 depends on stages: Stage-8
-  Stage-0 depends on stages: Stage-6
-  Stage-3 depends on stages: Stage-0
+  Stage-7 depends on stages: Stage-6
+  Stage-5 depends on stages: Stage-7
+  Stage-0 depends on stages: Stage-5
+  Stage-2 depends on stages: Stage-0
 
 STAGE PLANS:
-  Stage: Stage-9
+  Stage: Stage-8
     Map Reduce Local Work
       Alias -> Map Local Tables:
-        $hdt$_0:$hdt$_0:y 
+        $hdt$_1:$hdt$_1:y 
           Fetch Operator
             limit: -1
       Alias -> Map Local Operator Tree:
-        $hdt$_0:$hdt$_0:y 
+        $hdt$_1:$hdt$_1:y 
           TableScan
             alias: y
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -1950,7 +1950,7 @@ STAGE PLANS:
                   1 _col0 (type: string)
                 Position of Big Table: 1
 
-  Stage: Stage-7
+  Stage: Stage-6
     Map Reduce
       Map Operator Tree:
           TableScan
@@ -2089,12 +2089,12 @@ STAGE PLANS:
               name: default.src1
             name: default.src1
       Truncated Path -> Alias:
-        /src1 [$hdt$_0:$hdt$_1:x]
+        /src1 [$hdt$_1:$hdt$_2:x]
 
-  Stage: Stage-8
+  Stage: Stage-7
     Map Reduce Local Work
       Alias -> Map Local Tables:
-        $hdt$_1:z 
+        $hdt$_0:z 
           Fetch Operator
             limit: -1
             Partition Description:
@@ -2144,7 +2144,7 @@ STAGE PLANS:
                     name: default.srcpart
                   name: default.srcpart
       Alias -> Map Local Operator Tree:
-        $hdt$_1:z 
+        $hdt$_0:z 
           TableScan
             alias: z
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -2159,11 +2159,11 @@ STAGE PLANS:
                 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                 HashTable Sink Operator
                   keys:
-                    0 _col1 (type: string)
-                    1 _col0 (type: string)
-                  Position of Big Table: 0
+                    0 _col0 (type: string)
+                    1 _col1 (type: string)
+                  Position of Big Table: 1
 
-  Stage: Stage-6
+  Stage: Stage-5
     Map Reduce
       Map Operator Tree:
           TableScan
@@ -2172,13 +2172,13 @@ STAGE PLANS:
               condition map:
                    Inner Join 0 to 1
               keys:
-                0 _col1 (type: string)
-                1 _col0 (type: string)
-              outputColumnNames: _col0, _col1, _col2
-              Position of Big Table: 0
+                0 _col0 (type: string)
+                1 _col1 (type: string)
+              outputColumnNames: _col0, _col3, _col4
+              Position of Big Table: 1
               Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
               Select Operator
-                expressions: _col0 (type: string), _col2 (type: string), _col1 (type: string)
+                expressions: _col3 (type: string), _col0 (type: string), _col4 (type: string)
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
@@ -2313,7 +2313,7 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest_j2
 
-  Stage: Stage-3
+  Stage: Stage-2
     Stats-Aggr Operator
 #### A masked pattern was here ####
 

http://git-wip-us.apache.org/repos/asf/hive/blob/0b900977/ql/src/test/results/clientpositive/join_merging.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/join_merging.q.out b/ql/src/test/results/clientpositive/join_merging.q.out
index b42bdc5..9ab9587 100644
--- a/ql/src/test/results/clientpositive/join_merging.q.out
+++ b/ql/src/test/results/clientpositive/join_merging.q.out
@@ -97,12 +97,12 @@ from part p1 left outer join part p2 on p1.p_partkey = p2.p_partkey
               p1.p_size > 10 and p1.p_size > p2.p_size + 10
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-2 depends on stages: Stage-1
-  Stage-0 depends on stages: Stage-2
+  Stage-2 is a root stage
+  Stage-1 depends on stages: Stage-2
+  Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
-  Stage: Stage-1
+  Stage: Stage-2
     Map Reduce
       Map Operator Tree:
           TableScan
@@ -153,17 +153,10 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
-  Stage: Stage-2
+  Stage: Stage-1
     Map Reduce
       Map Operator Tree:
           TableScan
-            Reduce Output Operator
-              key expressions: _col2 (type: int)
-              sort order: +
-              Map-reduce partition columns: _col2 (type: int)
-              Statistics: Num rows: 9 Data size: 1112 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col1 (type: int), _col3 (type: int)
-          TableScan
             alias: p1
             Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
             Select Operator
@@ -175,17 +168,24 @@ STAGE PLANS:
                 sort order: +
                 Map-reduce partition columns: _col0 (type: int)
                 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+          TableScan
+            Reduce Output Operator
+              key expressions: _col2 (type: int)
+              sort order: +
+              Map-reduce partition columns: _col2 (type: int)
+              Statistics: Num rows: 9 Data size: 1112 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: int), _col3 (type: int)
       Reduce Operator Tree:
         Join Operator
           condition map:
-               Right Outer Join0 to 1
+               Left Outer Join0 to 1
           keys:
-            0 _col2 (type: int)
-            1 _col0 (type: int)
-          outputColumnNames: _col1, _col3
+            0 _col0 (type: int)
+            1 _col2 (type: int)
+          outputColumnNames: _col2, _col4
           Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE
           Select Operator
-            expressions: _col1 (type: int), _col3 (type: int)
+            expressions: _col2 (type: int), _col4 (type: int)
             outputColumnNames: _col0, _col1
             Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE
             File Output Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/0b900977/ql/src/test/results/clientpositive/limit_join_transpose.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/limit_join_transpose.q.out b/ql/src/test/results/clientpositive/limit_join_transpose.q.out
new file mode 100644
index 0000000..8717154
--- /dev/null
+++ b/ql/src/test/results/clientpositive/limit_join_transpose.q.out
@@ -0,0 +1,869 @@
+PREHOOK: query: explain
+select *
+from src src1 left outer join src src2
+on src1.key = src2.key
+limit 1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select *
+from src src1 left outer join src src2
+on src1.key = src2.key
+limit 1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src1
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: string)
+                sort order: +
+                Map-reduce partition columns: _col0 (type: string)
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col1 (type: string)
+          TableScan
+            alias: src1
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: string)
+                sort order: +
+                Map-reduce partition columns: _col0 (type: string)
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col1 (type: string)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Left Outer Join0 to 1
+          keys:
+            0 _col0 (type: string)
+            1 _col0 (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 1
+            Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select *
+from src src1 left outer join src src2
+on src1.key = src2.key
+limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select *
+from src src1 left outer join src src2
+on src1.key = src2.key
+limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0	val_0	0	val_0
+PREHOOK: query: explain
+select *
+from src src1 left outer join src src2
+on src1.key = src2.key
+limit 1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select *
+from src src1 left outer join src src2
+on src1.key = src2.key
+limit 1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src1
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: string)
+                sort order: +
+                Map-reduce partition columns: _col0 (type: string)
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col1 (type: string)
+          TableScan
+            alias: src1
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: string)
+                sort order: +
+                Map-reduce partition columns: _col0 (type: string)
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col1 (type: string)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Left Outer Join0 to 1
+          keys:
+            0 _col0 (type: string)
+            1 _col0 (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 1
+            Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select *
+from src src1 left outer join src src2
+on src1.key = src2.key
+limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select *
+from src src1 left outer join src src2
+on src1.key = src2.key
+limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0	val_0	0	val_0
+PREHOOK: query: explain
+select *
+from src src1 left outer join src src2
+on src1.key = src2.key
+limit 1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select *
+from src src1 left outer join src src2
+on src1.key = src2.key
+limit 1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src1
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Limit
+                Number of rows: 1
+                Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: string), _col1 (type: string)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: string), VALUE._col1 (type: string)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 1
+            Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: string)
+          TableScan
+            alias: src1
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: string)
+                sort order: +
+                Map-reduce partition columns: _col0 (type: string)
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col1 (type: string)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Left Outer Join0 to 1
+          keys:
+            0 _col0 (type: string)
+            1 _col0 (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 1
+            Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select *
+from src src1 left outer join src src2
+on src1.key = src2.key
+limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select *
+from src src1 left outer join src src2
+on src1.key = src2.key
+limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+238	val_238	238	val_238
+PREHOOK: query: explain
+select *
+from src src1 right outer join (
+  select *
+  from src src2 left outer join src src3
+  on src2.value = src3.value) src2
+on src1.key = src2.key
+limit 1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select *
+from src src1 right outer join (
+  select *
+  from src src2 left outer join src src3
+  on src2.value = src3.value) src2
+on src1.key = src2.key
+limit 1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-3 depends on stages: Stage-2
+  Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src1
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Limit
+                Number of rows: 1
+                Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: string), _col1 (type: string)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: string), VALUE._col1 (type: string)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 1
+            Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col1 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col1 (type: string)
+              Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: string)
+          TableScan
+            alias: src1
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col1 (type: string)
+                sort order: +
+                Map-reduce partition columns: _col1 (type: string)
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col0 (type: string)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Left Outer Join0 to 1
+          keys:
+            0 _col1 (type: string)
+            1 _col1 (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col2 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col2 (type: string)
+              Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string)
+          TableScan
+            alias: src1
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: string)
+                sort order: +
+                Map-reduce partition columns: _col0 (type: string)
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col1 (type: string)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Left Outer Join0 to 1
+          keys:
+            0 _col2 (type: string)
+            1 _col0 (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+          Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col4 (type: string), _col5 (type: string), _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
+            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+            Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+            Limit
+              Number of rows: 1
+              Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select *
+from src src1 right outer join (
+  select *
+  from src src2 left outer join src src3
+  on src2.value = src3.value) src2
+on src1.key = src2.key
+limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select *
+from src src1 right outer join (
+  select *
+  from src src2 left outer join src src3
+  on src2.value = src3.value) src2
+on src1.key = src2.key
+limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+238	val_238	238	val_238	238	val_238
+PREHOOK: query: explain
+select *
+from src src1 right outer join (
+  select *
+  from src src2 left outer join src src3
+  on src2.value = src3.value) src2
+on src1.key = src2.key
+limit 1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select *
+from src src1 right outer join (
+  select *
+  from src src2 left outer join src src3
+  on src2.value = src3.value) src2
+on src1.key = src2.key
+limit 1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-2 is a root stage
+  Stage-3 depends on stages: Stage-2
+  Stage-4 depends on stages: Stage-3
+  Stage-1 depends on stages: Stage-4
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src1
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Limit
+                Number of rows: 1
+                Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: string), _col1 (type: string)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: string), VALUE._col1 (type: string)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 1
+            Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col1 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col1 (type: string)
+              Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: string)
+          TableScan
+            alias: src1
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col1 (type: string)
+                sort order: +
+                Map-reduce partition columns: _col1 (type: string)
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col0 (type: string)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Left Outer Join0 to 1
+          keys:
+            0 _col1 (type: string)
+            1 _col1 (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 1
+            Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 1
+            Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src1
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: string)
+                sort order: +
+                Map-reduce partition columns: _col0 (type: string)
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col1 (type: string)
+          TableScan
+            Reduce Output Operator
+              key expressions: _col2 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col2 (type: string)
+              Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Right Outer Join0 to 1
+          keys:
+            0 _col0 (type: string)
+            1 _col2 (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+          Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 1
+            Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select *
+from src src1 right outer join (
+  select *
+  from src src2 left outer join src src3
+  on src2.value = src3.value) src2
+on src1.key = src2.key
+limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select *
+from src src1 right outer join (
+  select *
+  from src src2 left outer join src src3
+  on src2.value = src3.value) src2
+on src1.key = src2.key
+limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+238	val_238	238	val_238	238	val_238
+PREHOOK: query: explain
+select *
+from src src1 right outer join (
+  select src2.key, src2.value
+  from src src2 left outer join src src3
+  on src2.value = src3.value) src2
+on src1.key = src2.key
+order by src2.key
+limit 1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select *
+from src src1 right outer join (
+  select src2.key, src2.value
+  from src src2 left outer join src src3
+  on src2.value = src3.value) src2
+on src1.key = src2.key
+order by src2.key
+limit 1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-5 is a root stage
+  Stage-3 depends on stages: Stage-5
+  Stage-4 depends on stages: Stage-3
+  Stage-1 depends on stages: Stage-4
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-5
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src1
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: string)
+                sort order: +
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col1 (type: string)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 1
+            Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src1
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: value (type: string)
+              outputColumnNames: _col0
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: string)
+                sort order: +
+                Map-reduce partition columns: _col0 (type: string)
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+          TableScan
+            Reduce Output Operator
+              key expressions: _col1 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col1 (type: string)
+              Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: string)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Right Outer Join0 to 1
+          keys:
+            0 _col0 (type: string)
+            1 _col1 (type: string)
+          outputColumnNames: _col1, _col2
+          Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col1 (type: string), _col2 (type: string)
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: string)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 1
+            Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src1
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: string)
+                sort order: +
+                Map-reduce partition columns: _col0 (type: string)
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col1 (type: string)
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: string)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Right Outer Join0 to 1
+          keys:
+            0 _col0 (type: string)
+            1 _col0 (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col2 (type: string)
+              sort order: +
+              Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col2 (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 1
+            Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select *
+from src src1 right outer join (
+  select src2.key, src2.value
+  from src src2 left outer join src src3
+  on src2.value = src3.value) src2
+on src1.key = src2.key
+order by src2.key
+limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select *
+from src src1 right outer join (
+  select src2.key, src2.value
+  from src src2 left outer join src src3
+  on src2.value = src3.value) src2
+on src1.key = src2.key
+order by src2.key
+limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0	val_0	0	val_0