You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2016/07/25 23:16:35 UTC
hive git commit: HIVE-14326: Merging outer joins without conditions can lead to wrong results (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

Repository: hive
Updated Branches:
  refs/heads/master 964b8a001 -> 30b40c49a


HIVE-14326: Merging outer joins without conditions can lead to wrong results (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/30b40c49
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/30b40c49
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/30b40c49

Branch: refs/heads/master
Commit: 30b40c49a8381dff5be79496247433079856e0ef
Parents: 964b8a0
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Mon Jul 25 14:05:04 2016 +0100
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Tue Jul 26 00:15:44 2016 +0100

----------------------------------------------------------------------
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  |   4 +
 .../queries/clientpositive/cross_join_merge.q   |   1 +
 .../clientpositive/cross_join_merge.q.out       |  44 +++++--
 .../clientpositive/ppd_outer_join5.q.out        | 127 +++++++++++++++----
 .../clientpositive/spark/ppd_outer_join5.q.out  |  97 ++++++++++----
 5 files changed, 208 insertions(+), 65 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/30b40c49/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 698efdc..2671cb1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -8922,6 +8922,10 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
   private ObjectPair<Integer, int[]> findMergePos(QBJoinTree node, QBJoinTree target) {
     int res = -1;
     String leftAlias = node.getLeftAlias();
+    if (leftAlias == null && (!node.getNoOuterJoin() || !target.getNoOuterJoin())) {
+      // Cross with outer join: currently we do not merge
+      return new ObjectPair(-1, null);
+    }
 
     ArrayList<ASTNode> nodeCondn = node.getExpressions().get(0);
     ArrayList<ASTNode> targetCondn = null;

http://git-wip-us.apache.org/repos/asf/hive/blob/30b40c49/ql/src/test/queries/clientpositive/cross_join_merge.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/cross_join_merge.q b/ql/src/test/queries/clientpositive/cross_join_merge.q
index 3ba4727..50f5813 100644
--- a/ql/src/test/queries/clientpositive/cross_join_merge.q
+++ b/ql/src/test/queries/clientpositive/cross_join_merge.q
@@ -10,6 +10,7 @@ select src1.key from src src1 join src src2 join src src3 where src1.key=src2.ke
 explain
 select src1.key from src src1 join src src2 on 5 = src2.key join src src3 on src1.key=src3.key;
 
+-- no merge
 explain
 select src1.key from src src1 left outer join src src2 join src src3;
 

http://git-wip-us.apache.org/repos/asf/hive/blob/30b40c49/ql/src/test/results/clientpositive/cross_join_merge.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/cross_join_merge.q.out b/ql/src/test/results/clientpositive/cross_join_merge.q.out
index ccf2ff6..f15161a 100644
--- a/ql/src/test/results/clientpositive/cross_join_merge.q.out
+++ b/ql/src/test/results/clientpositive/cross_join_merge.q.out
@@ -337,16 +337,20 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-Warning: Shuffle Join JOIN[9][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-1:MAPRED' is a cross product
-PREHOOK: query: explain
+Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: -- no merge
+explain
 select src1.key from src src1 left outer join src src2 join src src3
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: -- no merge
+explain
 select src1.key from src src1 left outer join src src2 join src src3
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-1
@@ -371,6 +375,30 @@ STAGE PLANS:
               Reduce Output Operator
                 sort order: 
                 Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Left Outer Join0 to 1
+          keys:
+            0 
+            1 
+          outputColumnNames: _col0
+          Statistics: Num rows: 250000 Data size: 3906000 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 250000 Data size: 3906000 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: string)
           TableScan
             alias: src1
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
@@ -382,17 +410,15 @@ STAGE PLANS:
       Reduce Operator Tree:
         Join Operator
           condition map:
-               Left Outer Join0 to 1
-               Inner Join 0 to 2
+               Inner Join 0 to 1
           keys:
             0 
             1 
-            2 
           outputColumnNames: _col0
-          Statistics: Num rows: 125000000 Data size: 2453000000 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 125000000 Data size: 2578000000 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
-            Statistics: Num rows: 125000000 Data size: 2453000000 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 125000000 Data size: 2578000000 Basic stats: COMPLETE Column stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/30b40c49/ql/src/test/results/clientpositive/ppd_outer_join5.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/ppd_outer_join5.q.out b/ql/src/test/results/clientpositive/ppd_outer_join5.q.out
index 6658cfb..cbc0e89 100644
--- a/ql/src/test/results/clientpositive/ppd_outer_join5.q.out
+++ b/ql/src/test/results/clientpositive/ppd_outer_join5.q.out
@@ -30,14 +30,16 @@ POSTHOOK: query: create table t4 (id int, key string, value string)
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@t4
-Warning: Shuffle Join JOIN[12][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-1:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
 PREHOOK: query: explain select * from t1 full outer join t2 on t1.id=t2.id join t3 on t2.id=t3.id where t3.id=20
 PREHOOK: type: QUERY
 POSTHOOK: query: explain select * from t1 full outer join t2 on t1.id=t2.id join t3 on t2.id=t3.id where t3.id=20
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-1
@@ -71,6 +73,33 @@ STAGE PLANS:
                   sort order: 
                   Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                   value expressions: _col1 (type: string), _col2 (type: string)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Right Outer Join0 to 1
+          filter predicates:
+            0 
+            1 {true}
+          keys:
+            0 
+            1 
+          outputColumnNames: _col0, _col1, _col2, _col4, _col5
+          Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string)
           TableScan
             alias: t3
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
@@ -88,25 +117,19 @@ STAGE PLANS:
       Reduce Operator Tree:
         Join Operator
           condition map:
-               Right Outer Join0 to 1
-               Inner Join 0 to 2
-          filter predicates:
-            0 
-            1 {true}
-            2 
+               Inner Join 0 to 1
           keys:
             0 
             1 
-            2 
           outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col7, _col8
-          Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
           Select Operator
             expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), 20 (type: int), _col4 (type: string), _col5 (type: string), 20 (type: int), _col7 (type: string), _col8 (type: string)
             outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
-            Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
             File Output Operator
               compressed: false
-              Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
               table:
                   input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                   output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -118,14 +141,16 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-Warning: Shuffle Join JOIN[12][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-1:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
 PREHOOK: query: explain select * from t1 join t2 on (t1.id=t2.id) left outer join t3 on (t2.id=t3.id) where t2.id=20
 PREHOOK: type: QUERY
 POSTHOOK: query: explain select * from t1 join t2 on (t1.id=t2.id) left outer join t3 on (t2.id=t3.id) where t2.id=20
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-1
@@ -159,6 +184,30 @@ STAGE PLANS:
                   sort order: 
                   Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                   value expressions: _col1 (type: string), _col2 (type: string)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          keys:
+            0 
+            1 
+          outputColumnNames: _col1, _col2, _col4, _col5
+          Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string)
           TableScan
             alias: t3
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
@@ -176,21 +225,19 @@ STAGE PLANS:
       Reduce Operator Tree:
         Join Operator
           condition map:
-               Inner Join 0 to 1
-               Left Outer Join0 to 2
+               Left Outer Join0 to 1
           keys:
             0 
             1 
-            2 
           outputColumnNames: _col1, _col2, _col4, _col5, _col6, _col7, _col8
-          Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
           Select Operator
             expressions: 20 (type: int), _col1 (type: string), _col2 (type: string), 20 (type: int), _col4 (type: string), _col5 (type: string), _col6 (type: int), _col7 (type: string), _col8 (type: string)
             outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
-            Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
             File Output Operator
               compressed: false
-              Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
               table:
                   input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                   output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -202,14 +249,16 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-Warning: Shuffle Join JOIN[12][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-1:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
 PREHOOK: query: explain select * from t1 join t2 on (t1.id=t2.id) left outer join t3 on (t1.id=t3.id) where t2.id=20
 PREHOOK: type: QUERY
 POSTHOOK: query: explain select * from t1 join t2 on (t1.id=t2.id) left outer join t3 on (t1.id=t3.id) where t2.id=20
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-1
@@ -243,6 +292,30 @@ STAGE PLANS:
                   sort order: 
                   Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                   value expressions: _col1 (type: string), _col2 (type: string)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          keys:
+            0 
+            1 
+          outputColumnNames: _col1, _col2, _col4, _col5
+          Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string)
           TableScan
             alias: t3
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
@@ -260,21 +333,19 @@ STAGE PLANS:
       Reduce Operator Tree:
         Join Operator
           condition map:
-               Inner Join 0 to 1
-               Left Outer Join0 to 2
+               Left Outer Join0 to 1
           keys:
             0 
             1 
-            2 
           outputColumnNames: _col1, _col2, _col4, _col5, _col6, _col7, _col8
-          Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
           Select Operator
             expressions: 20 (type: int), _col1 (type: string), _col2 (type: string), 20 (type: int), _col4 (type: string), _col5 (type: string), _col6 (type: int), _col7 (type: string), _col8 (type: string)
             outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
-            Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
             File Output Operator
               compressed: false
-              Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
               table:
                   input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                   output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/30b40c49/ql/src/test/results/clientpositive/spark/ppd_outer_join5.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/ppd_outer_join5.q.out b/ql/src/test/results/clientpositive/spark/ppd_outer_join5.q.out
index f494f40..ae266e5 100644
--- a/ql/src/test/results/clientpositive/spark/ppd_outer_join5.q.out
+++ b/ql/src/test/results/clientpositive/spark/ppd_outer_join5.q.out
@@ -30,7 +30,8 @@ POSTHOOK: query: create table t4 (id int, key string, value string)
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@t4
-Warning: Shuffle Join JOIN[12][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 2' is a cross product
+Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product
+Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 3' is a cross product
 PREHOOK: query: explain select * from t1 full outer join t2 on t1.id=t2.id join t3 on t2.id=t3.id where t3.id=20
 PREHOOK: type: QUERY
 POSTHOOK: query: explain select * from t1 full outer join t2 on t1.id=t2.id join t3 on t2.id=t3.id where t3.id=20
@@ -43,7 +44,8 @@ STAGE PLANS:
   Stage: Stage-1
     Spark
       Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1)
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1)
+        Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -62,7 +64,7 @@ STAGE PLANS:
                         sort order: 
                         Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                         value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
-        Map 3 
+        Map 4 
             Map Operator Tree:
                 TableScan
                   alias: t2
@@ -78,7 +80,7 @@ STAGE PLANS:
                         sort order: 
                         Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                         value expressions: _col1 (type: string), _col2 (type: string)
-        Map 4 
+        Map 5 
             Map Operator Tree:
                 TableScan
                   alias: t3
@@ -99,24 +101,35 @@ STAGE PLANS:
               Join Operator
                 condition map:
                      Right Outer Join0 to 1
-                     Inner Join 0 to 2
                 filter predicates:
                   0 
                   1 {true}
-                  2 
                 keys:
                   0 
                   1 
-                  2 
-                outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col7, _col8
+                outputColumnNames: _col0, _col1, _col2, _col4, _col5
                 Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string)
+        Reducer 3 
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 
+                  1 
+                outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col7, _col8
+                Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
                   expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), 20 (type: int), _col4 (type: string), _col5 (type: string), 20 (type: int), _col7 (type: string), _col8 (type: string)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
-                  Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
-                    Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -128,7 +141,8 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-Warning: Shuffle Join JOIN[12][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 2' is a cross product
+Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product
+Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 3' is a cross product
 PREHOOK: query: explain select * from t1 join t2 on (t1.id=t2.id) left outer join t3 on (t2.id=t3.id) where t2.id=20
 PREHOOK: type: QUERY
 POSTHOOK: query: explain select * from t1 join t2 on (t1.id=t2.id) left outer join t3 on (t2.id=t3.id) where t2.id=20
@@ -141,7 +155,8 @@ STAGE PLANS:
   Stage: Stage-1
     Spark
       Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1)
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1)
+        Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -160,7 +175,7 @@ STAGE PLANS:
                         sort order: 
                         Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                         value expressions: _col1 (type: string), _col2 (type: string)
-        Map 3 
+        Map 4 
             Map Operator Tree:
                 TableScan
                   alias: t2
@@ -176,7 +191,7 @@ STAGE PLANS:
                         sort order: 
                         Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                         value expressions: _col1 (type: string), _col2 (type: string)
-        Map 4 
+        Map 5 
             Map Operator Tree:
                 TableScan
                   alias: t3
@@ -197,20 +212,32 @@ STAGE PLANS:
               Join Operator
                 condition map:
                      Inner Join 0 to 1
-                     Left Outer Join0 to 2
                 keys:
                   0 
                   1 
-                  2 
-                outputColumnNames: _col1, _col2, _col4, _col5, _col6, _col7, _col8
+                outputColumnNames: _col1, _col2, _col4, _col5
                 Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string)
+        Reducer 3 
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Left Outer Join0 to 1
+                keys:
+                  0 
+                  1 
+                outputColumnNames: _col1, _col2, _col4, _col5, _col6, _col7, _col8
+                Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
                   expressions: 20 (type: int), _col1 (type: string), _col2 (type: string), 20 (type: int), _col4 (type: string), _col5 (type: string), _col6 (type: int), _col7 (type: string), _col8 (type: string)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
-                  Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
-                    Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -222,7 +249,8 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-Warning: Shuffle Join JOIN[12][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 2' is a cross product
+Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product
+Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 3' is a cross product
 PREHOOK: query: explain select * from t1 join t2 on (t1.id=t2.id) left outer join t3 on (t1.id=t3.id) where t2.id=20
 PREHOOK: type: QUERY
 POSTHOOK: query: explain select * from t1 join t2 on (t1.id=t2.id) left outer join t3 on (t1.id=t3.id) where t2.id=20
@@ -235,7 +263,8 @@ STAGE PLANS:
   Stage: Stage-1
     Spark
       Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1)
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1)
+        Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -254,7 +283,7 @@ STAGE PLANS:
                         sort order: 
                         Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                         value expressions: _col1 (type: string), _col2 (type: string)
-        Map 3 
+        Map 4 
             Map Operator Tree:
                 TableScan
                   alias: t2
@@ -270,7 +299,7 @@ STAGE PLANS:
                         sort order: 
                         Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                         value expressions: _col1 (type: string), _col2 (type: string)
-        Map 4 
+        Map 5 
             Map Operator Tree:
                 TableScan
                   alias: t3
@@ -291,20 +320,32 @@ STAGE PLANS:
               Join Operator
                 condition map:
                      Inner Join 0 to 1
-                     Left Outer Join0 to 2
                 keys:
                   0 
                   1 
-                  2 
-                outputColumnNames: _col1, _col2, _col4, _col5, _col6, _col7, _col8
+                outputColumnNames: _col1, _col2, _col4, _col5
                 Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string)
+        Reducer 3 
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Left Outer Join0 to 1
+                keys:
+                  0 
+                  1 
+                outputColumnNames: _col1, _col2, _col4, _col5, _col6, _col7, _col8
+                Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
                   expressions: 20 (type: int), _col1 (type: string), _col2 (type: string), 20 (type: int), _col4 (type: string), _col5 (type: string), _col6 (type: int), _col7 (type: string), _col8 (type: string)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
-                  Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
-                    Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat