You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by vg...@apache.org on 2017/11/07 06:27:29 UTC

[10/17] hive git commit: HIVE-17767 Rewrite correlated EXISTS/IN subqueries into LEFT SEMI JOIN (Vineet Garg, reviewed by Ashutosh Chauhan)

http://git-wip-us.apache.org/repos/asf/hive/blob/aee0eaa0/ql/src/test/results/clientpositive/perf/spark/query16.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query16.q.out b/ql/src/test/results/clientpositive/perf/spark/query16.q.out
index b74d721..fd31b22 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query16.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query16.q.out
@@ -1,4 +1,3 @@
-Warning: Shuffle Join JOIN[33][tables = [$hdt$_2, $hdt$_3, $hdt$_1, $hdt$_4]] in Work 'Reducer 17' is a cross product
 PREHOOK: query: explain
 select  
    count(distinct cs_order_number) as `order count`
@@ -70,7 +69,7 @@ STAGE PLANS:
     Spark
 #### A masked pattern was here ####
       Vertices:
-        Map 10 
+        Map 9 
             Map Operator Tree:
                 TableScan
                   alias: call_center
@@ -93,7 +92,7 @@ STAGE PLANS:
     Spark
 #### A masked pattern was here ####
       Vertices:
-        Map 8 
+        Map 7 
             Map Operator Tree:
                 TableScan
                   alias: date_dim
@@ -116,16 +115,11 @@ STAGE PLANS:
     Spark
       Edges:
         Reducer 12 <- Map 11 (GROUP, 24)
-        Reducer 14 <- Map 13 (PARTITION-LEVEL SORT, 1009), Reducer 18 (PARTITION-LEVEL SORT, 1009)
-        Reducer 15 <- Reducer 14 (GROUP, 1009)
-        Reducer 17 <- Map 16 (PARTITION-LEVEL SORT, 1), Map 19 (PARTITION-LEVEL SORT, 1), Map 20 (PARTITION-LEVEL SORT, 1), Map 21 (PARTITION-LEVEL SORT, 1)
-        Reducer 18 <- Reducer 17 (GROUP, 1009)
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 494), Map 9 (PARTITION-LEVEL SORT, 494)
-        Reducer 3 <- Reducer 12 (PARTITION-LEVEL SORT, 418), Reducer 2 (PARTITION-LEVEL SORT, 418)
-        Reducer 4 <- Reducer 15 (PARTITION-LEVEL SORT, 1009), Reducer 3 (PARTITION-LEVEL SORT, 1009)
-        Reducer 5 <- Reducer 4 (GROUP, 1009)
-        Reducer 6 <- Reducer 5 (GROUP, 1)
-        Reducer 7 <- Reducer 6 (SORT, 1)
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 494), Map 8 (PARTITION-LEVEL SORT, 494)
+        Reducer 3 <- Map 10 (PARTITION-LEVEL SORT, 723), Reducer 12 (PARTITION-LEVEL SORT, 723), Reducer 2 (PARTITION-LEVEL SORT, 723)
+        Reducer 4 <- Reducer 3 (GROUP, 447)
+        Reducer 5 <- Reducer 4 (GROUP, 1)
+        Reducer 6 <- Reducer 5 (SORT, 1)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -134,7 +128,7 @@ STAGE PLANS:
                   alias: cs1
                   Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (cs_call_center_sk is not null and cs_ship_addr_sk is not null and cs_ship_date_sk is not null) (type: boolean)
+                    predicate: (cs_call_center_sk is not null and cs_order_number is not null and cs_ship_addr_sk is not null and cs_ship_date_sk is not null) (type: boolean)
                     Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: cs_ship_date_sk (type: int), cs_ship_addr_sk (type: int), cs_call_center_sk (type: int), cs_warehouse_sk (type: int), cs_order_number (type: int), cs_ext_ship_cost (type: decimal(7,2)), cs_net_profit (type: decimal(7,2))
@@ -148,7 +142,7 @@ STAGE PLANS:
                           1 _col0 (type: int)
                         outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6
                         input vertices:
-                          1 Map 8
+                          1 Map 7
                         Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
                         Reduce Output Operator
                           key expressions: _col1 (type: int)
@@ -158,6 +152,29 @@ STAGE PLANS:
                           value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2))
             Local Work:
               Map Reduce Local Work
+        Map 10 
+            Map Operator Tree:
+                TableScan
+                  alias: cs2
+                  Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (cs_order_number is not null and cs_warehouse_sk is not null) (type: boolean)
+                    Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: cs_order_number (type: int), cs_warehouse_sk (type: int)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        keys: _col0 (type: int), _col1 (type: int)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: int)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: int)
+                          Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col1 (type: int)
         Map 11 
             Map Operator Tree:
                 TableScan
@@ -176,65 +193,7 @@ STAGE PLANS:
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
                         Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
-        Map 13 
-            Map Operator Tree:
-                TableScan
-                  alias: cs2
-                  Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: cs_warehouse_sk (type: int), cs_order_number (type: int)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col1 (type: int)
-                      sort order: +
-                      Map-reduce partition columns: _col1 (type: int)
-                      Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col0 (type: int)
-        Map 16 
-            Map Operator Tree:
-                TableScan
-                  alias: date_dim
-                  Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: COMPLETE
-                  Select Operator
-                    Statistics: Num rows: 73049 Data size: 292196 Basic stats: COMPLETE Column stats: COMPLETE
-                    Reduce Output Operator
-                      sort order: 
-                      Statistics: Num rows: 73049 Data size: 292196 Basic stats: COMPLETE Column stats: COMPLETE
-        Map 19 
-            Map Operator Tree:
-                TableScan
-                  alias: call_center
-                  Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: COMPLETE
-                  Select Operator
-                    Statistics: Num rows: 60 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
-                    Reduce Output Operator
-                      sort order: 
-                      Statistics: Num rows: 60 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
-        Map 20 
-            Map Operator Tree:
-                TableScan
-                  alias: customer_address
-                  Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: COMPLETE
-                  Select Operator
-                    Statistics: Num rows: 40000000 Data size: 160000000 Basic stats: COMPLETE Column stats: COMPLETE
-                    Reduce Output Operator
-                      sort order: 
-                      Statistics: Num rows: 40000000 Data size: 160000000 Basic stats: COMPLETE Column stats: COMPLETE
-        Map 21 
-            Map Operator Tree:
-                TableScan
-                  alias: cs1
-                  Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: cs_warehouse_sk (type: int), cs_order_number (type: int)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      sort order: 
-                      Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col0 (type: int), _col1 (type: int)
-        Map 9 
+        Map 8 
             Map Operator Tree:
                 TableScan
                   alias: customer_address
@@ -268,86 +227,6 @@ STAGE PLANS:
                     Map-reduce partition columns: _col0 (type: int)
                     Statistics: Num rows: 14399440 Data size: 1528617286 Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col1 (type: boolean)
-        Reducer 14 
-            Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 _col1 (type: int)
-                  1 _col1 (type: int)
-                outputColumnNames: _col0, _col2, _col3
-                Statistics: Num rows: 5072854730221289472 Data size: 5072854730221289472 Basic stats: COMPLETE Column stats: NONE
-                Filter Operator
-                  predicate: (_col2 <> _col0) (type: boolean)
-                  Statistics: Num rows: 5072854730221289472 Data size: 5072854730221289472 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: _col2 (type: int), _col3 (type: int)
-                    outputColumnNames: _col2, _col3
-                    Statistics: Num rows: 5072854730221289472 Data size: 5072854730221289472 Basic stats: COMPLETE Column stats: NONE
-                    Group By Operator
-                      keys: _col2 (type: int), _col3 (type: int)
-                      mode: hash
-                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 5072854730221289472 Data size: 5072854730221289472 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int), _col1 (type: int)
-                        sort order: ++
-                        Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
-                        Statistics: Num rows: 5072854730221289472 Data size: 5072854730221289472 Basic stats: COMPLETE Column stats: NONE
-        Reducer 15 
-            Reduce Operator Tree:
-              Group By Operator
-                keys: KEY._col0 (type: int), KEY._col1 (type: int)
-                mode: mergepartial
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 2536427365110644736 Data size: 2536427365110644736 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: int), _col1 (type: int)
-                  sort order: ++
-                  Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
-                  Statistics: Num rows: 2536427365110644736 Data size: 2536427365110644736 Basic stats: COMPLETE Column stats: NONE
-        Reducer 17 
-            Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                     Inner Join 0 to 2
-                     Inner Join 0 to 3
-                keys:
-                  0 
-                  1 
-                  2 
-                  3 
-                outputColumnNames: _col3, _col4
-                Statistics: Num rows: 9223372036854775807 Data size: 9223372036854775807 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  keys: _col4 (type: int), _col3 (type: int)
-                  mode: hash
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 9223372036854775807 Data size: 9223372036854775807 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: int), _col1 (type: int)
-                    sort order: ++
-                    Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
-                    Statistics: Num rows: 9223372036854775807 Data size: 9223372036854775807 Basic stats: COMPLETE Column stats: NONE
-        Reducer 18 
-            Reduce Operator Tree:
-              Group By Operator
-                keys: KEY._col0 (type: int), KEY._col1 (type: int)
-                mode: mergepartial
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 4611686018427387903 Data size: 4611686018427387904 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col1 (type: int), _col0 (type: int)
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 4611686018427387903 Data size: 4611686018427387904 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col1 (type: int)
-                    sort order: +
-                    Map-reduce partition columns: _col1 (type: int)
-                    Statistics: Num rows: 4611686018427387903 Data size: 4611686018427387904 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col0 (type: int)
         Reducer 2 
             Local Work:
               Map Reduce Local Work
@@ -368,7 +247,7 @@ STAGE PLANS:
                     1 _col0 (type: int)
                   outputColumnNames: _col3, _col4, _col5, _col6
                   input vertices:
-                    1 Map 10
+                    1 Map 9
                   Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col4 (type: int)
@@ -380,59 +259,46 @@ STAGE PLANS:
             Reduce Operator Tree:
               Join Operator
                 condition map:
-                     Left Outer Join 0 to 1
+                     Left Semi Join 0 to 1
+                     Left Outer Join 0 to 2
                 keys:
                   0 _col4 (type: int)
                   1 _col0 (type: int)
-                outputColumnNames: _col3, _col4, _col5, _col6, _col14
-                Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col3 (type: int), _col4 (type: int)
-                  sort order: ++
-                  Map-reduce partition columns: _col3 (type: int), _col4 (type: int)
-                  Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col14 (type: boolean)
-        Reducer 4 
-            Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 _col3 (type: int), _col4 (type: int)
-                  1 _col0 (type: int), _col1 (type: int)
-                outputColumnNames: _col4, _col5, _col6, _col14
-                Statistics: Num rows: 2790070162094850048 Data size: 2790070162094850048 Basic stats: COMPLETE Column stats: NONE
+                  2 _col0 (type: int)
+                outputColumnNames: _col3, _col4, _col5, _col6, _col14, _col16
+                residual filter predicates: {(_col3 <> _col14)}
+                Statistics: Num rows: 843291907 Data size: 114198664830 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col14 (type: boolean)
+                  expressions: _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col16 (type: boolean)
                   outputColumnNames: _col4, _col5, _col6, _col16
-                  Statistics: Num rows: 2790070162094850048 Data size: 2790070162094850048 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 843291907 Data size: 114198664830 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: _col16 is null (type: boolean)
-                    Statistics: Num rows: 1395035081047425024 Data size: 1395035081047425024 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 421645953 Data size: 57099332347 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2))
                       outputColumnNames: _col4, _col5, _col6
-                      Statistics: Num rows: 1395035081047425024 Data size: 1395035081047425024 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 421645953 Data size: 57099332347 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
                         aggregations: sum(_col5), sum(_col6)
                         keys: _col4 (type: int)
                         mode: hash
                         outputColumnNames: _col0, _col2, _col3
-                        Statistics: Num rows: 1395035081047425024 Data size: 1395035081047425024 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 421645953 Data size: 57099332347 Basic stats: COMPLETE Column stats: NONE
                         Reduce Output Operator
                           key expressions: _col0 (type: int)
                           sort order: +
                           Map-reduce partition columns: _col0 (type: int)
-                          Statistics: Num rows: 1395035081047425024 Data size: 1395035081047425024 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 421645953 Data size: 57099332347 Basic stats: COMPLETE Column stats: NONE
                           value expressions: _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2))
-        Reducer 5 
+        Reducer 4 
             Reduce Operator Tree:
               Group By Operator
                 aggregations: sum(VALUE._col0), sum(VALUE._col1)
                 keys: KEY._col0 (type: int)
                 mode: partial2
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 1395035081047425024 Data size: 1395035081047425024 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 421645953 Data size: 57099332347 Basic stats: COMPLETE Column stats: NONE
                 Group By Operator
                   aggregations: count(_col0), sum(_col1), sum(_col2)
                   mode: partial2
@@ -442,7 +308,7 @@ STAGE PLANS:
                     sort order: 
                     Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col0 (type: bigint), _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2))
-        Reducer 6 
+        Reducer 5 
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2)
@@ -459,7 +325,7 @@ STAGE PLANS:
                     Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE
                     TopN Hash Memory Usage: 0.1
                     value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2))
-        Reducer 7 
+        Reducer 6 
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: decimal(17,2)), VALUE._col1 (type: decimal(17,2))

http://git-wip-us.apache.org/repos/asf/hive/blob/aee0eaa0/ql/src/test/results/clientpositive/perf/spark/query35.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query35.q.out b/ql/src/test/results/clientpositive/perf/spark/query35.q.out
index 8759b71..7031315 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query35.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query35.q.out
@@ -118,18 +118,16 @@ STAGE PLANS:
   Stage: Stage-1
     Spark
       Edges:
-        Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 154), Map 13 (PARTITION-LEVEL SORT, 154)
-        Reducer 12 <- Reducer 11 (GROUP, 169)
-        Reducer 15 <- Map 14 (PARTITION-LEVEL SORT, 398), Map 17 (PARTITION-LEVEL SORT, 398)
-        Reducer 16 <- Reducer 15 (GROUP, 437)
-        Reducer 19 <- Map 18 (PARTITION-LEVEL SORT, 305), Map 21 (PARTITION-LEVEL SORT, 305)
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 855), Map 8 (PARTITION-LEVEL SORT, 855)
-        Reducer 20 <- Reducer 19 (GROUP, 336)
-        Reducer 3 <- Map 9 (PARTITION-LEVEL SORT, 597), Reducer 2 (PARTITION-LEVEL SORT, 597)
-        Reducer 4 <- Reducer 12 (PARTITION-LEVEL SORT, 953), Reducer 16 (PARTITION-LEVEL SORT, 953), Reducer 3 (PARTITION-LEVEL SORT, 953)
-        Reducer 5 <- Reducer 20 (PARTITION-LEVEL SORT, 648), Reducer 4 (PARTITION-LEVEL SORT, 648)
-        Reducer 6 <- Reducer 5 (GROUP, 529)
-        Reducer 7 <- Reducer 6 (SORT, 1)
+        Reducer 10 <- Map 11 (PARTITION-LEVEL SORT, 398), Map 9 (PARTITION-LEVEL SORT, 398)
+        Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 154), Map 15 (PARTITION-LEVEL SORT, 154)
+        Reducer 14 <- Reducer 13 (GROUP, 169)
+        Reducer 17 <- Map 16 (PARTITION-LEVEL SORT, 305), Map 19 (PARTITION-LEVEL SORT, 305)
+        Reducer 18 <- Reducer 17 (GROUP, 336)
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 855), Map 7 (PARTITION-LEVEL SORT, 855)
+        Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 597), Reducer 2 (PARTITION-LEVEL SORT, 597)
+        Reducer 4 <- Reducer 10 (PARTITION-LEVEL SORT, 1009), Reducer 14 (PARTITION-LEVEL SORT, 1009), Reducer 18 (PARTITION-LEVEL SORT, 1009), Reducer 3 (PARTITION-LEVEL SORT, 1009)
+        Reducer 5 <- Reducer 4 (GROUP, 1009)
+        Reducer 6 <- Reducer 5 (SORT, 1)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -138,7 +136,7 @@ STAGE PLANS:
                   alias: c
                   Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (c_current_addr_sk is not null and c_current_cdemo_sk is not null) (type: boolean)
+                    predicate: (c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_customer_sk is not null) (type: boolean)
                     Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: c_customer_sk (type: int), c_current_cdemo_sk (type: int), c_current_addr_sk (type: int)
@@ -150,25 +148,7 @@ STAGE PLANS:
                         Map-reduce partition columns: _col2 (type: int)
                         Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col0 (type: int), _col1 (type: int)
-        Map 10 
-            Map Operator Tree:
-                TableScan
-                  alias: web_sales
-                  Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean)
-                    Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int)
-                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col1 (type: int)
-        Map 13 
+        Map 11 
             Map Operator Tree:
                 TableScan
                   alias: date_dim
@@ -185,25 +165,25 @@ STAGE PLANS:
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
                         Statistics: Num rows: 12174 Data size: 13622706 Basic stats: COMPLETE Column stats: NONE
-        Map 14 
+        Map 12 
             Map Operator Tree:
                 TableScan
-                  alias: store_sales
-                  Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+                  alias: web_sales
+                  Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean)
-                    Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+                    predicate: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean)
+                    Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int)
+                      expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col1 (type: int)
-        Map 17 
+        Map 15 
             Map Operator Tree:
                 TableScan
                   alias: date_dim
@@ -220,7 +200,7 @@ STAGE PLANS:
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
                         Statistics: Num rows: 12174 Data size: 13622706 Basic stats: COMPLETE Column stats: NONE
-        Map 18 
+        Map 16 
             Map Operator Tree:
                 TableScan
                   alias: catalog_sales
@@ -238,7 +218,7 @@ STAGE PLANS:
                         Map-reduce partition columns: _col0 (type: int)
                         Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col1 (type: int)
-        Map 21 
+        Map 19 
             Map Operator Tree:
                 TableScan
                   alias: date_dim
@@ -255,7 +235,7 @@ STAGE PLANS:
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
                         Statistics: Num rows: 12174 Data size: 13622706 Basic stats: COMPLETE Column stats: NONE
-        Map 8 
+        Map 7 
             Map Operator Tree:
                 TableScan
                   alias: ca
@@ -273,7 +253,7 @@ STAGE PLANS:
                         Map-reduce partition columns: _col0 (type: int)
                         Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col1 (type: string)
-        Map 9 
+        Map 8 
             Map Operator Tree:
                 TableScan
                   alias: customer_demographics
@@ -291,7 +271,49 @@ STAGE PLANS:
                         Map-reduce partition columns: _col0 (type: int)
                         Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: int)
-        Reducer 11 
+        Map 9 
+            Map Operator Tree:
+                TableScan
+                  alias: store_sales
+                  Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean)
+                    Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col1 (type: int)
+        Reducer 10 
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+                outputColumnNames: _col1
+                Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col1 (type: int)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    keys: _col0 (type: int)
+                    mode: hash
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: int)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: int)
+                      Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+        Reducer 13 
             Reduce Operator Tree:
               Join Operator
                 condition map:
@@ -311,7 +333,7 @@ STAGE PLANS:
                     sort order: +
                     Map-reduce partition columns: _col0 (type: int)
                     Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
-        Reducer 12 
+        Reducer 14 
             Reduce Operator Tree:
               Group By Operator
                 keys: KEY._col0 (type: int)
@@ -328,7 +350,7 @@ STAGE PLANS:
                     Map-reduce partition columns: _col0 (type: int)
                     Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col1 (type: boolean)
-        Reducer 15 
+        Reducer 17 
             Reduce Operator Tree:
               Join Operator
                 condition map:
@@ -337,49 +359,34 @@ STAGE PLANS:
                   0 _col0 (type: int)
                   1 _col0 (type: int)
                 outputColumnNames: _col1
-                Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
                 Group By Operator
                   keys: _col1 (type: int)
                   mode: hash
                   outputColumnNames: _col0
-                  Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col0 (type: int)
                     sort order: +
                     Map-reduce partition columns: _col0 (type: int)
-                    Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
-        Reducer 16 
+                    Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+        Reducer 18 
             Reduce Operator Tree:
               Group By Operator
                 keys: KEY._col0 (type: int)
                 mode: mergepartial
                 outputColumnNames: _col0
-                Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: int)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: int)
-                  Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE
-        Reducer 19 
-            Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 _col0 (type: int)
-                  1 _col0 (type: int)
-                outputColumnNames: _col1
-                Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  keys: _col1 (type: int)
-                  mode: hash
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 158394413 Data size: 21449785388 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: int), true (type: boolean)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 158394413 Data size: 21449785388 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col0 (type: int)
                     sort order: +
                     Map-reduce partition columns: _col0 (type: int)
-                    Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 158394413 Data size: 21449785388 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col1 (type: boolean)
         Reducer 2 
             Reduce Operator Tree:
               Join Operator
@@ -396,23 +403,6 @@ STAGE PLANS:
                   Map-reduce partition columns: _col1 (type: int)
                   Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col0 (type: int), _col4 (type: string)
-        Reducer 20 
-            Reduce Operator Tree:
-              Group By Operator
-                keys: KEY._col0 (type: int)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 158394413 Data size: 21449785388 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: int), true (type: boolean)
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 158394413 Data size: 21449785388 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: int)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: int)
-                    Statistics: Num rows: 158394413 Data size: 21449785388 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col1 (type: boolean)
         Reducer 3 
             Reduce Operator Tree:
               Join Operator
@@ -433,78 +423,60 @@ STAGE PLANS:
             Reduce Operator Tree:
               Join Operator
                 condition map:
-                     Left Outer Join 0 to 1
-                     Inner Join 0 to 2
+                     Left Semi Join 0 to 1
+                     Left Outer Join 0 to 2
+                     Left Outer Join 0 to 3
                 keys:
                   0 _col0 (type: int)
                   1 _col0 (type: int)
                   2 _col0 (type: int)
-                outputColumnNames: _col0, _col4, _col6, _col7, _col8, _col9, _col10, _col12
-                Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: int), _col4 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: int), _col9 (type: int), _col10 (type: int), _col12 (type: boolean)
-                  outputColumnNames: _col0, _col4, _col6, _col7, _col8, _col9, _col10, _col13
-                  Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: int)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: int)
-                    Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col4 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: int), _col9 (type: int), _col10 (type: int), _col13 (type: boolean)
-        Reducer 5 
-            Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Left Outer Join 0 to 1
-                keys:
-                  0 _col0 (type: int)
-                  1 _col0 (type: int)
-                outputColumnNames: _col4, _col6, _col7, _col8, _col9, _col10, _col13, _col15
-                Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+                  3 _col0 (type: int)
+                outputColumnNames: _col4, _col6, _col7, _col8, _col9, _col10, _col12, _col14
+                Statistics: Num rows: 2090864244 Data size: 184456650574 Basic stats: COMPLETE Column stats: NONE
                 Filter Operator
-                  predicate: (_col13 is not null or _col15 is not null) (type: boolean)
-                  Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+                  predicate: (_col12 is not null or _col14 is not null) (type: boolean)
+                  Statistics: Num rows: 2090864244 Data size: 184456650574 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: _col4 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: int), _col9 (type: int), _col10 (type: int)
                     outputColumnNames: _col4, _col6, _col7, _col8, _col9, _col10
-                    Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 2090864244 Data size: 184456650574 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: count(), avg(_col8), max(_col8), sum(_col8), avg(_col9), max(_col9), sum(_col9), avg(_col10), max(_col10), sum(_col10)
                       keys: _col4 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: int), _col9 (type: int), _col10 (type: int)
                       mode: hash
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15
-                      Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 2090864244 Data size: 184456650574 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: int)
                         sort order: ++++++
                         Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: int)
-                        Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 2090864244 Data size: 184456650574 Basic stats: COMPLETE Column stats: NONE
                         TopN Hash Memory Usage: 0.1
                         value expressions: _col6 (type: bigint), _col7 (type: struct<count:bigint,sum:double,input:int>), _col8 (type: int), _col9 (type: bigint), _col10 (type: struct<count:bigint,sum:double,input:int>), _col11 (type: int), _col12 (type: bigint), _col13 (type: struct<count:bigint,sum:double,input:int>), _col14 (type: int), _col15 (type: bigint)
-        Reducer 6 
+        Reducer 5 
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0), avg(VALUE._col1), max(VALUE._col2), sum(VALUE._col3), avg(VALUE._col4), max(VALUE._col5), sum(VALUE._col6), avg(VALUE._col7), max(VALUE._col8), sum(VALUE._col9)
                 keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: int), KEY._col4 (type: int), KEY._col5 (type: int)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15
-                Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1045432122 Data size: 92228325287 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
                   expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col6 (type: bigint), _col7 (type: double), _col8 (type: int), _col9 (type: bigint), _col4 (type: int), _col10 (type: double), _col11 (type: int), _col12 (type: bigint), _col5 (type: int), _col13 (type: double), _col14 (type: int), _col15 (type: bigint), _col3 (type: int)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col9, _col10, _col11, _col12, _col14, _col15, _col16, _col17
-                  Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1045432122 Data size: 92228325287 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col17 (type: int), _col7 (type: int), _col12 (type: int)
                     sort order: ++++++
-                    Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1045432122 Data size: 92228325287 Basic stats: COMPLETE Column stats: NONE
                     TopN Hash Memory Usage: 0.1
                     value expressions: _col3 (type: bigint), _col4 (type: double), _col5 (type: int), _col6 (type: bigint), _col9 (type: double), _col10 (type: int), _col11 (type: bigint), _col14 (type: double), _col15 (type: int), _col16 (type: bigint)
-        Reducer 7 
+        Reducer 6 
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: double), VALUE._col2 (type: int), VALUE._col3 (type: bigint), KEY.reducesinkkey4 (type: int), VALUE._col0 (type: bigint), VALUE._col4 (type: double), VALUE._col5 (type: int), VALUE._col6 (type: bigint), KEY.reducesinkkey5 (type: int), VALUE._col0 (type: bigint), VALUE._col7 (type: double), VALUE._col8 (type: int), VALUE._col9 (type: bigint)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16
-                Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1045432122 Data size: 92228325287 Basic stats: COMPLETE Column stats: NONE
                 Limit
                   Number of rows: 100
                   Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE

http://git-wip-us.apache.org/repos/asf/hive/blob/aee0eaa0/ql/src/test/results/clientpositive/perf/spark/query69.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query69.q.out b/ql/src/test/results/clientpositive/perf/spark/query69.q.out
index e4430be..efb3f30 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query69.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query69.q.out
@@ -102,7 +102,7 @@ STAGE PLANS:
     Spark
 #### A masked pattern was here ####
       Vertices:
-        Map 12 
+        Map 11 
             Map Operator Tree:
                 TableScan
                   alias: date_dim
@@ -125,7 +125,7 @@ STAGE PLANS:
     Spark
 #### A masked pattern was here ####
       Vertices:
-        Map 15 
+        Map 14 
             Map Operator Tree:
                 TableScan
                   alias: date_dim
@@ -148,7 +148,7 @@ STAGE PLANS:
     Spark
 #### A masked pattern was here ####
       Vertices:
-        Map 18 
+        Map 17 
             Map Operator Tree:
                 TableScan
                   alias: date_dim
@@ -170,14 +170,13 @@ STAGE PLANS:
   Stage: Stage-1
     Spark
       Edges:
-        Reducer 11 <- Map 10 (GROUP, 169)
-        Reducer 14 <- Map 13 (GROUP, 437)
-        Reducer 17 <- Map 16 (GROUP, 336)
+        Reducer 13 <- Map 12 (GROUP, 169)
+        Reducer 16 <- Map 15 (GROUP, 336)
         Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 697), Map 8 (PARTITION-LEVEL SORT, 697)
         Reducer 3 <- Map 9 (PARTITION-LEVEL SORT, 597), Reducer 2 (PARTITION-LEVEL SORT, 597)
-        Reducer 4 <- Reducer 11 (PARTITION-LEVEL SORT, 953), Reducer 14 (PARTITION-LEVEL SORT, 953), Reducer 3 (PARTITION-LEVEL SORT, 953)
-        Reducer 5 <- Reducer 17 (PARTITION-LEVEL SORT, 408), Reducer 4 (PARTITION-LEVEL SORT, 408)
-        Reducer 6 <- Reducer 5 (GROUP, 133)
+        Reducer 4 <- Map 10 (PARTITION-LEVEL SORT, 1009), Reducer 13 (PARTITION-LEVEL SORT, 1009), Reducer 3 (PARTITION-LEVEL SORT, 1009)
+        Reducer 5 <- Reducer 16 (PARTITION-LEVEL SORT, 648), Reducer 4 (PARTITION-LEVEL SORT, 648)
+        Reducer 6 <- Reducer 5 (GROUP, 265)
         Reducer 7 <- Reducer 6 (SORT, 1)
 #### A masked pattern was here ####
       Vertices:
@@ -187,7 +186,7 @@ STAGE PLANS:
                   alias: c
                   Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (c_current_addr_sk is not null and c_current_cdemo_sk is not null) (type: boolean)
+                    predicate: (c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_customer_sk is not null) (type: boolean)
                     Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: c_customer_sk (type: int), c_current_cdemo_sk (type: int), c_current_addr_sk (type: int)
@@ -202,15 +201,15 @@ STAGE PLANS:
         Map 10 
             Map Operator Tree:
                 TableScan
-                  alias: web_sales
-                  Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+                  alias: store_sales
+                  Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean)
-                    Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+                    predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean)
+                    Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int)
+                      expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
                       Map Join Operator
                         condition map:
                              Inner Join 0 to 1
@@ -219,32 +218,36 @@ STAGE PLANS:
                           1 _col0 (type: int)
                         outputColumnNames: _col1
                         input vertices:
-                          1 Map 12
-                        Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
-                        Group By Operator
-                          keys: _col1 (type: int)
-                          mode: hash
+                          1 Map 11
+                        Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+                        Select Operator
+                          expressions: _col1 (type: int)
                           outputColumnNames: _col0
-                          Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
-                          Reduce Output Operator
-                            key expressions: _col0 (type: int)
-                            sort order: +
-                            Map-reduce partition columns: _col0 (type: int)
-                            Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+                          Group By Operator
+                            keys: _col0 (type: int)
+                            mode: hash
+                            outputColumnNames: _col0
+                            Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+                            Reduce Output Operator
+                              key expressions: _col0 (type: int)
+                              sort order: +
+                              Map-reduce partition columns: _col0 (type: int)
+                              Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
             Local Work:
               Map Reduce Local Work
-        Map 13 
+        Map 12 
             Map Operator Tree:
                 TableScan
-                  alias: store_sales
-                  Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+                  alias: web_sales
+                  Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean)
-                    Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+                    predicate: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean)
+                    Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int)
+                      expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
                       Map Join Operator
                         condition map:
                              Inner Join 0 to 1
@@ -253,21 +256,21 @@ STAGE PLANS:
                           1 _col0 (type: int)
                         outputColumnNames: _col1
                         input vertices:
-                          1 Map 15
-                        Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+                          1 Map 14
+                        Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
                         Group By Operator
                           keys: _col1 (type: int)
                           mode: hash
                           outputColumnNames: _col0
-                          Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
                           Reduce Output Operator
                             key expressions: _col0 (type: int)
                             sort order: +
                             Map-reduce partition columns: _col0 (type: int)
-                            Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+                            Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
             Local Work:
               Map Reduce Local Work
-        Map 16 
+        Map 15 
             Map Operator Tree:
                 TableScan
                   alias: catalog_sales
@@ -287,7 +290,7 @@ STAGE PLANS:
                           1 _col0 (type: int)
                         outputColumnNames: _col1
                         input vertices:
-                          1 Map 18
+                          1 Map 17
                         Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
                         Group By Operator
                           keys: _col1 (type: int)
@@ -336,7 +339,7 @@ STAGE PLANS:
                         Map-reduce partition columns: _col0 (type: int)
                         Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: string)
-        Reducer 11 
+        Reducer 13 
             Reduce Operator Tree:
               Group By Operator
                 keys: KEY._col0 (type: int)
@@ -353,19 +356,7 @@ STAGE PLANS:
                     Map-reduce partition columns: _col0 (type: int)
                     Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col1 (type: boolean)
-        Reducer 14 
-            Reduce Operator Tree:
-              Group By Operator
-                keys: KEY._col0 (type: int)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: int)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: int)
-                  Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE
-        Reducer 17 
+        Reducer 16 
             Reduce Operator Tree:
               Group By Operator
                 keys: KEY._col0 (type: int)
@@ -418,31 +409,27 @@ STAGE PLANS:
             Reduce Operator Tree:
               Join Operator
                 condition map:
-                     Left Outer Join 0 to 1
-                     Inner Join 0 to 2
+                     Left Semi Join 0 to 1
+                     Left Outer Join 0 to 2
                 keys:
                   0 _col0 (type: int)
                   1 _col0 (type: int)
                   2 _col0 (type: int)
                 outputColumnNames: _col0, _col6, _col7, _col8, _col9, _col10, _col12
-                Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: int), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col12 (type: boolean)
-                  outputColumnNames: _col0, _col6, _col7, _col8, _col9, _col10, _col13
+                Statistics: Num rows: 1393909496 Data size: 122971100382 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: _col12 is null (type: boolean)
                   Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: _col13 is null (type: boolean)
-                    Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: _col0 (type: int), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: string)
-                      outputColumnNames: _col0, _col6, _col7, _col8, _col9, _col10
-                      Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: string)
+                  Select Operator
+                    expressions: _col0 (type: int), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: string)
+                    outputColumnNames: _col0, _col6, _col7, _col8, _col9, _col10
+                    Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: int)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: int)
+                      Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: string)
         Reducer 5 
             Reduce Operator Tree:
               Join Operator
@@ -451,26 +438,26 @@ STAGE PLANS:
                 keys:
                   0 _col0 (type: int)
                   1 _col0 (type: int)
-                outputColumnNames: _col6, _col7, _col8, _col9, _col10, _col15
-                Statistics: Num rows: 383325119 Data size: 33817053337 Basic stats: COMPLETE Column stats: NONE
+                outputColumnNames: _col6, _col7, _col8, _col9, _col10, _col14
+                Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
                 Filter Operator
-                  predicate: _col15 is null (type: boolean)
-                  Statistics: Num rows: 191662559 Data size: 16908526624 Basic stats: COMPLETE Column stats: NONE
+                  predicate: _col14 is null (type: boolean)
+                  Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: string)
                     outputColumnNames: _col6, _col7, _col8, _col9, _col10
-                    Statistics: Num rows: 191662559 Data size: 16908526624 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: count()
                       keys: _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: string)
                       mode: hash
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                      Statistics: Num rows: 191662559 Data size: 16908526624 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: string)
                         sort order: +++++
                         Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: string)
-                        Statistics: Num rows: 191662559 Data size: 16908526624 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
                         TopN Hash Memory Usage: 0.1
                         value expressions: _col5 (type: bigint)
         Reducer 6 
@@ -480,15 +467,15 @@ STAGE PLANS:
                 keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: int), KEY._col4 (type: string)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                Statistics: Num rows: 95831279 Data size: 8454263267 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
                   expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: bigint), _col3 (type: int), _col4 (type: string)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col6
-                  Statistics: Num rows: 95831279 Data size: 8454263267 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col4 (type: int), _col6 (type: string)
                     sort order: +++++
-                    Statistics: Num rows: 95831279 Data size: 8454263267 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE
                     TopN Hash Memory Usage: 0.1
                     value expressions: _col3 (type: bigint)
         Reducer 7 
@@ -496,7 +483,7 @@ STAGE PLANS:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col0 (type: bigint), KEY.reducesinkkey3 (type: int), VALUE._col0 (type: bigint), KEY.reducesinkkey4 (type: string), VALUE._col0 (type: bigint)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
-                Statistics: Num rows: 95831279 Data size: 8454263267 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE
                 Limit
                   Number of rows: 100
                   Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE

http://git-wip-us.apache.org/repos/asf/hive/blob/aee0eaa0/ql/src/test/results/clientpositive/perf/spark/query94.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query94.q.out b/ql/src/test/results/clientpositive/perf/spark/query94.q.out
index 43b8c77..995875a 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query94.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query94.q.out
@@ -1,4 +1,3 @@
-Warning: Shuffle Join JOIN[33][tables = [$hdt$_2, $hdt$_3, $hdt$_1, $hdt$_4]] in Work 'Reducer 17' is a cross product
 PREHOOK: query: explain
 select  
    count(distinct ws_order_number) as `order count`
@@ -66,7 +65,7 @@ STAGE PLANS:
     Spark
 #### A masked pattern was here ####
       Vertices:
-        Map 10 
+        Map 9 
             Map Operator Tree:
                 TableScan
                   alias: web_site
@@ -89,7 +88,7 @@ STAGE PLANS:
     Spark
 #### A masked pattern was here ####
       Vertices:
-        Map 8 
+        Map 7 
             Map Operator Tree:
                 TableScan
                   alias: date_dim
@@ -112,16 +111,11 @@ STAGE PLANS:
     Spark
       Edges:
         Reducer 12 <- Map 11 (GROUP, 11)
-        Reducer 14 <- Map 13 (PARTITION-LEVEL SORT, 1009), Reducer 18 (PARTITION-LEVEL SORT, 1009)
-        Reducer 15 <- Reducer 14 (GROUP, 1009)
-        Reducer 17 <- Map 16 (PARTITION-LEVEL SORT, 1), Map 19 (PARTITION-LEVEL SORT, 1), Map 20 (PARTITION-LEVEL SORT, 1), Map 21 (PARTITION-LEVEL SORT, 1)
-        Reducer 18 <- Reducer 17 (GROUP, 1009)
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 327), Map 9 (PARTITION-LEVEL SORT, 327)
-        Reducer 3 <- Reducer 12 (PARTITION-LEVEL SORT, 209), Reducer 2 (PARTITION-LEVEL SORT, 209)
-        Reducer 4 <- Reducer 15 (PARTITION-LEVEL SORT, 1009), Reducer 3 (PARTITION-LEVEL SORT, 1009)
-        Reducer 5 <- Reducer 4 (GROUP, 1009)
-        Reducer 6 <- Reducer 5 (GROUP, 1)
-        Reducer 7 <- Reducer 6 (SORT, 1)
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 327), Map 8 (PARTITION-LEVEL SORT, 327)
+        Reducer 3 <- Map 10 (PARTITION-LEVEL SORT, 362), Reducer 12 (PARTITION-LEVEL SORT, 362), Reducer 2 (PARTITION-LEVEL SORT, 362)
+        Reducer 4 <- Reducer 3 (GROUP, 224)
+        Reducer 5 <- Reducer 4 (GROUP, 1)
+        Reducer 6 <- Reducer 5 (SORT, 1)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -130,7 +124,7 @@ STAGE PLANS:
                   alias: ws1
                   Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (ws_ship_addr_sk is not null and ws_ship_date_sk is not null and ws_web_site_sk is not null) (type: boolean)
+                    predicate: (ws_order_number is not null and ws_ship_addr_sk is not null and ws_ship_date_sk is not null and ws_web_site_sk is not null) (type: boolean)
                     Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ws_ship_date_sk (type: int), ws_ship_addr_sk (type: int), ws_web_site_sk (type: int), ws_warehouse_sk (type: int), ws_order_number (type: int), ws_ext_ship_cost (type: decimal(7,2)), ws_net_profit (type: decimal(7,2))
@@ -144,7 +138,7 @@ STAGE PLANS:
                           1 _col0 (type: int)
                         outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6
                         input vertices:
-                          1 Map 8
+                          1 Map 7
                         Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
                         Reduce Output Operator
                           key expressions: _col1 (type: int)
@@ -154,6 +148,29 @@ STAGE PLANS:
                           value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2))
             Local Work:
               Map Reduce Local Work
+        Map 10 
+            Map Operator Tree:
+                TableScan
+                  alias: ws2
+                  Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (ws_order_number is not null and ws_warehouse_sk is not null) (type: boolean)
+                    Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: ws_order_number (type: int), ws_warehouse_sk (type: int)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        keys: _col0 (type: int), _col1 (type: int)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: int)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: int)
+                          Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col1 (type: int)
         Map 11 
             Map Operator Tree:
                 TableScan
@@ -172,65 +189,7 @@ STAGE PLANS:
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
                         Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE
-        Map 13 
-            Map Operator Tree:
-                TableScan
-                  alias: ws2
-                  Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: ws_warehouse_sk (type: int), ws_order_number (type: int)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col1 (type: int)
-                      sort order: +
-                      Map-reduce partition columns: _col1 (type: int)
-                      Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col0 (type: int)
-        Map 16 
-            Map Operator Tree:
-                TableScan
-                  alias: date_dim
-                  Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: COMPLETE
-                  Select Operator
-                    Statistics: Num rows: 73049 Data size: 292196 Basic stats: COMPLETE Column stats: COMPLETE
-                    Reduce Output Operator
-                      sort order: 
-                      Statistics: Num rows: 73049 Data size: 292196 Basic stats: COMPLETE Column stats: COMPLETE
-        Map 19 
-            Map Operator Tree:
-                TableScan
-                  alias: web_site
-                  Statistics: Num rows: 84 Data size: 155408 Basic stats: COMPLETE Column stats: COMPLETE
-                  Select Operator
-                    Statistics: Num rows: 84 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE
-                    Reduce Output Operator
-                      sort order: 
-                      Statistics: Num rows: 84 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE
-        Map 20 
-            Map Operator Tree:
-                TableScan
-                  alias: customer_address
-                  Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: COMPLETE
-                  Select Operator
-                    Statistics: Num rows: 40000000 Data size: 160000000 Basic stats: COMPLETE Column stats: COMPLETE
-                    Reduce Output Operator
-                      sort order: 
-                      Statistics: Num rows: 40000000 Data size: 160000000 Basic stats: COMPLETE Column stats: COMPLETE
-        Map 21 
-            Map Operator Tree:
-                TableScan
-                  alias: ws1
-                  Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: ws_warehouse_sk (type: int), ws_order_number (type: int)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      sort order: 
-                      Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col0 (type: int), _col1 (type: int)
-        Map 9 
+        Map 8 
             Map Operator Tree:
                 TableScan
                   alias: customer_address
@@ -264,86 +223,6 @@ STAGE PLANS:
                     Map-reduce partition columns: _col0 (type: int)
                     Statistics: Num rows: 7199233 Data size: 662597045 Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col1 (type: boolean)
-        Reducer 14 
-            Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 _col1 (type: int)
-                  1 _col1 (type: int)
-                outputColumnNames: _col0, _col2, _col3
-                Statistics: Num rows: 5072854730221289472 Data size: 5072854730221289472 Basic stats: COMPLETE Column stats: NONE
-                Filter Operator
-                  predicate: (_col2 <> _col0) (type: boolean)
-                  Statistics: Num rows: 5072854730221289472 Data size: 5072854730221289472 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: _col2 (type: int), _col3 (type: int)
-                    outputColumnNames: _col2, _col3
-                    Statistics: Num rows: 5072854730221289472 Data size: 5072854730221289472 Basic stats: COMPLETE Column stats: NONE
-                    Group By Operator
-                      keys: _col2 (type: int), _col3 (type: int)
-                      mode: hash
-                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 5072854730221289472 Data size: 5072854730221289472 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int), _col1 (type: int)
-                        sort order: ++
-                        Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
-                        Statistics: Num rows: 5072854730221289472 Data size: 5072854730221289472 Basic stats: COMPLETE Column stats: NONE
-        Reducer 15 
-            Reduce Operator Tree:
-              Group By Operator
-                keys: KEY._col0 (type: int), KEY._col1 (type: int)
-                mode: mergepartial
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 2536427365110644736 Data size: 2536427365110644736 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: int), _col1 (type: int)
-                  sort order: ++
-                  Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
-                  Statistics: Num rows: 2536427365110644736 Data size: 2536427365110644736 Basic stats: COMPLETE Column stats: NONE
-        Reducer 17 
-            Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                     Inner Join 0 to 2
-                     Inner Join 0 to 3
-                keys:
-                  0 
-                  1 
-                  2 
-                  3 
-                outputColumnNames: _col3, _col4
-                Statistics: Num rows: 9223372036854775807 Data size: 9223372036854775807 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  keys: _col4 (type: int), _col3 (type: int)
-                  mode: hash
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 9223372036854775807 Data size: 9223372036854775807 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: int), _col1 (type: int)
-                    sort order: ++
-                    Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
-                    Statistics: Num rows: 9223372036854775807 Data size: 9223372036854775807 Basic stats: COMPLETE Column stats: NONE
-        Reducer 18 
-            Reduce Operator Tree:
-              Group By Operator
-                keys: KEY._col0 (type: int), KEY._col1 (type: int)
-                mode: mergepartial
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 4611686018427387903 Data size: 4611686018427387904 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col1 (type: int), _col0 (type: int)
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 4611686018427387903 Data size: 4611686018427387904 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col1 (type: int)
-                    sort order: +
-                    Map-reduce partition columns: _col1 (type: int)
-                    Statistics: Num rows: 4611686018427387903 Data size: 4611686018427387904 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col0 (type: int)
         Reducer 2 
             Local Work:
               Map Reduce Local Work
@@ -364,7 +243,7 @@ STAGE PLANS:
                     1 _col0 (type: int)
                   outputColumnNames: _col3, _col4, _col5, _col6
                   input vertices:
-                    1 Map 10
+                    1 Map 9
                   Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col4 (type: int)
@@ -376,59 +255,46 @@ STAGE PLANS:
             Reduce Operator Tree:
               Join Operator
                 condition map:
-                     Left Outer Join 0 to 1
+                     Left Semi Join 0 to 1
+                     Left Outer Join 0 to 2
                 keys:
                   0 _col4 (type: int)
                   1 _col0 (type: int)
-                outputColumnNames: _col3, _col4, _col5, _col6, _col14
-                Statistics: Num rows: 210834322 Data size: 28667370686 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col3 (type: int), _col4 (type: int)
-                  sort order: ++
-                  Map-reduce partition columns: _col3 (type: int), _col4 (type: int)
-                  Statistics: Num rows: 210834322 Data size: 28667370686 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col14 (type: boolean)
-        Reducer 4 
-            Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 _col3 (type: int), _col4 (type: int)
-                  1 _col0 (type: int), _col1 (type: int)
-                outputColumnNames: _col4, _col5, _col6, _col14
-                Statistics: Num rows: 2790070162094850048 Data size: 2790070162094850048 Basic stats: COMPLETE Column stats: NONE
+                  2 _col0 (type: int)
+                outputColumnNames: _col3, _col4, _col5, _col6, _col14, _col16
+                residual filter predicates: {(_col3 <> _col14)}
+                Statistics: Num rows: 421668645 Data size: 57334741373 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col14 (type: boolean)
+                  expressions: _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col16 (type: boolean)
                   outputColumnNames: _col4, _col5, _col6, _col16
-                  Statistics: Num rows: 2790070162094850048 Data size: 2790070162094850048 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 421668645 Data size: 57334741373 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: _col16 is null (type: boolean)
-                    Statistics: Num rows: 1395035081047425024 Data size: 1395035081047425024 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 210834322 Data size: 28667370618 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2))
                       outputColumnNames: _col4, _col5, _col6
-                      Statistics: Num rows: 1395035081047425024 Data size: 1395035081047425024 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 210834322 Data size: 28667370618 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
                         aggregations: sum(_col5), sum(_col6)
                         keys: _col4 (type: int)
                         mode: hash
                         outputColumnNames: _col0, _col2, _col3
-                        Statistics: Num rows: 1395035081047425024 Data size: 1395035081047425024 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 210834322 Data size: 28667370618 Basic stats: COMPLETE Column stats: NONE
                         Reduce Output Operator
                           key expressions: _col0 (type: int)
                           sort order: +
                           Map-reduce partition columns: _col0 (type: int)
-                          Statistics: Num rows: 1395035081047425024 Data size: 1395035081047425024 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 210834322 Data size: 28667370618 Basic stats: COMPLETE Column stats: NONE
                           value expressions: _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2))
-        Reducer 5 
+        Reducer 4 
             Reduce Operator Tree:
               Group By Operator
                 aggregations: sum(VALUE._col0), sum(VALUE._col1)
                 keys: KEY._col0 (type: int)
                 mode: partial2
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 1395035081047425024 Data size: 1395035081047425024 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 210834322 Data size: 28667370618 Basic stats: COMPLETE Column stats: NONE
                 Group By Operator
                   aggregations: count(_col0), sum(_col1), sum(_col2)
                   mode: partial2
@@ -438,7 +304,7 @@ STAGE PLANS:
                     sort order: 
                     Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col0 (type: bigint), _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2))
-        Reducer 6 
+        Reducer 5 
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2)
@@ -455,7 +321,7 @@ STAGE PLANS:
                     Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE
                     TopN Hash Memory Usage: 0.1
                     value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2))
-        Reducer 7 
+        Reducer 6 
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: decimal(17,2)), VALUE._col1 (type: decimal(17,2))