You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by vg...@apache.org on 2017/11/07 06:27:28 UTC

[09/17] hive git commit: HIVE-17767 Rewrite correlated EXISTS/IN subqueries into LEFT SEMI JOIN (Vineet Garg, reviewed by Ashutosh Chauhan)

http://git-wip-us.apache.org/repos/asf/hive/blob/aee0eaa0/ql/src/test/results/clientpositive/perf/tez/query10.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query10.q.out b/ql/src/test/results/clientpositive/perf/tez/query10.q.out
index cf3651b..457e55e 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query10.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query10.q.out
@@ -117,172 +117,160 @@ POSTHOOK: type: QUERY
 Plan optimized by CBO.
 
 Vertex dependency in root stage
-Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 16 (SIMPLE_EDGE)
-Reducer 12 <- Reducer 11 (SIMPLE_EDGE)
-Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 16 (SIMPLE_EDGE)
+Reducer 10 <- Map 11 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE)
+Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 16 (SIMPLE_EDGE)
+Reducer 13 <- Reducer 12 (SIMPLE_EDGE)
+Reducer 14 <- Map 11 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE)
 Reducer 15 <- Reducer 14 (SIMPLE_EDGE)
-Reducer 17 <- Map 16 (SIMPLE_EDGE), Map 19 (SIMPLE_EDGE)
-Reducer 18 <- Reducer 17 (SIMPLE_EDGE)
-Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE)
-Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
-Reducer 4 <- Reducer 12 (ONE_TO_ONE_EDGE), Reducer 15 (ONE_TO_ONE_EDGE), Reducer 3 (SIMPLE_EDGE)
-Reducer 5 <- Reducer 18 (ONE_TO_ONE_EDGE), Reducer 4 (SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE)
+Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+Reducer 4 <- Reducer 10 (SIMPLE_EDGE), Reducer 13 (ONE_TO_ONE_EDGE), Reducer 15 (ONE_TO_ONE_EDGE), Reducer 3 (SIMPLE_EDGE)
+Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
 Reducer 6 <- Reducer 5 (SIMPLE_EDGE)
-Reducer 7 <- Reducer 6 (SIMPLE_EDGE)
 
 Stage-0
   Fetch Operator
     limit:100
     Stage-1
-      Reducer 7
-      File Output Operator [FS_74]
-        Limit [LIM_73] (rows=100 width=88)
+      Reducer 6
+      File Output Operator [FS_69]
+        Limit [LIM_68] (rows=100 width=88)
           Number of rows:100
-          Select Operator [SEL_72] (rows=383325119 width=88)
+          Select Operator [SEL_67] (rows=1045432122 width=88)
             Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"]
-          <-Reducer 6 [SIMPLE_EDGE]
-            SHUFFLE [RS_71]
-              Select Operator [SEL_70] (rows=383325119 width=88)
+          <-Reducer 5 [SIMPLE_EDGE]
+            SHUFFLE [RS_66]
+              Select Operator [SEL_65] (rows=1045432122 width=88)
                 Output:["_col0","_col1","_col2","_col3","_col4","_col6","_col8","_col10","_col12"]
-                Group By Operator [GBY_69] (rows=383325119 width=88)
+                Group By Operator [GBY_64] (rows=1045432122 width=88)
                   Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7
-                <-Reducer 5 [SIMPLE_EDGE]
-                  SHUFFLE [RS_68]
+                <-Reducer 4 [SIMPLE_EDGE]
+                  SHUFFLE [RS_63]
                     PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
-                    Group By Operator [GBY_67] (rows=766650239 width=88)
+                    Group By Operator [GBY_62] (rows=2090864244 width=88)
                       Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["count()"],keys:_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
-                      Select Operator [SEL_66] (rows=766650239 width=88)
+                      Select Operator [SEL_61] (rows=2090864244 width=88)
                         Output:["_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"]
-                        Filter Operator [FIL_65] (rows=766650239 width=88)
-                          predicate:(_col16 is not null or _col18 is not null)
-                          Merge Join Operator [MERGEJOIN_112] (rows=766650239 width=88)
-                            Conds:RS_62._col0=RS_63._col0(Left Outer),Output:["_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col16","_col18"]
-                          <-Reducer 18 [ONE_TO_ONE_EDGE]
-                            FORWARD [RS_63]
+                        Filter Operator [FIL_60] (rows=2090864244 width=88)
+                          predicate:(_col15 is not null or _col17 is not null)
+                          Merge Join Operator [MERGEJOIN_108] (rows=2090864244 width=88)
+                            Conds:RS_55._col0=RS_56._col0(Left Semi),RS_55._col0=RS_57._col0(Left Outer),RS_55._col0=RS_58._col0(Left Outer),Output:["_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col15","_col17"]
+                          <-Reducer 10 [SIMPLE_EDGE]
+                            SHUFFLE [RS_56]
                               PartitionCols:_col0
-                              Select Operator [SEL_61] (rows=158394413 width=135)
+                              Group By Operator [GBY_54] (rows=633595212 width=88)
+                                Output:["_col0"],keys:_col0
+                                Select Operator [SEL_18] (rows=633595212 width=88)
+                                  Output:["_col0"]
+                                  Merge Join Operator [MERGEJOIN_105] (rows=633595212 width=88)
+                                    Conds:RS_15._col0=RS_16._col0(Inner),Output:["_col1"]
+                                  <-Map 11 [SIMPLE_EDGE]
+                                    SHUFFLE [RS_16]
+                                      PartitionCols:_col0
+                                      Select Operator [SEL_14] (rows=4058 width=1119)
+                                        Output:["_col0"]
+                                        Filter Operator [FIL_98] (rows=4058 width=1119)
+                                          predicate:((d_year = 2002) and d_date_sk is not null and d_moy BETWEEN 4 AND 7)
+                                          TableScan [TS_12] (rows=73049 width=1119)
+                                            default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"]
+                                  <-Map 9 [SIMPLE_EDGE]
+                                    SHUFFLE [RS_15]
+                                      PartitionCols:_col0
+                                      Select Operator [SEL_11] (rows=575995635 width=88)
+                                        Output:["_col0","_col1"]
+                                        Filter Operator [FIL_97] (rows=575995635 width=88)
+                                          predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null)
+                                          TableScan [TS_9] (rows=575995635 width=88)
+                                            default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk"]
+                          <-Reducer 13 [ONE_TO_ONE_EDGE]
+                            FORWARD [RS_57]
+                              PartitionCols:_col0
+                              Select Operator [SEL_32] (rows=79201469 width=135)
                                 Output:["_col0","_col1"]
-                                Group By Operator [GBY_60] (rows=158394413 width=135)
+                                Group By Operator [GBY_31] (rows=79201469 width=135)
                                   Output:["_col0"],keys:KEY._col0
-                                <-Reducer 17 [SIMPLE_EDGE]
-                                  SHUFFLE [RS_59]
+                                <-Reducer 12 [SIMPLE_EDGE]
+                                  SHUFFLE [RS_30]
                                     PartitionCols:_col0
-                                    Group By Operator [GBY_58] (rows=316788826 width=135)
+                                    Group By Operator [GBY_29] (rows=158402938 width=135)
                                       Output:["_col0"],keys:_col1
-                                      Merge Join Operator [MERGEJOIN_110] (rows=316788826 width=135)
-                                        Conds:RS_54._col0=RS_55._col0(Inner),Output:["_col1"]
+                                      Merge Join Operator [MERGEJOIN_106] (rows=158402938 width=135)
+                                        Conds:RS_25._col0=RS_26._col0(Inner),Output:["_col1"]
+                                      <-Map 11 [SIMPLE_EDGE]
+                                        SHUFFLE [RS_26]
+                                          PartitionCols:_col0
+                                           Please refer to the previous Select Operator [SEL_14]
                                       <-Map 16 [SIMPLE_EDGE]
-                                        SHUFFLE [RS_55]
+                                        SHUFFLE [RS_25]
                                           PartitionCols:_col0
-                                          Select Operator [SEL_28] (rows=4058 width=1119)
-                                            Output:["_col0"]
-                                            Filter Operator [FIL_103] (rows=4058 width=1119)
-                                              predicate:((d_year = 2002) and d_date_sk is not null and d_moy BETWEEN 4 AND 7)
-                                              TableScan [TS_26] (rows=73049 width=1119)
-                                                default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"]
-                                      <-Map 19 [SIMPLE_EDGE]
-                                        SHUFFLE [RS_54]
+                                          Select Operator [SEL_21] (rows=144002668 width=135)
+                                            Output:["_col0","_col1"]
+                                            Filter Operator [FIL_99] (rows=144002668 width=135)
+                                              predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null)
+                                              TableScan [TS_19] (rows=144002668 width=135)
+                                                default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_bill_customer_sk"]
+                          <-Reducer 15 [ONE_TO_ONE_EDGE]
+                            FORWARD [RS_58]
+                              PartitionCols:_col0
+                              Select Operator [SEL_46] (rows=158394413 width=135)
+                                Output:["_col0","_col1"]
+                                Group By Operator [GBY_45] (rows=158394413 width=135)
+                                  Output:["_col0"],keys:KEY._col0
+                                <-Reducer 14 [SIMPLE_EDGE]
+                                  SHUFFLE [RS_44]
+                                    PartitionCols:_col0
+                                    Group By Operator [GBY_43] (rows=316788826 width=135)
+                                      Output:["_col0"],keys:_col1
+                                      Merge Join Operator [MERGEJOIN_107] (rows=316788826 width=135)
+                                        Conds:RS_39._col0=RS_40._col0(Inner),Output:["_col1"]
+                                      <-Map 11 [SIMPLE_EDGE]
+                                        SHUFFLE [RS_40]
+                                          PartitionCols:_col0
+                                           Please refer to the previous Select Operator [SEL_14]
+                                      <-Map 17 [SIMPLE_EDGE]
+                                        SHUFFLE [RS_39]
                                           PartitionCols:_col0
-                                          Select Operator [SEL_50] (rows=287989836 width=135)
+                                          Select Operator [SEL_35] (rows=287989836 width=135)
                                             Output:["_col0","_col1"]
-                                            Filter Operator [FIL_104] (rows=287989836 width=135)
+                                            Filter Operator [FIL_101] (rows=287989836 width=135)
                                               predicate:(cs_ship_customer_sk is not null and cs_sold_date_sk is not null)
-                                              TableScan [TS_48] (rows=287989836 width=135)
+                                              TableScan [TS_33] (rows=287989836 width=135)
                                                 default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_ship_customer_sk"]
-                          <-Reducer 4 [SIMPLE_EDGE]
-                            SHUFFLE [RS_62]
+                          <-Reducer 3 [SIMPLE_EDGE]
+                            SHUFFLE [RS_55]
                               PartitionCols:_col0
-                              Select Operator [SEL_47] (rows=696954748 width=88)
-                                Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col16"]
-                                Merge Join Operator [MERGEJOIN_111] (rows=696954748 width=88)
-                                  Conds:RS_43._col0=RS_44._col0(Left Outer),RS_43._col0=RS_45._col0(Inner),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col15"]
-                                <-Reducer 12 [ONE_TO_ONE_EDGE]
-                                  FORWARD [RS_44]
-                                    PartitionCols:_col0
-                                    Select Operator [SEL_22] (rows=79201469 width=135)
-                                      Output:["_col0","_col1"]
-                                      Group By Operator [GBY_21] (rows=79201469 width=135)
-                                        Output:["_col0"],keys:KEY._col0
-                                      <-Reducer 11 [SIMPLE_EDGE]
-                                        SHUFFLE [RS_20]
-                                          PartitionCols:_col0
-                                          Group By Operator [GBY_19] (rows=158402938 width=135)
-                                            Output:["_col0"],keys:_col1
-                                            Merge Join Operator [MERGEJOIN_108] (rows=158402938 width=135)
-                                              Conds:RS_15._col0=RS_16._col0(Inner),Output:["_col1"]
-                                            <-Map 16 [SIMPLE_EDGE]
-                                              SHUFFLE [RS_16]
-                                                PartitionCols:_col0
-                                                 Please refer to the previous Select Operator [SEL_28]
-                                            <-Map 10 [SIMPLE_EDGE]
-                                              SHUFFLE [RS_15]
-                                                PartitionCols:_col0
-                                                Select Operator [SEL_11] (rows=144002668 width=135)
-                                                  Output:["_col0","_col1"]
-                                                  Filter Operator [FIL_100] (rows=144002668 width=135)
-                                                    predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null)
-                                                    TableScan [TS_9] (rows=144002668 width=135)
-                                                      default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_bill_customer_sk"]
-                                <-Reducer 15 [ONE_TO_ONE_EDGE]
-                                  FORWARD [RS_45]
-                                    PartitionCols:_col0
-                                    Group By Operator [GBY_35] (rows=316797606 width=88)
-                                      Output:["_col0"],keys:KEY._col0
-                                    <-Reducer 14 [SIMPLE_EDGE]
-                                      SHUFFLE [RS_34]
-                                        PartitionCols:_col0
-                                        Group By Operator [GBY_33] (rows=633595212 width=88)
-                                          Output:["_col0"],keys:_col1
-                                          Merge Join Operator [MERGEJOIN_109] (rows=633595212 width=88)
-                                            Conds:RS_29._col0=RS_30._col0(Inner),Output:["_col1"]
-                                          <-Map 16 [SIMPLE_EDGE]
-                                            SHUFFLE [RS_30]
-                                              PartitionCols:_col0
-                                               Please refer to the previous Select Operator [SEL_28]
-                                          <-Map 13 [SIMPLE_EDGE]
-                                            SHUFFLE [RS_29]
-                                              PartitionCols:_col0
-                                              Select Operator [SEL_25] (rows=575995635 width=88)
-                                                Output:["_col0","_col1"]
-                                                Filter Operator [FIL_102] (rows=575995635 width=88)
-                                                  predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null)
-                                                  TableScan [TS_23] (rows=575995635 width=88)
-                                                    default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk"]
-                                <-Reducer 3 [SIMPLE_EDGE]
-                                  SHUFFLE [RS_43]
-                                    PartitionCols:_col0
-                                    Merge Join Operator [MERGEJOIN_107] (rows=96800003 width=860)
-                                      Conds:RS_40._col1=RS_41._col0(Inner),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"]
-                                    <-Map 9 [SIMPLE_EDGE]
-                                      SHUFFLE [RS_41]
-                                        PartitionCols:_col0
-                                        Select Operator [SEL_8] (rows=1861800 width=385)
-                                          Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"]
-                                          Filter Operator [FIL_99] (rows=1861800 width=385)
-                                            predicate:cd_demo_sk is not null
-                                            TableScan [TS_6] (rows=1861800 width=385)
-                                              default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status","cd_purchase_estimate","cd_credit_rating","cd_dep_count","cd_dep_employed_count","cd_dep_college_count"]
-                                    <-Reducer 2 [SIMPLE_EDGE]
-                                      SHUFFLE [RS_40]
-                                        PartitionCols:_col1
-                                        Merge Join Operator [MERGEJOIN_106] (rows=88000001 width=860)
-                                          Conds:RS_37._col2=RS_38._col0(Inner),Output:["_col0","_col1"]
-                                        <-Map 1 [SIMPLE_EDGE]
-                                          SHUFFLE [RS_37]
-                                            PartitionCols:_col2
-                                            Select Operator [SEL_2] (rows=80000000 width=860)
-                                              Output:["_col0","_col1","_col2"]
-                                              Filter Operator [FIL_97] (rows=80000000 width=860)
-                                                predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null)
-                                                TableScan [TS_0] (rows=80000000 width=860)
-                                                  default@customer,c,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk"]
-                                        <-Map 8 [SIMPLE_EDGE]
-                                          SHUFFLE [RS_38]
-                                            PartitionCols:_col0
-                                            Select Operator [SEL_5] (rows=20000000 width=1014)
-                                              Output:["_col0"]
-                                              Filter Operator [FIL_98] (rows=20000000 width=1014)
-                                                predicate:((ca_county) IN ('Walker County', 'Richland County', 'Gaines County', 'Douglas County', 'Dona Ana County') and ca_address_sk is not null)
-                                                TableScan [TS_3] (rows=40000000 width=1014)
-                                                  default@customer_address,ca,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_county"]
+                              Merge Join Operator [MERGEJOIN_104] (rows=96800003 width=860)
+                                Conds:RS_50._col1=RS_51._col0(Inner),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"]
+                              <-Map 8 [SIMPLE_EDGE]
+                                SHUFFLE [RS_51]
+                                  PartitionCols:_col0
+                                  Select Operator [SEL_8] (rows=1861800 width=385)
+                                    Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"]
+                                    Filter Operator [FIL_96] (rows=1861800 width=385)
+                                      predicate:cd_demo_sk is not null
+                                      TableScan [TS_6] (rows=1861800 width=385)
+                                        default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status","cd_purchase_estimate","cd_credit_rating","cd_dep_count","cd_dep_employed_count","cd_dep_college_count"]
+                              <-Reducer 2 [SIMPLE_EDGE]
+                                SHUFFLE [RS_50]
+                                  PartitionCols:_col1
+                                  Merge Join Operator [MERGEJOIN_103] (rows=88000001 width=860)
+                                    Conds:RS_47._col2=RS_48._col0(Inner),Output:["_col0","_col1"]
+                                  <-Map 1 [SIMPLE_EDGE]
+                                    SHUFFLE [RS_47]
+                                      PartitionCols:_col2
+                                      Select Operator [SEL_2] (rows=80000000 width=860)
+                                        Output:["_col0","_col1","_col2"]
+                                        Filter Operator [FIL_94] (rows=80000000 width=860)
+                                          predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_customer_sk is not null)
+                                          TableScan [TS_0] (rows=80000000 width=860)
+                                            default@customer,c,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk"]
+                                  <-Map 7 [SIMPLE_EDGE]
+                                    SHUFFLE [RS_48]
+                                      PartitionCols:_col0
+                                      Select Operator [SEL_5] (rows=20000000 width=1014)
+                                        Output:["_col0"]
+                                        Filter Operator [FIL_95] (rows=20000000 width=1014)
+                                          predicate:((ca_county) IN ('Walker County', 'Richland County', 'Gaines County', 'Douglas County', 'Dona Ana County') and ca_address_sk is not null)
+                                          TableScan [TS_3] (rows=40000000 width=1014)
+                                            default@customer_address,ca,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_county"]
 

http://git-wip-us.apache.org/repos/asf/hive/blob/aee0eaa0/ql/src/test/results/clientpositive/perf/tez/query14.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query14.q.out b/ql/src/test/results/clientpositive/perf/tez/query14.q.out
index b2a45f1..dfdd53d 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query14.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query14.q.out
@@ -1,6 +1,6 @@
-Warning: Shuffle Join MERGEJOIN[890][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 5' is a cross product
-Warning: Shuffle Join MERGEJOIN[891][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 12' is a cross product
 Warning: Shuffle Join MERGEJOIN[892][tables = [$hdt$_2, $hdt$_3, $hdt$_1]] in Stage 'Reducer 16' is a cross product
+Warning: Shuffle Join MERGEJOIN[891][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 12' is a cross product
+Warning: Shuffle Join MERGEJOIN[890][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 5' is a cross product
 PREHOOK: query: explain
 with  cross_items as
  (select i_item_sk ss_item_sk

http://git-wip-us.apache.org/repos/asf/hive/blob/aee0eaa0/ql/src/test/results/clientpositive/perf/tez/query16.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query16.q.out b/ql/src/test/results/clientpositive/perf/tez/query16.q.out
index a7b710d..8107a05 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query16.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query16.q.out
@@ -1,4 +1,3 @@
-Warning: Shuffle Join MERGEJOIN[113][tables = [$hdt$_2, $hdt$_3, $hdt$_1, $hdt$_4]] in Stage 'Reducer 18' is a cross product
 PREHOOK: query: explain
 select  
    count(distinct cs_order_number) as `order count`
@@ -63,179 +62,125 @@ Plan optimized by CBO.
 
 Vertex dependency in root stage
 Reducer 14 <- Map 13 (SIMPLE_EDGE)
-Reducer 16 <- Map 15 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE)
-Reducer 17 <- Reducer 16 (SIMPLE_EDGE)
-Reducer 18 <- Map 15 (CUSTOM_SIMPLE_EDGE), Map 20 (CUSTOM_SIMPLE_EDGE), Map 21 (CUSTOM_SIMPLE_EDGE), Map 22 (CUSTOM_SIMPLE_EDGE)
-Reducer 19 <- Reducer 18 (SIMPLE_EDGE)
-Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE)
-Reducer 3 <- Map 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
-Reducer 4 <- Map 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
-Reducer 5 <- Reducer 14 (ONE_TO_ONE_EDGE), Reducer 4 (SIMPLE_EDGE)
-Reducer 6 <- Reducer 17 (ONE_TO_ONE_EDGE), Reducer 5 (SIMPLE_EDGE)
-Reducer 7 <- Reducer 6 (SIMPLE_EDGE)
-Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE)
-Reducer 9 <- Reducer 8 (SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE)
+Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
+Reducer 5 <- Map 12 (SIMPLE_EDGE), Reducer 14 (ONE_TO_ONE_EDGE), Reducer 4 (SIMPLE_EDGE)
+Reducer 6 <- Reducer 5 (SIMPLE_EDGE)
+Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE)
+Reducer 8 <- Reducer 7 (SIMPLE_EDGE)
 
 Stage-0
   Fetch Operator
     limit:-1
     Stage-1
-      Reducer 9
-      File Output Operator [FS_74]
-        Limit [LIM_72] (rows=1 width=344)
+      Reducer 8
+      File Output Operator [FS_49]
+        Limit [LIM_47] (rows=1 width=344)
           Number of rows:100
-          Select Operator [SEL_71] (rows=1 width=344)
+          Select Operator [SEL_46] (rows=1 width=344)
             Output:["_col0","_col1","_col2"]
-          <-Reducer 8 [SIMPLE_EDGE]
-            SHUFFLE [RS_70]
-              Select Operator [SEL_69] (rows=1 width=344)
+          <-Reducer 7 [SIMPLE_EDGE]
+            SHUFFLE [RS_45]
+              Select Operator [SEL_44] (rows=1 width=344)
                 Output:["_col1","_col2","_col3"]
-                Group By Operator [GBY_112] (rows=1 width=344)
+                Group By Operator [GBY_77] (rows=1 width=344)
                   Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"]
-                <-Reducer 7 [CUSTOM_SIMPLE_EDGE]
-                  PARTITION_ONLY_SHUFFLE [RS_111]
-                    Group By Operator [GBY_110] (rows=1 width=344)
+                <-Reducer 6 [CUSTOM_SIMPLE_EDGE]
+                  PARTITION_ONLY_SHUFFLE [RS_76]
+                    Group By Operator [GBY_75] (rows=1 width=344)
                       Output:["_col0","_col1","_col2"],aggregations:["count(_col0)","sum(_col1)","sum(_col2)"]
-                      Group By Operator [GBY_109] (rows=1395035081047425024 width=1)
+                      Group By Operator [GBY_74] (rows=421645953 width=135)
                         Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0
-                      <-Reducer 6 [SIMPLE_EDGE]
-                        SHUFFLE [RS_108]
+                      <-Reducer 5 [SIMPLE_EDGE]
+                        SHUFFLE [RS_73]
                           PartitionCols:_col0
-                          Group By Operator [GBY_107] (rows=1395035081047425024 width=1)
+                          Group By Operator [GBY_72] (rows=421645953 width=135)
                             Output:["_col0","_col2","_col3"],aggregations:["sum(_col5)","sum(_col6)"],keys:_col4
-                            Select Operator [SEL_65] (rows=1395035081047425024 width=1)
+                            Select Operator [SEL_40] (rows=421645953 width=135)
                               Output:["_col4","_col5","_col6"]
-                              Filter Operator [FIL_64] (rows=1395035081047425024 width=1)
+                              Filter Operator [FIL_39] (rows=421645953 width=135)
                                 predicate:_col16 is null
-                                Select Operator [SEL_63] (rows=2790070162094850048 width=1)
+                                Select Operator [SEL_38] (rows=843291907 width=135)
                                   Output:["_col4","_col5","_col6","_col16"]
-                                  Merge Join Operator [MERGEJOIN_119] (rows=2790070162094850048 width=1)
-                                    Conds:RS_60._col3, _col4=RS_61._col0, _col1(Inner),Output:["_col4","_col5","_col6","_col14"]
-                                  <-Reducer 17 [ONE_TO_ONE_EDGE]
-                                    FORWARD [RS_61]
-                                      PartitionCols:_col0, _col1
-                                      Group By Operator [GBY_46] (rows=2536427365110644736 width=1)
-                                        Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
-                                      <-Reducer 16 [SIMPLE_EDGE]
-                                        SHUFFLE [RS_45]
-                                          PartitionCols:_col0, _col1
-                                          Group By Operator [GBY_44] (rows=5072854730221289472 width=1)
-                                            Output:["_col0","_col1"],keys:_col2, _col3
-                                            Select Operator [SEL_43] (rows=5072854730221289472 width=1)
-                                              Output:["_col2","_col3"]
-                                              Filter Operator [FIL_42] (rows=5072854730221289472 width=1)
-                                                predicate:(_col2 <> _col0)
-                                                Merge Join Operator [MERGEJOIN_117] (rows=5072854730221289472 width=1)
-                                                  Conds:RS_39._col1=RS_40._col1(Inner),Output:["_col0","_col2","_col3"]
-                                                <-Map 15 [SIMPLE_EDGE]
-                                                  PARTITION_ONLY_SHUFFLE [RS_39]
-                                                    PartitionCols:_col1
-                                                    Select Operator [SEL_20] (rows=287989836 width=135)
-                                                      Output:["_col0","_col1"]
-                                                      TableScan [TS_19] (rows=287989836 width=135)
-                                                        default@catalog_sales,cs2,Tbl:COMPLETE,Col:NONE,Output:["cs_warehouse_sk","cs_order_number"]
-                                                <-Reducer 19 [SIMPLE_EDGE]
-                                                  SHUFFLE [RS_40]
-                                                    PartitionCols:_col1
-                                                    Select Operator [SEL_38] (rows=4611686018427387903 width=1)
-                                                      Output:["_col0","_col1"]
-                                                      Group By Operator [GBY_37] (rows=4611686018427387903 width=1)
-                                                        Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
-                                                      <-Reducer 18 [SIMPLE_EDGE]
-                                                        SHUFFLE [RS_36]
-                                                          PartitionCols:_col0, _col1
-                                                          Group By Operator [GBY_35] (rows=9223372036854775807 width=1)
-                                                            Output:["_col0","_col1"],keys:_col4, _col3
-                                                            Merge Join Operator [MERGEJOIN_113] (rows=9223372036854775807 width=1)
-                                                              Conds:(Inner),(Inner),(Inner),Output:["_col3","_col4"]
-                                                            <-Map 15 [CUSTOM_SIMPLE_EDGE]
-                                                              PARTITION_ONLY_SHUFFLE [RS_32]
-                                                                 Please refer to the previous Select Operator [SEL_20]
-                                                            <-Map 20 [CUSTOM_SIMPLE_EDGE]
-                                                              PARTITION_ONLY_SHUFFLE [RS_29]
-                                                                Select Operator [SEL_22] (rows=73049 width=4)
-                                                                  TableScan [TS_21] (rows=73049 width=1119)
-                                                                    default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE
-                                                            <-Map 21 [CUSTOM_SIMPLE_EDGE]
-                                                              PARTITION_ONLY_SHUFFLE [RS_30]
-                                                                Select Operator [SEL_24] (rows=60 width=4)
-                                                                  TableScan [TS_23] (rows=60 width=2045)
-                                                                    default@call_center,call_center,Tbl:COMPLETE,Col:COMPLETE
-                                                            <-Map 22 [CUSTOM_SIMPLE_EDGE]
-                                                              PARTITION_ONLY_SHUFFLE [RS_31]
-                                                                Select Operator [SEL_26] (rows=40000000 width=4)
-                                                                  TableScan [TS_25] (rows=40000000 width=1014)
-                                                                    default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE
-                                  <-Reducer 5 [SIMPLE_EDGE]
-                                    SHUFFLE [RS_60]
-                                      PartitionCols:_col3, _col4
-                                      Merge Join Operator [MERGEJOIN_118] (rows=421645953 width=135)
-                                        Conds:RS_57._col4=RS_58._col0(Left Outer),Output:["_col3","_col4","_col5","_col6","_col14"]
-                                      <-Reducer 14 [ONE_TO_ONE_EDGE]
-                                        FORWARD [RS_58]
+                                  Merge Join Operator [MERGEJOIN_81] (rows=843291907 width=135)
+                                    Conds:RS_34._col4=RS_35._col0(Left Semi),RS_34._col4=RS_36._col0(Left Outer),Output:["_col3","_col4","_col5","_col6","_col14","_col16"],residual filter predicates:{(_col3 <> _col14)}
+                                  <-Map 12 [SIMPLE_EDGE]
+                                    SHUFFLE [RS_35]
+                                      PartitionCols:_col0
+                                      Group By Operator [GBY_33] (rows=287989836 width=135)
+                                        Output:["_col0","_col1"],keys:_col0, _col1
+                                        Select Operator [SEL_24] (rows=287989836 width=135)
+                                          Output:["_col0","_col1"]
+                                          Filter Operator [FIL_70] (rows=287989836 width=135)
+                                            predicate:(cs_order_number is not null and cs_warehouse_sk is not null)
+                                            TableScan [TS_22] (rows=287989836 width=135)
+                                              default@catalog_sales,cs2,Tbl:COMPLETE,Col:NONE,Output:["cs_warehouse_sk","cs_order_number"]
+                                  <-Reducer 14 [ONE_TO_ONE_EDGE]
+                                    FORWARD [RS_36]
+                                      PartitionCols:_col0
+                                      Select Operator [SEL_31] (rows=14399440 width=106)
+                                        Output:["_col0","_col1"]
+                                        Group By Operator [GBY_30] (rows=14399440 width=106)
+                                          Output:["_col0"],keys:KEY._col0
+                                        <-Map 13 [SIMPLE_EDGE]
+                                          SHUFFLE [RS_29]
+                                            PartitionCols:_col0
+                                            Group By Operator [GBY_28] (rows=28798881 width=106)
+                                              Output:["_col0"],keys:cr_order_number
+                                              Filter Operator [FIL_71] (rows=28798881 width=106)
+                                                predicate:cr_order_number is not null
+                                                TableScan [TS_25] (rows=28798881 width=106)
+                                                  default@catalog_returns,cr1,Tbl:COMPLETE,Col:NONE,Output:["cr_order_number"]
+                                  <-Reducer 4 [SIMPLE_EDGE]
+                                    SHUFFLE [RS_34]
+                                      PartitionCols:_col4
+                                      Merge Join Operator [MERGEJOIN_80] (rows=383314495 width=135)
+                                        Conds:RS_18._col2=RS_19._col0(Inner),Output:["_col3","_col4","_col5","_col6"]
+                                      <-Map 11 [SIMPLE_EDGE]
+                                        SHUFFLE [RS_19]
                                           PartitionCols:_col0
-                                          Select Operator [SEL_18] (rows=14399440 width=106)
-                                            Output:["_col0","_col1"]
-                                            Group By Operator [GBY_17] (rows=14399440 width=106)
-                                              Output:["_col0"],keys:KEY._col0
-                                            <-Map 13 [SIMPLE_EDGE]
-                                              SHUFFLE [RS_16]
-                                                PartitionCols:_col0
-                                                Group By Operator [GBY_15] (rows=28798881 width=106)
-                                                  Output:["_col0"],keys:cr_order_number
-                                                  Filter Operator [FIL_104] (rows=28798881 width=106)
-                                                    predicate:cr_order_number is not null
-                                                    TableScan [TS_12] (rows=28798881 width=106)
-                                                      default@catalog_returns,cr1,Tbl:COMPLETE,Col:NONE,Output:["cr_order_number"]
-                                      <-Reducer 4 [SIMPLE_EDGE]
-                                        SHUFFLE [RS_57]
-                                          PartitionCols:_col4
-                                          Merge Join Operator [MERGEJOIN_116] (rows=383314495 width=135)
-                                            Conds:RS_54._col2=RS_55._col0(Inner),Output:["_col3","_col4","_col5","_col6"]
-                                          <-Map 12 [SIMPLE_EDGE]
-                                            SHUFFLE [RS_55]
+                                          Select Operator [SEL_11] (rows=30 width=2045)
+                                            Output:["_col0"]
+                                            Filter Operator [FIL_69] (rows=30 width=2045)
+                                              predicate:((cc_county) IN ('Ziebach County', 'Levy County', 'Huron County', 'Franklin Parish', 'Daviess County') and cc_call_center_sk is not null)
+                                              TableScan [TS_9] (rows=60 width=2045)
+                                                default@call_center,call_center,Tbl:COMPLETE,Col:NONE,Output:["cc_call_center_sk","cc_county"]
+                                      <-Reducer 3 [SIMPLE_EDGE]
+                                        SHUFFLE [RS_18]
+                                          PartitionCols:_col2
+                                          Merge Join Operator [MERGEJOIN_79] (rows=348467716 width=135)
+                                            Conds:RS_15._col1=RS_16._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6"]
+                                          <-Map 10 [SIMPLE_EDGE]
+                                            SHUFFLE [RS_16]
                                               PartitionCols:_col0
-                                              Select Operator [SEL_11] (rows=30 width=2045)
+                                              Select Operator [SEL_8] (rows=20000000 width=1014)
                                                 Output:["_col0"]
-                                                Filter Operator [FIL_103] (rows=30 width=2045)
-                                                  predicate:((cc_county) IN ('Ziebach County', 'Levy County', 'Huron County', 'Franklin Parish', 'Daviess County') and cc_call_center_sk is not null)
-                                                  TableScan [TS_9] (rows=60 width=2045)
-                                                    default@call_center,call_center,Tbl:COMPLETE,Col:NONE,Output:["cc_call_center_sk","cc_county"]
-                                          <-Reducer 3 [SIMPLE_EDGE]
-                                            SHUFFLE [RS_54]
-                                              PartitionCols:_col2
-                                              Merge Join Operator [MERGEJOIN_115] (rows=348467716 width=135)
-                                                Conds:RS_51._col1=RS_52._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6"]
-                                              <-Map 11 [SIMPLE_EDGE]
-                                                SHUFFLE [RS_52]
+                                                Filter Operator [FIL_68] (rows=20000000 width=1014)
+                                                  predicate:((ca_state = 'NY') and ca_address_sk is not null)
+                                                  TableScan [TS_6] (rows=40000000 width=1014)
+                                                    default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"]
+                                          <-Reducer 2 [SIMPLE_EDGE]
+                                            SHUFFLE [RS_15]
+                                              PartitionCols:_col1
+                                              Merge Join Operator [MERGEJOIN_78] (rows=316788826 width=135)
+                                                Conds:RS_12._col0=RS_13._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"]
+                                              <-Map 1 [SIMPLE_EDGE]
+                                                SHUFFLE [RS_12]
                                                   PartitionCols:_col0
-                                                  Select Operator [SEL_8] (rows=20000000 width=1014)
+                                                  Select Operator [SEL_2] (rows=287989836 width=135)
+                                                    Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"]
+                                                    Filter Operator [FIL_66] (rows=287989836 width=135)
+                                                      predicate:(cs_call_center_sk is not null and cs_order_number is not null and cs_ship_addr_sk is not null and cs_ship_date_sk is not null)
+                                                      TableScan [TS_0] (rows=287989836 width=135)
+                                                        default@catalog_sales,cs1,Tbl:COMPLETE,Col:NONE,Output:["cs_ship_date_sk","cs_ship_addr_sk","cs_call_center_sk","cs_warehouse_sk","cs_order_number","cs_ext_ship_cost","cs_net_profit"]
+                                              <-Map 9 [SIMPLE_EDGE]
+                                                SHUFFLE [RS_13]
+                                                  PartitionCols:_col0
+                                                  Select Operator [SEL_5] (rows=8116 width=1119)
                                                     Output:["_col0"]
-                                                    Filter Operator [FIL_102] (rows=20000000 width=1014)
-                                                      predicate:((ca_state = 'NY') and ca_address_sk is not null)
-                                                      TableScan [TS_6] (rows=40000000 width=1014)
-                                                        default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"]
-                                              <-Reducer 2 [SIMPLE_EDGE]
-                                                SHUFFLE [RS_51]
-                                                  PartitionCols:_col1
-                                                  Merge Join Operator [MERGEJOIN_114] (rows=316788826 width=135)
-                                                    Conds:RS_48._col0=RS_49._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"]
-                                                  <-Map 1 [SIMPLE_EDGE]
-                                                    SHUFFLE [RS_48]
-                                                      PartitionCols:_col0
-                                                      Select Operator [SEL_2] (rows=287989836 width=135)
-                                                        Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"]
-                                                        Filter Operator [FIL_100] (rows=287989836 width=135)
-                                                          predicate:(cs_call_center_sk is not null and cs_ship_addr_sk is not null and cs_ship_date_sk is not null)
-                                                          TableScan [TS_0] (rows=287989836 width=135)
-                                                            default@catalog_sales,cs1,Tbl:COMPLETE,Col:NONE,Output:["cs_ship_date_sk","cs_ship_addr_sk","cs_call_center_sk","cs_warehouse_sk","cs_order_number","cs_ext_ship_cost","cs_net_profit"]
-                                                  <-Map 10 [SIMPLE_EDGE]
-                                                    SHUFFLE [RS_49]
-                                                      PartitionCols:_col0
-                                                      Select Operator [SEL_5] (rows=8116 width=1119)
-                                                        Output:["_col0"]
-                                                        Filter Operator [FIL_101] (rows=8116 width=1119)
-                                                          predicate:(CAST( d_date AS TIMESTAMP) BETWEEN 2001-04-01 00:00:00.0 AND 2001-05-31 01:00:00.0 and d_date_sk is not null)
-                                                          TableScan [TS_3] (rows=73049 width=1119)
-                                                            default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"]
+                                                    Filter Operator [FIL_67] (rows=8116 width=1119)
+                                                      predicate:(CAST( d_date AS TIMESTAMP) BETWEEN 2001-04-01 00:00:00.0 AND 2001-05-31 01:00:00.0 and d_date_sk is not null)
+                                                      TableScan [TS_3] (rows=73049 width=1119)
+                                                        default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"]
 

http://git-wip-us.apache.org/repos/asf/hive/blob/aee0eaa0/ql/src/test/results/clientpositive/perf/tez/query23.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query23.q.out b/ql/src/test/results/clientpositive/perf/tez/query23.q.out
index 7112de6..3507278 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query23.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query23.q.out
@@ -1,5 +1,5 @@
-Warning: Shuffle Join MERGEJOIN[367][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 25' is a cross product
 Warning: Shuffle Join MERGEJOIN[369][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 30' is a cross product
+Warning: Shuffle Join MERGEJOIN[367][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 25' is a cross product
 PREHOOK: query: explain
 with frequent_ss_items as 
  (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt

http://git-wip-us.apache.org/repos/asf/hive/blob/aee0eaa0/ql/src/test/results/clientpositive/perf/tez/query35.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query35.q.out b/ql/src/test/results/clientpositive/perf/tez/query35.q.out
index a72f578..decbadb 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query35.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query35.q.out
@@ -113,172 +113,160 @@ POSTHOOK: type: QUERY
 Plan optimized by CBO.
 
 Vertex dependency in root stage
-Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE)
-Reducer 12 <- Reducer 11 (SIMPLE_EDGE)
-Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 18 (SIMPLE_EDGE)
+Reducer 10 <- Map 11 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE)
+Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 16 (SIMPLE_EDGE)
+Reducer 13 <- Reducer 12 (SIMPLE_EDGE)
+Reducer 14 <- Map 11 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE)
 Reducer 15 <- Reducer 14 (SIMPLE_EDGE)
-Reducer 16 <- Map 13 (SIMPLE_EDGE), Map 19 (SIMPLE_EDGE)
-Reducer 17 <- Reducer 16 (SIMPLE_EDGE)
-Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE)
-Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
-Reducer 4 <- Reducer 12 (ONE_TO_ONE_EDGE), Reducer 15 (ONE_TO_ONE_EDGE), Reducer 3 (SIMPLE_EDGE)
-Reducer 5 <- Reducer 17 (ONE_TO_ONE_EDGE), Reducer 4 (SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE)
+Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+Reducer 4 <- Reducer 10 (SIMPLE_EDGE), Reducer 13 (ONE_TO_ONE_EDGE), Reducer 15 (ONE_TO_ONE_EDGE), Reducer 3 (SIMPLE_EDGE)
+Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
 Reducer 6 <- Reducer 5 (SIMPLE_EDGE)
-Reducer 7 <- Reducer 6 (SIMPLE_EDGE)
 
 Stage-0
   Fetch Operator
     limit:-1
     Stage-1
-      Reducer 7
-      File Output Operator [FS_75]
-        Limit [LIM_73] (rows=100 width=88)
+      Reducer 6
+      File Output Operator [FS_70]
+        Limit [LIM_68] (rows=100 width=88)
           Number of rows:100
-          Select Operator [SEL_72] (rows=383325119 width=88)
+          Select Operator [SEL_67] (rows=1045432122 width=88)
             Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16"]
-          <-Reducer 6 [SIMPLE_EDGE]
-            SHUFFLE [RS_71]
-              Select Operator [SEL_70] (rows=383325119 width=88)
+          <-Reducer 5 [SIMPLE_EDGE]
+            SHUFFLE [RS_66]
+              Select Operator [SEL_65] (rows=1045432122 width=88)
                 Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col9","_col10","_col11","_col12","_col14","_col15","_col16","_col17"]
-                Group By Operator [GBY_69] (rows=383325119 width=88)
+                Group By Operator [GBY_64] (rows=1045432122 width=88)
                   Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15"],aggregations:["count(VALUE._col0)","avg(VALUE._col1)","max(VALUE._col2)","sum(VALUE._col3)","avg(VALUE._col4)","max(VALUE._col5)","sum(VALUE._col6)","avg(VALUE._col7)","max(VALUE._col8)","sum(VALUE._col9)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5
-                <-Reducer 5 [SIMPLE_EDGE]
-                  SHUFFLE [RS_68]
+                <-Reducer 4 [SIMPLE_EDGE]
+                  SHUFFLE [RS_63]
                     PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5
-                    Group By Operator [GBY_67] (rows=766650239 width=88)
+                    Group By Operator [GBY_62] (rows=2090864244 width=88)
                       Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15"],aggregations:["count()","avg(_col8)","max(_col8)","sum(_col8)","avg(_col9)","max(_col9)","sum(_col9)","avg(_col10)","max(_col10)","sum(_col10)"],keys:_col4, _col6, _col7, _col8, _col9, _col10
-                      Select Operator [SEL_66] (rows=766650239 width=88)
+                      Select Operator [SEL_61] (rows=2090864244 width=88)
                         Output:["_col4","_col6","_col7","_col8","_col9","_col10"]
-                        Filter Operator [FIL_65] (rows=766650239 width=88)
-                          predicate:(_col13 is not null or _col15 is not null)
-                          Merge Join Operator [MERGEJOIN_113] (rows=766650239 width=88)
-                            Conds:RS_62._col0=RS_63._col0(Left Outer),Output:["_col4","_col6","_col7","_col8","_col9","_col10","_col13","_col15"]
-                          <-Reducer 17 [ONE_TO_ONE_EDGE]
-                            FORWARD [RS_63]
+                        Filter Operator [FIL_60] (rows=2090864244 width=88)
+                          predicate:(_col12 is not null or _col14 is not null)
+                          Merge Join Operator [MERGEJOIN_109] (rows=2090864244 width=88)
+                            Conds:RS_55._col0=RS_56._col0(Left Semi),RS_55._col0=RS_57._col0(Left Outer),RS_55._col0=RS_58._col0(Left Outer),Output:["_col4","_col6","_col7","_col8","_col9","_col10","_col12","_col14"]
+                          <-Reducer 10 [SIMPLE_EDGE]
+                            SHUFFLE [RS_56]
                               PartitionCols:_col0
-                              Select Operator [SEL_61] (rows=158394413 width=135)
+                              Group By Operator [GBY_54] (rows=633595212 width=88)
+                                Output:["_col0"],keys:_col0
+                                Select Operator [SEL_18] (rows=633595212 width=88)
+                                  Output:["_col0"]
+                                  Merge Join Operator [MERGEJOIN_106] (rows=633595212 width=88)
+                                    Conds:RS_15._col0=RS_16._col0(Inner),Output:["_col1"]
+                                  <-Map 11 [SIMPLE_EDGE]
+                                    SHUFFLE [RS_16]
+                                      PartitionCols:_col0
+                                      Select Operator [SEL_14] (rows=12174 width=1119)
+                                        Output:["_col0"]
+                                        Filter Operator [FIL_99] (rows=12174 width=1119)
+                                          predicate:((d_qoy < 4) and (d_year = 1999) and d_date_sk is not null)
+                                          TableScan [TS_12] (rows=73049 width=1119)
+                                            default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_qoy"]
+                                  <-Map 9 [SIMPLE_EDGE]
+                                    SHUFFLE [RS_15]
+                                      PartitionCols:_col0
+                                      Select Operator [SEL_11] (rows=575995635 width=88)
+                                        Output:["_col0","_col1"]
+                                        Filter Operator [FIL_98] (rows=575995635 width=88)
+                                          predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null)
+                                          TableScan [TS_9] (rows=575995635 width=88)
+                                            default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk"]
+                          <-Reducer 13 [ONE_TO_ONE_EDGE]
+                            FORWARD [RS_57]
+                              PartitionCols:_col0
+                              Select Operator [SEL_32] (rows=79201469 width=135)
                                 Output:["_col0","_col1"]
-                                Group By Operator [GBY_60] (rows=158394413 width=135)
+                                Group By Operator [GBY_31] (rows=79201469 width=135)
                                   Output:["_col0"],keys:KEY._col0
-                                <-Reducer 16 [SIMPLE_EDGE]
-                                  SHUFFLE [RS_59]
+                                <-Reducer 12 [SIMPLE_EDGE]
+                                  SHUFFLE [RS_30]
                                     PartitionCols:_col0
-                                    Group By Operator [GBY_58] (rows=316788826 width=135)
+                                    Group By Operator [GBY_29] (rows=158402938 width=135)
                                       Output:["_col0"],keys:_col1
-                                      Merge Join Operator [MERGEJOIN_111] (rows=316788826 width=135)
-                                        Conds:RS_54._col0=RS_55._col0(Inner),Output:["_col1"]
-                                      <-Map 13 [SIMPLE_EDGE]
-                                        SHUFFLE [RS_55]
+                                      Merge Join Operator [MERGEJOIN_107] (rows=158402938 width=135)
+                                        Conds:RS_25._col0=RS_26._col0(Inner),Output:["_col1"]
+                                      <-Map 11 [SIMPLE_EDGE]
+                                        SHUFFLE [RS_26]
                                           PartitionCols:_col0
-                                          Select Operator [SEL_14] (rows=12174 width=1119)
-                                            Output:["_col0"]
-                                            Filter Operator [FIL_102] (rows=12174 width=1119)
-                                              predicate:((d_qoy < 4) and (d_year = 1999) and d_date_sk is not null)
-                                              TableScan [TS_12] (rows=73049 width=1119)
-                                                default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_qoy"]
-                                      <-Map 19 [SIMPLE_EDGE]
-                                        SHUFFLE [RS_54]
+                                           Please refer to the previous Select Operator [SEL_14]
+                                      <-Map 16 [SIMPLE_EDGE]
+                                        SHUFFLE [RS_25]
                                           PartitionCols:_col0
-                                          Select Operator [SEL_50] (rows=287989836 width=135)
+                                          Select Operator [SEL_21] (rows=144002668 width=135)
                                             Output:["_col0","_col1"]
-                                            Filter Operator [FIL_105] (rows=287989836 width=135)
-                                              predicate:(cs_ship_customer_sk is not null and cs_sold_date_sk is not null)
-                                              TableScan [TS_48] (rows=287989836 width=135)
-                                                default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_ship_customer_sk"]
-                          <-Reducer 4 [SIMPLE_EDGE]
-                            SHUFFLE [RS_62]
+                                            Filter Operator [FIL_100] (rows=144002668 width=135)
+                                              predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null)
+                                              TableScan [TS_19] (rows=144002668 width=135)
+                                                default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_bill_customer_sk"]
+                          <-Reducer 15 [ONE_TO_ONE_EDGE]
+                            FORWARD [RS_58]
                               PartitionCols:_col0
-                              Select Operator [SEL_47] (rows=696954748 width=88)
-                                Output:["_col0","_col4","_col6","_col7","_col8","_col9","_col10","_col13"]
-                                Merge Join Operator [MERGEJOIN_112] (rows=696954748 width=88)
-                                  Conds:RS_43._col0=RS_44._col0(Left Outer),RS_43._col0=RS_45._col0(Inner),Output:["_col0","_col4","_col6","_col7","_col8","_col9","_col10","_col12"]
-                                <-Reducer 12 [ONE_TO_ONE_EDGE]
-                                  FORWARD [RS_44]
+                              Select Operator [SEL_46] (rows=158394413 width=135)
+                                Output:["_col0","_col1"]
+                                Group By Operator [GBY_45] (rows=158394413 width=135)
+                                  Output:["_col0"],keys:KEY._col0
+                                <-Reducer 14 [SIMPLE_EDGE]
+                                  SHUFFLE [RS_44]
                                     PartitionCols:_col0
-                                    Select Operator [SEL_22] (rows=79201469 width=135)
-                                      Output:["_col0","_col1"]
-                                      Group By Operator [GBY_21] (rows=79201469 width=135)
-                                        Output:["_col0"],keys:KEY._col0
-                                      <-Reducer 11 [SIMPLE_EDGE]
-                                        SHUFFLE [RS_20]
+                                    Group By Operator [GBY_43] (rows=316788826 width=135)
+                                      Output:["_col0"],keys:_col1
+                                      Merge Join Operator [MERGEJOIN_108] (rows=316788826 width=135)
+                                        Conds:RS_39._col0=RS_40._col0(Inner),Output:["_col1"]
+                                      <-Map 11 [SIMPLE_EDGE]
+                                        SHUFFLE [RS_40]
                                           PartitionCols:_col0
-                                          Group By Operator [GBY_19] (rows=158402938 width=135)
-                                            Output:["_col0"],keys:_col1
-                                            Merge Join Operator [MERGEJOIN_109] (rows=158402938 width=135)
-                                              Conds:RS_15._col0=RS_16._col0(Inner),Output:["_col1"]
-                                            <-Map 13 [SIMPLE_EDGE]
-                                              SHUFFLE [RS_16]
-                                                PartitionCols:_col0
-                                                 Please refer to the previous Select Operator [SEL_14]
-                                            <-Map 10 [SIMPLE_EDGE]
-                                              SHUFFLE [RS_15]
-                                                PartitionCols:_col0
-                                                Select Operator [SEL_11] (rows=144002668 width=135)
-                                                  Output:["_col0","_col1"]
-                                                  Filter Operator [FIL_101] (rows=144002668 width=135)
-                                                    predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null)
-                                                    TableScan [TS_9] (rows=144002668 width=135)
-                                                      default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_bill_customer_sk"]
-                                <-Reducer 15 [ONE_TO_ONE_EDGE]
-                                  FORWARD [RS_45]
-                                    PartitionCols:_col0
-                                    Group By Operator [GBY_35] (rows=316797606 width=88)
-                                      Output:["_col0"],keys:KEY._col0
-                                    <-Reducer 14 [SIMPLE_EDGE]
-                                      SHUFFLE [RS_34]
-                                        PartitionCols:_col0
-                                        Group By Operator [GBY_33] (rows=633595212 width=88)
-                                          Output:["_col0"],keys:_col1
-                                          Merge Join Operator [MERGEJOIN_110] (rows=633595212 width=88)
-                                            Conds:RS_29._col0=RS_30._col0(Inner),Output:["_col1"]
-                                          <-Map 13 [SIMPLE_EDGE]
-                                            SHUFFLE [RS_30]
-                                              PartitionCols:_col0
-                                               Please refer to the previous Select Operator [SEL_14]
-                                          <-Map 18 [SIMPLE_EDGE]
-                                            SHUFFLE [RS_29]
-                                              PartitionCols:_col0
-                                              Select Operator [SEL_25] (rows=575995635 width=88)
-                                                Output:["_col0","_col1"]
-                                                Filter Operator [FIL_103] (rows=575995635 width=88)
-                                                  predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null)
-                                                  TableScan [TS_23] (rows=575995635 width=88)
-                                                    default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk"]
-                                <-Reducer 3 [SIMPLE_EDGE]
-                                  SHUFFLE [RS_43]
-                                    PartitionCols:_col0
-                                    Merge Join Operator [MERGEJOIN_108] (rows=96800003 width=860)
-                                      Conds:RS_40._col1=RS_41._col0(Inner),Output:["_col0","_col4","_col6","_col7","_col8","_col9","_col10"]
-                                    <-Map 9 [SIMPLE_EDGE]
-                                      SHUFFLE [RS_41]
-                                        PartitionCols:_col0
-                                        Select Operator [SEL_8] (rows=1861800 width=385)
-                                          Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
-                                          Filter Operator [FIL_100] (rows=1861800 width=385)
-                                            predicate:cd_demo_sk is not null
-                                            TableScan [TS_6] (rows=1861800 width=385)
-                                              default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_dep_count","cd_dep_employed_count","cd_dep_college_count"]
-                                    <-Reducer 2 [SIMPLE_EDGE]
-                                      SHUFFLE [RS_40]
-                                        PartitionCols:_col1
-                                        Merge Join Operator [MERGEJOIN_107] (rows=88000001 width=860)
-                                          Conds:RS_37._col2=RS_38._col0(Inner),Output:["_col0","_col1","_col4"]
-                                        <-Map 1 [SIMPLE_EDGE]
-                                          SHUFFLE [RS_37]
-                                            PartitionCols:_col2
-                                            Select Operator [SEL_2] (rows=80000000 width=860)
-                                              Output:["_col0","_col1","_col2"]
-                                              Filter Operator [FIL_98] (rows=80000000 width=860)
-                                                predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null)
-                                                TableScan [TS_0] (rows=80000000 width=860)
-                                                  default@customer,c,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk"]
-                                        <-Map 8 [SIMPLE_EDGE]
-                                          SHUFFLE [RS_38]
-                                            PartitionCols:_col0
-                                            Select Operator [SEL_5] (rows=40000000 width=1014)
-                                              Output:["_col0","_col1"]
-                                              Filter Operator [FIL_99] (rows=40000000 width=1014)
-                                                predicate:ca_address_sk is not null
-                                                TableScan [TS_3] (rows=40000000 width=1014)
-                                                  default@customer_address,ca,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"]
+                                           Please refer to the previous Select Operator [SEL_14]
+                                      <-Map 17 [SIMPLE_EDGE]
+                                        SHUFFLE [RS_39]
+                                          PartitionCols:_col0
+                                          Select Operator [SEL_35] (rows=287989836 width=135)
+                                            Output:["_col0","_col1"]
+                                            Filter Operator [FIL_102] (rows=287989836 width=135)
+                                              predicate:(cs_ship_customer_sk is not null and cs_sold_date_sk is not null)
+                                              TableScan [TS_33] (rows=287989836 width=135)
+                                                default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_ship_customer_sk"]
+                          <-Reducer 3 [SIMPLE_EDGE]
+                            SHUFFLE [RS_55]
+                              PartitionCols:_col0
+                              Merge Join Operator [MERGEJOIN_105] (rows=96800003 width=860)
+                                Conds:RS_50._col1=RS_51._col0(Inner),Output:["_col0","_col4","_col6","_col7","_col8","_col9","_col10"]
+                              <-Map 8 [SIMPLE_EDGE]
+                                SHUFFLE [RS_51]
+                                  PartitionCols:_col0
+                                  Select Operator [SEL_8] (rows=1861800 width=385)
+                                    Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
+                                    Filter Operator [FIL_97] (rows=1861800 width=385)
+                                      predicate:cd_demo_sk is not null
+                                      TableScan [TS_6] (rows=1861800 width=385)
+                                        default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_dep_count","cd_dep_employed_count","cd_dep_college_count"]
+                              <-Reducer 2 [SIMPLE_EDGE]
+                                SHUFFLE [RS_50]
+                                  PartitionCols:_col1
+                                  Merge Join Operator [MERGEJOIN_104] (rows=88000001 width=860)
+                                    Conds:RS_47._col2=RS_48._col0(Inner),Output:["_col0","_col1","_col4"]
+                                  <-Map 1 [SIMPLE_EDGE]
+                                    SHUFFLE [RS_47]
+                                      PartitionCols:_col2
+                                      Select Operator [SEL_2] (rows=80000000 width=860)
+                                        Output:["_col0","_col1","_col2"]
+                                        Filter Operator [FIL_95] (rows=80000000 width=860)
+                                          predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_customer_sk is not null)
+                                          TableScan [TS_0] (rows=80000000 width=860)
+                                            default@customer,c,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk"]
+                                  <-Map 7 [SIMPLE_EDGE]
+                                    SHUFFLE [RS_48]
+                                      PartitionCols:_col0
+                                      Select Operator [SEL_5] (rows=40000000 width=1014)
+                                        Output:["_col0","_col1"]
+                                        Filter Operator [FIL_96] (rows=40000000 width=1014)
+                                          predicate:ca_address_sk is not null
+                                          TableScan [TS_3] (rows=40000000 width=1014)
+                                            default@customer_address,ca,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"]