You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@hive.apache.org by jc...@apache.org on 2018/10/20 00:28:07 UTC

[1/3] hive git commit: HIVE-20767: Multiple project between join operators may affect join reordering using constraints (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

Repository: hive
Updated Branches:
  refs/heads/master 14c72c68a -> 4a7de47a4


http://git-wip-us.apache.org/repos/asf/hive/blob/4a7de47a/ql/src/test/results/clientpositive/perf/tez/query54.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query54.q.out b/ql/src/test/results/clientpositive/perf/tez/query54.q.out
index 1c17d2a..0fd1b42 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query54.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query54.q.out
@@ -1,7 +1,7 @@
-Warning: Shuffle Join MERGEJOIN[270][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product
-Warning: Shuffle Join MERGEJOIN[271][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 5' is a cross product
-Warning: Shuffle Join MERGEJOIN[272][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product
-Warning: Shuffle Join MERGEJOIN[273][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 7' is a cross product
+Warning: Shuffle Join MERGEJOIN[271][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product
+Warning: Shuffle Join MERGEJOIN[272][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 5' is a cross product
+Warning: Shuffle Join MERGEJOIN[270][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 33' is a cross product
+Warning: Shuffle Join MERGEJOIN[273][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 6' is a cross product
 PREHOOK: query: explain
 with my_customers as (
  select distinct c_customer_sk
@@ -133,31 +133,31 @@ POSTHOOK: Output: hdfs://### HDFS PATH ###
 Plan optimized by CBO.
 
 Vertex dependency in root stage
-Map 1 <- Reducer 12 (BROADCAST_EDGE)
-Map 17 <- Reducer 25 (BROADCAST_EDGE), Reducer 27 (BROADCAST_EDGE), Reducer 29 (BROADCAST_EDGE), Union 18 (CONTAINS)
-Map 23 <- Reducer 25 (BROADCAST_EDGE), Reducer 27 (BROADCAST_EDGE), Union 18 (CONTAINS)
-Reducer 10 <- Reducer 9 (SIMPLE_EDGE)
-Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE)
-Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 16 (SIMPLE_EDGE)
-Reducer 15 <- Reducer 14 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE)
-Reducer 19 <- Map 24 (SIMPLE_EDGE), Union 18 (SIMPLE_EDGE)
-Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE)
-Reducer 20 <- Map 26 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE)
-Reducer 21 <- Map 28 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE)
-Reducer 22 <- Reducer 21 (SIMPLE_EDGE)
-Reducer 25 <- Map 24 (CUSTOM_SIMPLE_EDGE)
-Reducer 27 <- Map 26 (CUSTOM_SIMPLE_EDGE)
-Reducer 29 <- Map 28 (CUSTOM_SIMPLE_EDGE)
-Reducer 3 <- Reducer 15 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
-Reducer 31 <- Map 30 (SIMPLE_EDGE)
-Reducer 32 <- Reducer 31 (CUSTOM_SIMPLE_EDGE)
-Reducer 33 <- Map 30 (SIMPLE_EDGE)
-Reducer 34 <- Reducer 33 (CUSTOM_SIMPLE_EDGE)
-Reducer 35 <- Map 30 (SIMPLE_EDGE)
-Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE), Reducer 32 (CUSTOM_SIMPLE_EDGE)
-Reducer 5 <- Reducer 31 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE)
-Reducer 6 <- Reducer 34 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE)
-Reducer 7 <- Reducer 35 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE)
+Map 1 <- Reducer 11 (BROADCAST_EDGE)
+Map 16 <- Reducer 24 (BROADCAST_EDGE), Reducer 26 (BROADCAST_EDGE), Reducer 28 (BROADCAST_EDGE), Union 17 (CONTAINS)
+Map 22 <- Reducer 24 (BROADCAST_EDGE), Reducer 26 (BROADCAST_EDGE), Union 17 (CONTAINS)
+Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE)
+Reducer 13 <- Map 12 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE)
+Reducer 14 <- Reducer 13 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE)
+Reducer 18 <- Map 23 (SIMPLE_EDGE), Union 17 (SIMPLE_EDGE)
+Reducer 19 <- Map 25 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE)
+Reducer 20 <- Map 27 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE)
+Reducer 21 <- Reducer 20 (SIMPLE_EDGE)
+Reducer 24 <- Map 23 (CUSTOM_SIMPLE_EDGE)
+Reducer 26 <- Map 25 (CUSTOM_SIMPLE_EDGE)
+Reducer 28 <- Map 27 (CUSTOM_SIMPLE_EDGE)
+Reducer 3 <- Reducer 14 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+Reducer 30 <- Map 29 (SIMPLE_EDGE)
+Reducer 31 <- Reducer 30 (CUSTOM_SIMPLE_EDGE)
+Reducer 32 <- Map 29 (SIMPLE_EDGE)
+Reducer 33 <- Reducer 32 (CUSTOM_SIMPLE_EDGE), Reducer 35 (CUSTOM_SIMPLE_EDGE)
+Reducer 34 <- Map 29 (SIMPLE_EDGE)
+Reducer 35 <- Reducer 34 (CUSTOM_SIMPLE_EDGE)
+Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE), Reducer 31 (CUSTOM_SIMPLE_EDGE)
+Reducer 5 <- Reducer 30 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE)
+Reducer 6 <- Reducer 33 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE)
+Reducer 7 <- Reducer 6 (SIMPLE_EDGE)
 Reducer 8 <- Reducer 7 (SIMPLE_EDGE)
 Reducer 9 <- Reducer 8 (SIMPLE_EDGE)
 
@@ -165,19 +165,19 @@ Stage-0
   Fetch Operator
     limit:100
     Stage-1
-      Reducer 10 vectorized
+      Reducer 9 vectorized
       File Output Operator [FS_360]
         Limit [LIM_359] (rows=100 width=158)
           Number of rows:100
           Select Operator [SEL_358] (rows=1614130953450400 width=158)
             Output:["_col0","_col1","_col2"]
-          <-Reducer 9 [SIMPLE_EDGE] vectorized
+          <-Reducer 8 [SIMPLE_EDGE] vectorized
             SHUFFLE [RS_357]
               Select Operator [SEL_356] (rows=1614130953450400 width=158)
                 Output:["_col0","_col1","_col2"]
                 Group By Operator [GBY_355] (rows=1614130953450400 width=158)
                   Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0
-                <-Reducer 8 [SIMPLE_EDGE] vectorized
+                <-Reducer 7 [SIMPLE_EDGE] vectorized
                   SHUFFLE [RS_354]
                     PartitionCols:_col0
                     Group By Operator [GBY_353] (rows=3228261906900801 width=158)
@@ -186,7 +186,7 @@ Stage-0
                         Output:["_col0"]
                         Group By Operator [GBY_351] (rows=3228261906900801 width=158)
                           Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0
-                        <-Reducer 7 [SIMPLE_EDGE]
+                        <-Reducer 6 [SIMPLE_EDGE]
                           SHUFFLE [RS_119]
                             PartitionCols:_col0
                             Group By Operator [GBY_118] (rows=6456523813801603 width=158)
@@ -195,259 +195,259 @@ Stage-0
                                 Output:["_col0","_col1"]
                                 Filter Operator [FIL_116] (rows=6456523813801603 width=158)
                                   predicate:_col2 BETWEEN _col3 AND _col4
-                                  Merge Join Operator [MERGEJOIN_273] (rows=58108714324214428 width=158)
-                                    Conds:(Left Outer),Output:["_col0","_col1","_col2","_col3","_col4"]
-                                  <-Reducer 35 [CUSTOM_SIMPLE_EDGE] vectorized
-                                    PARTITION_ONLY_SHUFFLE [RS_350]
-                                      Group By Operator [GBY_349] (rows=9131 width=1119)
-                                        Output:["_col0"],keys:KEY._col0
-                                      <-Map 30 [SIMPLE_EDGE] vectorized
-                                        SHUFFLE [RS_331]
-                                          PartitionCols:_col0
-                                          Group By Operator [GBY_328] (rows=18262 width=1119)
-                                            Output:["_col0"],keys:_col0
-                                            Select Operator [SEL_325] (rows=18262 width=1119)
-                                              Output:["_col0"]
-                                              Filter Operator [FIL_322] (rows=18262 width=1119)
-                                                predicate:((d_moy = 3) and (d_year = 1999))
-                                                TableScan [TS_50] (rows=73049 width=1119)
-                                                  default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_month_seq","d_year","d_moy"]
-                                  <-Reducer 6 [CUSTOM_SIMPLE_EDGE]
-                                    PARTITION_ONLY_SHUFFLE [RS_113]
-                                      Select Operator [SEL_104] (rows=6363893803988 width=1226)
-                                        Output:["_col0","_col1","_col2","_col3"]
-                                        Merge Join Operator [MERGEJOIN_272] (rows=6363893803988 width=1226)
-                                          Conds:(Inner),Output:["_col0","_col4","_col11","_col13"]
-                                        <-Reducer 34 [CUSTOM_SIMPLE_EDGE] vectorized
-                                          PARTITION_ONLY_SHUFFLE [RS_348]
-                                            Select Operator [SEL_347] (rows=1 width=8)
-                                              Filter Operator [FIL_346] (rows=1 width=8)
+                                  Select Operator [SEL_115] (rows=58108714324214428 width=158)
+                                    Output:["_col0","_col1","_col2","_col3","_col4"]
+                                    Merge Join Operator [MERGEJOIN_273] (rows=58108714324214428 width=158)
+                                      Conds:(Inner),Output:["_col0","_col2","_col6","_col13","_col15"]
+                                    <-Reducer 33 [CUSTOM_SIMPLE_EDGE]
+                                      PARTITION_ONLY_SHUFFLE [RS_112]
+                                        Merge Join Operator [MERGEJOIN_270] (rows=9131 width=1128)
+                                          Conds:(Right Outer),Output:["_col0"]
+                                        <-Reducer 32 [CUSTOM_SIMPLE_EDGE] vectorized
+                                          PARTITION_ONLY_SHUFFLE [RS_342]
+                                            Group By Operator [GBY_341] (rows=9131 width=1119)
+                                              Output:["_col0"],keys:KEY._col0
+                                            <-Map 29 [SIMPLE_EDGE] vectorized
+                                              SHUFFLE [RS_330]
+                                                PartitionCols:_col0
+                                                Group By Operator [GBY_327] (rows=18262 width=1119)
+                                                  Output:["_col0"],keys:_col0
+                                                  Select Operator [SEL_324] (rows=18262 width=1119)
+                                                    Output:["_col0"]
+                                                    Filter Operator [FIL_322] (rows=18262 width=1119)
+                                                      predicate:((d_moy = 3) and (d_year = 1999))
+                                                      TableScan [TS_73] (rows=73049 width=1119)
+                                                        default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_month_seq","d_year","d_moy"]
+                                        <-Reducer 35 [CUSTOM_SIMPLE_EDGE] vectorized
+                                          PARTITION_ONLY_SHUFFLE [RS_350]
+                                            Select Operator [SEL_349] (rows=1 width=8)
+                                              Filter Operator [FIL_348] (rows=1 width=8)
                                                 predicate:(sq_count_check(_col0) <= 1)
-                                                Group By Operator [GBY_345] (rows=1 width=8)
+                                                Group By Operator [GBY_347] (rows=1 width=8)
                                                   Output:["_col0"],aggregations:["count(VALUE._col0)"]
-                                                <-Reducer 33 [CUSTOM_SIMPLE_EDGE] vectorized
-                                                  PARTITION_ONLY_SHUFFLE [RS_344]
-                                                    Group By Operator [GBY_343] (rows=1 width=8)
+                                                <-Reducer 34 [CUSTOM_SIMPLE_EDGE] vectorized
+                                                  PARTITION_ONLY_SHUFFLE [RS_346]
+                                                    Group By Operator [GBY_345] (rows=1 width=8)
                                                       Output:["_col0"],aggregations:["count()"]
-                                                      Select Operator [SEL_342] (rows=9131 width=1119)
-                                                        Group By Operator [GBY_341] (rows=9131 width=1119)
+                                                      Select Operator [SEL_344] (rows=9131 width=1119)
+                                                        Group By Operator [GBY_343] (rows=9131 width=1119)
                                                           Output:["_col0"],keys:KEY._col0
-                                                        <-Map 30 [SIMPLE_EDGE] vectorized
-                                                          SHUFFLE [RS_330]
+                                                        <-Map 29 [SIMPLE_EDGE] vectorized
+                                                          SHUFFLE [RS_331]
                                                             PartitionCols:_col0
-                                                            Group By Operator [GBY_327] (rows=18262 width=1119)
+                                                            Group By Operator [GBY_328] (rows=18262 width=1119)
                                                               Output:["_col0"],keys:_col0
-                                                              Select Operator [SEL_324] (rows=18262 width=1119)
+                                                              Select Operator [SEL_325] (rows=18262 width=1119)
                                                                 Output:["_col0"]
                                                                  Please refer to the previous Filter Operator [FIL_322]
-                                        <-Reducer 5 [CUSTOM_SIMPLE_EDGE]
-                                          PARTITION_ONLY_SHUFFLE [RS_101]
-                                            Select Operator [SEL_85] (rows=6363893803988 width=1217)
-                                              Output:["_col0","_col4","_col11","_col13"]
-                                              Merge Join Operator [MERGEJOIN_271] (rows=6363893803988 width=1217)
-                                                Conds:(Left Outer),Output:["_col2","_col4","_col10","_col13"]
-                                              <-Reducer 31 [CUSTOM_SIMPLE_EDGE] vectorized
-                                                PARTITION_ONLY_SHUFFLE [RS_334]
-                                                  Group By Operator [GBY_332] (rows=9131 width=1119)
-                                                    Output:["_col0"],keys:KEY._col0
-                                                  <-Map 30 [SIMPLE_EDGE] vectorized
-                                                    SHUFFLE [RS_329]
-                                                      PartitionCols:_col0
-                                                      Group By Operator [GBY_326] (rows=18262 width=1119)
-                                                        Output:["_col0"],keys:_col0
-                                                        Select Operator [SEL_323] (rows=18262 width=1119)
-                                                          Output:["_col0"]
-                                                           Please refer to the previous Filter Operator [FIL_322]
-                                              <-Reducer 4 [CUSTOM_SIMPLE_EDGE]
-                                                PARTITION_ONLY_SHUFFLE [RS_82]
-                                                  Merge Join Operator [MERGEJOIN_270] (rows=696954748 width=97)
-                                                    Conds:(Inner),Output:["_col2","_col4","_col10"]
-                                                  <-Reducer 3 [CUSTOM_SIMPLE_EDGE]
-                                                    PARTITION_ONLY_SHUFFLE [RS_79]
-                                                      Merge Join Operator [MERGEJOIN_269] (rows=696954748 width=88)
-                                                        Conds:RS_76._col1=RS_77._col5(Inner),Output:["_col2","_col4","_col10"]
-                                                      <-Reducer 15 [SIMPLE_EDGE]
-                                                        SHUFFLE [RS_77]
-                                                          PartitionCols:_col5
-                                                          Merge Join Operator [MERGEJOIN_268] (rows=316240138 width=135)
-                                                            Conds:RS_46._col0=RS_321._col1(Inner),Output:["_col5"]
-                                                          <-Reducer 14 [SIMPLE_EDGE]
-                                                            SHUFFLE [RS_46]
-                                                              PartitionCols:_col0
-                                                              Merge Join Operator [MERGEJOIN_264] (rows=44000000 width=1014)
-                                                                Conds:RS_297._col1, _col2=RS_300._col0, _col1(Inner),Output:["_col0"]
-                                                              <-Map 13 [SIMPLE_EDGE] vectorized
-                                                                SHUFFLE [RS_297]
-                                                                  PartitionCols:_col1, _col2
-                                                                  Select Operator [SEL_296] (rows=40000000 width=1014)
-                                                                    Output:["_col0","_col1","_col2"]
-                                                                    Filter Operator [FIL_295] (rows=40000000 width=1014)
-                                                                      predicate:(ca_address_sk is not null and ca_county is not null and ca_state is not null)
-                                                                      TableScan [TS_6] (rows=40000000 width=1014)
-                                                                        default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_county","ca_state"]
-                                                              <-Map 16 [SIMPLE_EDGE] vectorized
-                                                                SHUFFLE [RS_300]
-                                                                  PartitionCols:_col0, _col1
-                                                                  Select Operator [SEL_299] (rows=1704 width=1910)
-                                                                    Output:["_col0","_col1"]
-                                                                    Filter Operator [FIL_298] (rows=1704 width=1910)
-                                                                      predicate:(s_county is not null and s_state is not null)
-                                                                      TableScan [TS_9] (rows=1704 width=1910)
-                                                                        default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_county","s_state"]
-                                                          <-Reducer 22 [SIMPLE_EDGE] vectorized
-                                                            SHUFFLE [RS_321]
-                                                              PartitionCols:_col1
-                                                              Select Operator [SEL_320] (rows=287491029 width=135)
+                                    <-Reducer 5 [CUSTOM_SIMPLE_EDGE]
+                                      PARTITION_ONLY_SHUFFLE [RS_113]
+                                        Select Operator [SEL_108] (rows=6363893803988 width=1217)
+                                          Output:["_col0","_col4","_col11","_col13"]
+                                          Merge Join Operator [MERGEJOIN_272] (rows=6363893803988 width=1217)
+                                            Conds:(Left Outer),Output:["_col2","_col4","_col10","_col13"]
+                                          <-Reducer 30 [CUSTOM_SIMPLE_EDGE] vectorized
+                                            PARTITION_ONLY_SHUFFLE [RS_334]
+                                              Group By Operator [GBY_332] (rows=9131 width=1119)
+                                                Output:["_col0"],keys:KEY._col0
+                                              <-Map 29 [SIMPLE_EDGE] vectorized
+                                                SHUFFLE [RS_329]
+                                                  PartitionCols:_col0
+                                                  Group By Operator [GBY_326] (rows=18262 width=1119)
+                                                    Output:["_col0"],keys:_col0
+                                                    Select Operator [SEL_323] (rows=18262 width=1119)
+                                                      Output:["_col0"]
+                                                       Please refer to the previous Filter Operator [FIL_322]
+                                          <-Reducer 4 [CUSTOM_SIMPLE_EDGE]
+                                            PARTITION_ONLY_SHUFFLE [RS_105]
+                                              Merge Join Operator [MERGEJOIN_271] (rows=696954748 width=97)
+                                                Conds:(Inner),Output:["_col2","_col4","_col10"]
+                                              <-Reducer 3 [CUSTOM_SIMPLE_EDGE]
+                                                PARTITION_ONLY_SHUFFLE [RS_102]
+                                                  Merge Join Operator [MERGEJOIN_269] (rows=696954748 width=88)
+                                                    Conds:RS_99._col1=RS_100._col5(Inner),Output:["_col2","_col4","_col10"]
+                                                  <-Reducer 14 [SIMPLE_EDGE]
+                                                    SHUFFLE [RS_100]
+                                                      PartitionCols:_col5
+                                                      Merge Join Operator [MERGEJOIN_268] (rows=316240138 width=135)
+                                                        Conds:RS_69._col0=RS_321._col1(Inner),Output:["_col5"]
+                                                      <-Reducer 13 [SIMPLE_EDGE]
+                                                        SHUFFLE [RS_69]
+                                                          PartitionCols:_col0
+                                                          Merge Join Operator [MERGEJOIN_264] (rows=44000000 width=1014)
+                                                            Conds:RS_297._col1, _col2=RS_300._col0, _col1(Inner),Output:["_col0"]
+                                                          <-Map 12 [SIMPLE_EDGE] vectorized
+                                                            SHUFFLE [RS_297]
+                                                              PartitionCols:_col1, _col2
+                                                              Select Operator [SEL_296] (rows=40000000 width=1014)
+                                                                Output:["_col0","_col1","_col2"]
+                                                                Filter Operator [FIL_295] (rows=40000000 width=1014)
+                                                                  predicate:(ca_address_sk is not null and ca_county is not null and ca_state is not null)
+                                                                  TableScan [TS_29] (rows=40000000 width=1014)
+                                                                    default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_county","ca_state"]
+                                                          <-Map 15 [SIMPLE_EDGE] vectorized
+                                                            SHUFFLE [RS_300]
+                                                              PartitionCols:_col0, _col1
+                                                              Select Operator [SEL_299] (rows=1704 width=1910)
                                                                 Output:["_col0","_col1"]
-                                                                Group By Operator [GBY_319] (rows=287491029 width=135)
-                                                                  Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
-                                                                <-Reducer 21 [SIMPLE_EDGE]
-                                                                  SHUFFLE [RS_40]
-                                                                    PartitionCols:_col0, _col1
-                                                                    Group By Operator [GBY_39] (rows=574982058 width=135)
-                                                                      Output:["_col0","_col1"],keys:_col10, _col9
-                                                                      Merge Join Operator [MERGEJOIN_267] (rows=574982058 width=135)
-                                                                        Conds:RS_35._col1=RS_315._col0(Inner),Output:["_col9","_col10"]
-                                                                      <-Map 28 [SIMPLE_EDGE] vectorized
-                                                                        PARTITION_ONLY_SHUFFLE [RS_315]
+                                                                Filter Operator [FIL_298] (rows=1704 width=1910)
+                                                                  predicate:(s_county is not null and s_state is not null)
+                                                                  TableScan [TS_32] (rows=1704 width=1910)
+                                                                    default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_county","s_state"]
+                                                      <-Reducer 21 [SIMPLE_EDGE] vectorized
+                                                        SHUFFLE [RS_321]
+                                                          PartitionCols:_col1
+                                                          Select Operator [SEL_320] (rows=287491029 width=135)
+                                                            Output:["_col0","_col1"]
+                                                            Group By Operator [GBY_319] (rows=287491029 width=135)
+                                                              Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
+                                                            <-Reducer 20 [SIMPLE_EDGE]
+                                                              SHUFFLE [RS_63]
+                                                                PartitionCols:_col0, _col1
+                                                                Group By Operator [GBY_62] (rows=574982058 width=135)
+                                                                  Output:["_col0","_col1"],keys:_col10, _col9
+                                                                  Merge Join Operator [MERGEJOIN_267] (rows=574982058 width=135)
+                                                                    Conds:RS_58._col1=RS_315._col0(Inner),Output:["_col9","_col10"]
+                                                                  <-Map 27 [SIMPLE_EDGE] vectorized
+                                                                    PARTITION_ONLY_SHUFFLE [RS_315]
+                                                                      PartitionCols:_col0
+                                                                      Select Operator [SEL_314] (rows=80000000 width=860)
+                                                                        Output:["_col0","_col1"]
+                                                                        Filter Operator [FIL_313] (rows=80000000 width=860)
+                                                                          predicate:(c_current_addr_sk is not null and c_customer_sk is not null)
+                                                                          TableScan [TS_49] (rows=80000000 width=860)
+                                                                            default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_addr_sk"]
+                                                                  <-Reducer 19 [SIMPLE_EDGE]
+                                                                    SHUFFLE [RS_58]
+                                                                      PartitionCols:_col1
+                                                                      Merge Join Operator [MERGEJOIN_266] (rows=522710951 width=135)
+                                                                        Conds:RS_55._col2=RS_309._col0(Inner),Output:["_col1"]
+                                                                      <-Map 25 [SIMPLE_EDGE] vectorized
+                                                                        PARTITION_ONLY_SHUFFLE [RS_309]
                                                                           PartitionCols:_col0
-                                                                          Select Operator [SEL_314] (rows=80000000 width=860)
-                                                                            Output:["_col0","_col1"]
-                                                                            Filter Operator [FIL_313] (rows=80000000 width=860)
-                                                                              predicate:(c_current_addr_sk is not null and c_customer_sk is not null)
-                                                                              TableScan [TS_26] (rows=80000000 width=860)
-                                                                                default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_addr_sk"]
-                                                                      <-Reducer 20 [SIMPLE_EDGE]
-                                                                        SHUFFLE [RS_35]
-                                                                          PartitionCols:_col1
-                                                                          Merge Join Operator [MERGEJOIN_266] (rows=522710951 width=135)
-                                                                            Conds:RS_32._col2=RS_309._col0(Inner),Output:["_col1"]
-                                                                          <-Map 26 [SIMPLE_EDGE] vectorized
-                                                                            PARTITION_ONLY_SHUFFLE [RS_309]
+                                                                          Select Operator [SEL_308] (rows=115500 width=1436)
+                                                                            Output:["_col0"]
+                                                                            Filter Operator [FIL_307] (rows=115500 width=1436)
+                                                                              predicate:((i_category = 'Jewelry') and (i_class = 'consignment') and i_item_sk is not null)
+                                                                              TableScan [TS_46] (rows=462000 width=1436)
+                                                                                default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_class","i_category"]
+                                                                      <-Reducer 18 [SIMPLE_EDGE]
+                                                                        SHUFFLE [RS_55]
+                                                                          PartitionCols:_col2
+                                                                          Merge Join Operator [MERGEJOIN_265] (rows=475191764 width=135)
+                                                                            Conds:Union 17._col0=RS_303._col0(Inner),Output:["_col1","_col2"]
+                                                                          <-Map 23 [SIMPLE_EDGE] vectorized
+                                                                            PARTITION_ONLY_SHUFFLE [RS_303]
                                                                               PartitionCols:_col0
-                                                                              Select Operator [SEL_308] (rows=115500 width=1436)
+                                                                              Select Operator [SEL_302] (rows=18262 width=1119)
                                                                                 Output:["_col0"]
-                                                                                Filter Operator [FIL_307] (rows=115500 width=1436)
-                                                                                  predicate:((i_category = 'Jewelry') and (i_class = 'consignment') and i_item_sk is not null)
-                                                                                  TableScan [TS_23] (rows=462000 width=1436)
-                                                                                    default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_class","i_category"]
-                                                                          <-Reducer 19 [SIMPLE_EDGE]
-                                                                            SHUFFLE [RS_32]
-                                                                              PartitionCols:_col2
-                                                                              Merge Join Operator [MERGEJOIN_265] (rows=475191764 width=135)
-                                                                                Conds:Union 18._col0=RS_303._col0(Inner),Output:["_col1","_col2"]
-                                                                              <-Map 24 [SIMPLE_EDGE] vectorized
-                                                                                PARTITION_ONLY_SHUFFLE [RS_303]
-                                                                                  PartitionCols:_col0
-                                                                                  Select Operator [SEL_302] (rows=18262 width=1119)
-                                                                                    Output:["_col0"]
-                                                                                    Filter Operator [FIL_301] (rows=18262 width=1119)
-                                                                                      predicate:((d_moy = 3) and (d_year = 1999) and d_date_sk is not null)
-                                                                                      TableScan [TS_20] (rows=73049 width=1119)
-                                                                                        default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"]
-                                                                              <-Union 18 [SIMPLE_EDGE]
-                                                                                <-Map 17 [CONTAINS] vectorized
-                                                                                  Reduce Output Operator [RS_371]
-                                                                                    PartitionCols:_col0
-                                                                                    Select Operator [SEL_370] (rows=287989836 width=135)
-                                                                                      Output:["_col0","_col1","_col2"]
-                                                                                      Filter Operator [FIL_369] (rows=287989836 width=135)
-                                                                                        predicate:((cs_bill_customer_sk BETWEEN DynamicValue(RS_36_customer_c_customer_sk_min) AND DynamicValue(RS_36_customer_c_customer_sk_max) and in_bloom_filter(cs_bill_customer_sk, DynamicValue(RS_36_customer_c_customer_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_33_item_i_item_sk_min) AND DynamicValue(RS_33_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_33_item_i_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_30_date_dim_d_date_sk_min) AND DynamicValue(RS_30_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_30_date_dim_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null)
-                                                                                        TableScan [TS_274] (rows=287989836 width=135)
-                                                                                          Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk"]
-                                                                                        <-Reducer 25 [BROADCAST_EDGE] vectorized
-                                                                                          BROADCAST [RS_362]
-                                                                                            Group By Operator [GBY_361] (rows=1 width=12)
-                                                                                              Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
-                                                                                            <-Map 24 [CUSTOM_SIMPLE_EDGE] vectorized
-                                                                                              PARTITION_ONLY_SHUFFLE [RS_306]
-                                                                                                Group By Operator [GBY_305] (rows=1 width=12)
-                                                                                                  Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
-                                                                                                  Select Operator [SEL_304] (rows=18262 width=1119)
-                                                                                                    Output:["_col0"]
-                                                                                                     Please refer to the previous Select Operator [SEL_302]
-                                                                                        <-Reducer 27 [BROADCAST_EDGE] vectorized
-                                                                                          BROADCAST [RS_365]
-                                                                                            Group By Operator [GBY_364] (rows=1 width=12)
-                                                                                              Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
-                                                                                            <-Map 26 [CUSTOM_SIMPLE_EDGE] vectorized
-                                                                                              PARTITION_ONLY_SHUFFLE [RS_312]
-                                                                                                Group By Operator [GBY_311] (rows=1 width=12)
-                                                                                                  Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
-                                                                                                  Select Operator [SEL_310] (rows=115500 width=1436)
-                                                                                                    Output:["_col0"]
-                                                                                                     Please refer to the previous Select Operator [SEL_308]
-                                                                                        <-Reducer 29 [BROADCAST_EDGE] vectorized
-                                                                                          BROADCAST [RS_368]
-                                                                                            Group By Operator [GBY_367] (rows=1 width=12)
-                                                                                              Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=80000000)"]
-                                                                                            <-Map 28 [CUSTOM_SIMPLE_EDGE] vectorized
-                                                                                              PARTITION_ONLY_SHUFFLE [RS_318]
-                                                                                                Group By Operator [GBY_317] (rows=1 width=12)
-                                                                                                  Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=80000000)"]
-                                                                                                  Select Operator [SEL_316] (rows=80000000 width=860)
-                                                                                                    Output:["_col0"]
-                                                                                                     Please refer to the previous Select Operator [SEL_314]
-                                                                                <-Map 23 [CONTAINS] vectorized
-                                                                                  Reduce Output Operator [RS_374]
-                                                                                    PartitionCols:_col0
-                                                                                    Select Operator [SEL_373] (rows=144002668 width=135)
-                                                                                      Output:["_col0","_col1","_col2"]
-                                                                                      Filter Operator [FIL_372] (rows=144002668 width=135)
-                                                                                        predicate:((ws_item_sk BETWEEN DynamicValue(RS_33_item_i_item_sk_min) AND DynamicValue(RS_33_item_i_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_33_item_i_item_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_30_date_dim_d_date_sk_min) AND DynamicValue(RS_30_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_30_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_item_sk is not null and ws_sold_date_sk is not null)
-                                                                                        TableScan [TS_279] (rows=144002668 width=135)
-                                                                                          Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk"]
-                                                                                        <-Reducer 25 [BROADCAST_EDGE] vectorized
-                                                                                          BROADCAST [RS_363]
-                                                                                             Please refer to the previous Group By Operator [GBY_361]
-                                                                                        <-Reducer 27 [BROADCAST_EDGE] vectorized
-                                                                                          BROADCAST [RS_366]
-                                                                                             Please refer to the previous Group By Operator [GBY_364]
-                                                      <-Reducer 2 [SIMPLE_EDGE]
-                                                        SHUFFLE [RS_76]
-                                                          PartitionCols:_col1
-                                                          Merge Join Operator [MERGEJOIN_263] (rows=633595212 width=88)
-                                                            Conds:RS_294._col0=RS_286._col0(Inner),Output:["_col1","_col2","_col4"]
-                                                          <-Map 11 [SIMPLE_EDGE] vectorized
-                                                            SHUFFLE [RS_286]
-                                                              PartitionCols:_col0
-                                                              Select Operator [SEL_285] (rows=73049 width=1119)
-                                                                Output:["_col0","_col1"]
-                                                                Filter Operator [FIL_284] (rows=73049 width=1119)
-                                                                  predicate:d_date_sk is not null
-                                                                  TableScan [TS_3] (rows=73049 width=1119)
-                                                                    default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"]
-                                                          <-Map 1 [SIMPLE_EDGE] vectorized
-                                                            SHUFFLE [RS_294]
-                                                              PartitionCols:_col0
-                                                              Select Operator [SEL_293] (rows=575995635 width=88)
-                                                                Output:["_col0","_col1","_col2"]
-                                                                Filter Operator [FIL_292] (rows=575995635 width=88)
-                                                                  predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_74_date_dim_d_date_sk_min) AND DynamicValue(RS_74_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_74_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null)
-                                                                  TableScan [TS_0] (rows=575995635 width=88)
-                                                                    default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_sales_price"]
-                                                                  <-Reducer 12 [BROADCAST_EDGE] vectorized
-                                                                    BROADCAST [RS_291]
-                                                                      Group By Operator [GBY_290] (rows=1 width=12)
-                                                                        Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
-                                                                      <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized
-                                                                        SHUFFLE [RS_289]
-                                                                          Group By Operator [GBY_288] (rows=1 width=12)
-                                                                            Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
-                                                                            Select Operator [SEL_287] (rows=73049 width=1119)
-                                                                              Output:["_col0"]
-                                                                               Please refer to the previous Select Operator [SEL_285]
-                                                  <-Reducer 32 [CUSTOM_SIMPLE_EDGE] vectorized
-                                                    PARTITION_ONLY_SHUFFLE [RS_340]
-                                                      Select Operator [SEL_339] (rows=1 width=8)
-                                                        Filter Operator [FIL_338] (rows=1 width=8)
-                                                          predicate:(sq_count_check(_col0) <= 1)
-                                                          Group By Operator [GBY_337] (rows=1 width=8)
-                                                            Output:["_col0"],aggregations:["count(VALUE._col0)"]
-                                                          <-Reducer 31 [CUSTOM_SIMPLE_EDGE] vectorized
-                                                            PARTITION_ONLY_SHUFFLE [RS_336]
-                                                              Group By Operator [GBY_335] (rows=1 width=8)
-                                                                Output:["_col0"],aggregations:["count()"]
-                                                                Select Operator [SEL_333] (rows=9131 width=1119)
-                                                                   Please refer to the previous Group By Operator [GBY_332]
+                                                                                Filter Operator [FIL_301] (rows=18262 width=1119)
+                                                                                  predicate:((d_moy = 3) and (d_year = 1999) and d_date_sk is not null)
+                                                                                  TableScan [TS_43] (rows=73049 width=1119)
+                                                                                    default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"]
+                                                                          <-Union 17 [SIMPLE_EDGE]
+                                                                            <-Map 16 [CONTAINS] vectorized
+                                                                              Reduce Output Operator [RS_371]
+                                                                                PartitionCols:_col0
+                                                                                Select Operator [SEL_370] (rows=287989836 width=135)
+                                                                                  Output:["_col0","_col1","_col2"]
+                                                                                  Filter Operator [FIL_369] (rows=287989836 width=135)
+                                                                                    predicate:((cs_bill_customer_sk BETWEEN DynamicValue(RS_59_customer_c_customer_sk_min) AND DynamicValue(RS_59_customer_c_customer_sk_max) and in_bloom_filter(cs_bill_customer_sk, DynamicValue(RS_59_customer_c_customer_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_56_item_i_item_sk_min) AND DynamicValue(RS_56_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_56_item_i_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_53_date_dim_d_date_sk_min) AND DynamicValue(RS_53_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_53_date_dim_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null)
+                                                                                    TableScan [TS_274] (rows=287989836 width=135)
+                                                                                      Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk"]
+                                                                                    <-Reducer 24 [BROADCAST_EDGE] vectorized
+                                                                                      BROADCAST [RS_362]
+                                                                                        Group By Operator [GBY_361] (rows=1 width=12)
+                                                                                          Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
+                                                                                        <-Map 23 [CUSTOM_SIMPLE_EDGE] vectorized
+                                                                                          PARTITION_ONLY_SHUFFLE [RS_306]
+                                                                                            Group By Operator [GBY_305] (rows=1 width=12)
+                                                                                              Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
+                                                                                              Select Operator [SEL_304] (rows=18262 width=1119)
+                                                                                                Output:["_col0"]
+                                                                                                 Please refer to the previous Select Operator [SEL_302]
+                                                                                    <-Reducer 26 [BROADCAST_EDGE] vectorized
+                                                                                      BROADCAST [RS_365]
+                                                                                        Group By Operator [GBY_364] (rows=1 width=12)
+                                                                                          Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
+                                                                                        <-Map 25 [CUSTOM_SIMPLE_EDGE] vectorized
+                                                                                          PARTITION_ONLY_SHUFFLE [RS_312]
+                                                                                            Group By Operator [GBY_311] (rows=1 width=12)
+                                                                                              Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
+                                                                                              Select Operator [SEL_310] (rows=115500 width=1436)
+                                                                                                Output:["_col0"]
+                                                                                                 Please refer to the previous Select Operator [SEL_308]
+                                                                                    <-Reducer 28 [BROADCAST_EDGE] vectorized
+                                                                                      BROADCAST [RS_368]
+                                                                                        Group By Operator [GBY_367] (rows=1 width=12)
+                                                                                          Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=80000000)"]
+                                                                                        <-Map 27 [CUSTOM_SIMPLE_EDGE] vectorized
+                                                                                          PARTITION_ONLY_SHUFFLE [RS_318]
+                                                                                            Group By Operator [GBY_317] (rows=1 width=12)
+                                                                                              Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=80000000)"]
+                                                                                              Select Operator [SEL_316] (rows=80000000 width=860)
+                                                                                                Output:["_col0"]
+                                                                                                 Please refer to the previous Select Operator [SEL_314]
+                                                                            <-Map 22 [CONTAINS] vectorized
+                                                                              Reduce Output Operator [RS_374]
+                                                                                PartitionCols:_col0
+                                                                                Select Operator [SEL_373] (rows=144002668 width=135)
+                                                                                  Output:["_col0","_col1","_col2"]
+                                                                                  Filter Operator [FIL_372] (rows=144002668 width=135)
+                                                                                    predicate:((ws_item_sk BETWEEN DynamicValue(RS_56_item_i_item_sk_min) AND DynamicValue(RS_56_item_i_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_56_item_i_item_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_53_date_dim_d_date_sk_min) AND DynamicValue(RS_53_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_53_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_item_sk is not null and ws_sold_date_sk is not null)
+                                                                                    TableScan [TS_279] (rows=144002668 width=135)
+                                                                                      Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk"]
+                                                                                    <-Reducer 24 [BROADCAST_EDGE] vectorized
+                                                                                      BROADCAST [RS_363]
+                                                                                         Please refer to the previous Group By Operator [GBY_361]
+                                                                                    <-Reducer 26 [BROADCAST_EDGE] vectorized
+                                                                                      BROADCAST [RS_366]
+                                                                                         Please refer to the previous Group By Operator [GBY_364]
+                                                  <-Reducer 2 [SIMPLE_EDGE]
+                                                    SHUFFLE [RS_99]
+                                                      PartitionCols:_col1
+                                                      Merge Join Operator [MERGEJOIN_263] (rows=633595212 width=88)
+                                                        Conds:RS_294._col0=RS_286._col0(Inner),Output:["_col1","_col2","_col4"]
+                                                      <-Map 10 [SIMPLE_EDGE] vectorized
+                                                        SHUFFLE [RS_286]
+                                                          PartitionCols:_col0
+                                                          Select Operator [SEL_285] (rows=73049 width=1119)
+                                                            Output:["_col0","_col1"]
+                                                            Filter Operator [FIL_284] (rows=73049 width=1119)
+                                                              predicate:d_date_sk is not null
+                                                              TableScan [TS_26] (rows=73049 width=1119)
+                                                                default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"]
+                                                      <-Map 1 [SIMPLE_EDGE] vectorized
+                                                        SHUFFLE [RS_294]
+                                                          PartitionCols:_col0
+                                                          Select Operator [SEL_293] (rows=575995635 width=88)
+                                                            Output:["_col0","_col1","_col2"]
+                                                            Filter Operator [FIL_292] (rows=575995635 width=88)
+                                                              predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_97_date_dim_d_date_sk_min) AND DynamicValue(RS_97_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_97_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null)
+                                                              TableScan [TS_23] (rows=575995635 width=88)
+                                                                default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_sales_price"]
+                                                              <-Reducer 11 [BROADCAST_EDGE] vectorized
+                                                                BROADCAST [RS_291]
+                                                                  Group By Operator [GBY_290] (rows=1 width=12)
+                                                                    Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
+                                                                  <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized
+                                                                    SHUFFLE [RS_289]
+                                                                      Group By Operator [GBY_288] (rows=1 width=12)
+                                                                        Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
+                                                                        Select Operator [SEL_287] (rows=73049 width=1119)
+                                                                          Output:["_col0"]
+                                                                           Please refer to the previous Select Operator [SEL_285]
+                                              <-Reducer 31 [CUSTOM_SIMPLE_EDGE] vectorized
+                                                PARTITION_ONLY_SHUFFLE [RS_340]
+                                                  Select Operator [SEL_339] (rows=1 width=8)
+                                                    Filter Operator [FIL_338] (rows=1 width=8)
+                                                      predicate:(sq_count_check(_col0) <= 1)
+                                                      Group By Operator [GBY_337] (rows=1 width=8)
+                                                        Output:["_col0"],aggregations:["count(VALUE._col0)"]
+                                                      <-Reducer 30 [CUSTOM_SIMPLE_EDGE] vectorized
+                                                        PARTITION_ONLY_SHUFFLE [RS_336]
+                                                          Group By Operator [GBY_335] (rows=1 width=8)
+                                                            Output:["_col0"],aggregations:["count()"]
+                                                            Select Operator [SEL_333] (rows=9131 width=1119)
+                                                               Please refer to the previous Group By Operator [GBY_332]

[3/3] hive git commit: HIVE-20767: Multiple project between join operators may affect join reordering using constraints (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

Posted by jc...@apache.org.

HIVE-20767: Multiple project between join operators may affect join reordering using constraints (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/4a7de47a
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/4a7de47a
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/4a7de47a

Branch: refs/heads/master
Commit: 4a7de47a4cbe3a0c32f076684125274d83308f5b
Parents: 14c72c6
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Fri Oct 19 09:11:04 2018 -0700
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Fri Oct 19 17:27:25 2018 -0700

----------------------------------------------------------------------
 .../rules/HiveJoinProjectTransposeRule.java     |   3 +-
 .../hadoop/hive/ql/parse/CalcitePlanner.java    |  13 +-
 .../results/clientpositive/llap/lineage2.q.out  |   2 +-
 .../llap/materialized_view_rewrite_7.q.out      | 220 ++++----
 .../clientpositive/llap/multiMapJoin1.q.out     | 348 ++++++------
 .../clientpositive/perf/spark/query54.q.out     | 476 ++++++++--------
 .../clientpositive/perf/tez/query54.q.out       | 552 +++++++++----------
 7 files changed, 781 insertions(+), 833 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/4a7de47a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinProjectTransposeRule.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinProjectTransposeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinProjectTransposeRule.java
index e684432..b163052 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinProjectTransposeRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinProjectTransposeRule.java
@@ -27,13 +27,14 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
 
 public class HiveJoinProjectTransposeRule extends JoinProjectTransposeRule {
 
-  public static final HiveJoinProjectTransposeRule LEFF_PROJECT_BTW_JOIN =
+  public static final HiveJoinProjectTransposeRule LEFT_PROJECT_BTW_JOIN =
       new HiveJoinProjectTransposeRule(
           operand(HiveJoin.class,
                   operand(HiveProject.class, operand(HiveJoin.class, any())),
                   operand(RelNode.class, any())),
           "JoinProjectTransposeRule(Project-Join-Other)",
           false, HiveRelFactories.HIVE_BUILDER);
+
   public static final HiveJoinProjectTransposeRule RIGHT_PROJECT_BTW_JOIN =
       new HiveJoinProjectTransposeRule(
           operand(HiveJoin.class,

http://git-wip-us.apache.org/repos/asf/hive/blob/4a7de47a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 22f3266..1085845 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -1785,16 +1785,15 @@ public class CalcitePlanner extends SemanticAnalyzer {
         calcitePreCboPlan =
             hepPlan(calcitePreCboPlan, false, mdProvider.getMetadataProvider(), null,
                     HiveRemoveSqCountCheck.INSTANCE);
-        perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
-                              "Calcite: Removing sq_count_check UDF ");
+        perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Removing sq_count_check UDF ");
       }
-      //  4.1 Remove Projects between Joins so that JoinToMultiJoinRule can merge them to MultiJoin.
-      //    Don't run this rule if hive is to remove sq_count_check since that rule expects to have project b/w join.
+
+      //  Remove Projects between Joins so that JoinToMultiJoinRule can merge them to MultiJoin
       calcitePreCboPlan = hepPlan(calcitePreCboPlan, true, mdProvider.getMetadataProvider(), executorProvider,
-          HepMatchOrder.BOTTOM_UP, HiveJoinProjectTransposeRule.LEFF_PROJECT_BTW_JOIN,
-          HiveJoinProjectTransposeRule.RIGHT_PROJECT_BTW_JOIN);
+          HepMatchOrder.BOTTOM_UP, HiveJoinProjectTransposeRule.LEFT_PROJECT_BTW_JOIN,
+          HiveJoinProjectTransposeRule.RIGHT_PROJECT_BTW_JOIN, HiveProjectMergeRule.INSTANCE);
 
-      // 4.2 Apply join order optimizations: reordering MST algorithm
+      // 4. Apply join order optimizations: reordering MST algorithm
       //    If join optimizations failed because of missing stats, we continue with
       //    the rest of optimizations
       if (profilesCBO.contains(ExtendedCBOProfile.JOIN_REORDERING)) {

http://git-wip-us.apache.org/repos/asf/hive/blob/4a7de47a/ql/src/test/results/clientpositive/llap/lineage2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/lineage2.q.out b/ql/src/test/results/clientpositive/llap/lineage2.q.out
index d32f490..1cb0cc7 100644
--- a/ql/src/test/results/clientpositive/llap/lineage2.q.out
+++ b/ql/src/test/results/clientpositive/llap/lineage2.q.out
@@ -593,7 +593,7 @@ PREHOOK: Input: default@dept_n10
 PREHOOK: Input: default@emp
 PREHOOK: Input: default@project_n10
 PREHOOK: Output: default@tgt_n10
-{"version":"1.0","engine":"tez","database":"default","hash":"bd297ef302d63c60b0bfb692af732b04","queryText":"INSERT INTO TABLE tgt_n10\nSELECT emd.dept_name, emd.name, emd.emp_id, emd.mgr_id, p.project_id, p.project_name\nFROM (\n  SELECT d.dept_name, em.name, em.emp_id, em.mgr_id, em.dept_id\n  FROM (\n    SELECT e.name, e.dept_id, e.emp_id emp_id, m.emp_id mgr_id\n    FROM emp e JOIN emp m ON e.emp_id = m.emp_id\n    ) em\n  JOIN dept_n10 d ON d.dept_id = em.dept_id\n  ) emd JOIN project_n10 p ON emd.dept_id = p.project_id","edges":[{"sources":[6],"targets":[0],"edgeType":"PROJECTION"},{"sources":[7],"targets":[1],"edgeType":"PROJECTION"},{"sources":[8],"targets":[2,3],"edgeType":"PROJECTION"},{"sources":[9],"targets":[4],"edgeType":"PROJECTION"},{"sources":[10],"targets":[5],"edgeType":"PROJECTION"},{"sources":[8,11],"targets":[0,1,2,3,4,5],"expression":"(e.emp_id is not null and e.dept_id is not null)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"(
 e.emp_id = m.emp_id)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"m.emp_id is not null","edgeType":"PREDICATE"},{"sources":[11,12],"targets":[0,1,2,3,4,5],"expression":"(e.dept_id = d.dept_id)","edgeType":"PREDICATE"},{"sources":[12],"targets":[0,1,2,3,4,5],"expression":"d.dept_id is not null","edgeType":"PREDICATE"},{"sources":[11,9],"targets":[0,1,2,3,4,5],"expression":"(e.dept_id = p.project_id)","edgeType":"PREDICATE"},{"sources":[9],"targets":[0,1,2,3,4,5],"expression":"p.project_id is not null","edgeType":"PREDICATE"},{"sources":[6],"targets":[0],"expression":"compute_stats(default.dept_n10.dept_name, 'hll')","edgeType":"PROJECTION"},{"sources":[7],"targets":[1],"expression":"compute_stats(default.emp.name, 'hll')","edgeType":"PROJECTION"},{"sources":[8],"targets":[2,3],"expression":"compute_stats(default.emp.emp_id, 'hll')","edgeType":"PROJECTION"},{"sources":[9],"targets":[4],"expression":"compute_stats(default.project_n10.project_id, 'hll')"
 ,"edgeType":"PROJECTION"},{"sources":[10],"targets":[5],"expression":"compute_stats(default.project_n10.project_name, 'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.tgt_n10.dept_name"},{"id":1,"vertexType":"COLUMN","vertexId":"default.tgt_n10.name"},{"id":2,"vertexType":"COLUMN","vertexId":"default.tgt_n10.emp_id"},{"id":3,"vertexType":"COLUMN","vertexId":"default.tgt_n10.mgr_id"},{"id":4,"vertexType":"COLUMN","vertexId":"default.tgt_n10.proj_id"},{"id":5,"vertexType":"COLUMN","vertexId":"default.tgt_n10.proj_name"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dept_n10.dept_name"},{"id":7,"vertexType":"COLUMN","vertexId":"default.emp.name"},{"id":8,"vertexType":"COLUMN","vertexId":"default.emp.emp_id"},{"id":9,"vertexType":"COLUMN","vertexId":"default.project_n10.project_id"},{"id":10,"vertexType":"COLUMN","vertexId":"default.project_n10.project_name"},{"id":11,"vertexType":"COLUMN","vertexId":"default.emp.dept_id"},{"id":12,"ver
 texType":"COLUMN","vertexId":"default.dept_n10.dept_id"}]}
+{"version":"1.0","engine":"tez","database":"default","hash":"bd297ef302d63c60b0bfb692af732b04","queryText":"INSERT INTO TABLE tgt_n10\nSELECT emd.dept_name, emd.name, emd.emp_id, emd.mgr_id, p.project_id, p.project_name\nFROM (\n  SELECT d.dept_name, em.name, em.emp_id, em.mgr_id, em.dept_id\n  FROM (\n    SELECT e.name, e.dept_id, e.emp_id emp_id, m.emp_id mgr_id\n    FROM emp e JOIN emp m ON e.emp_id = m.emp_id\n    ) em\n  JOIN dept_n10 d ON d.dept_id = em.dept_id\n  ) emd JOIN project_n10 p ON emd.dept_id = p.project_id","edges":[{"sources":[6],"targets":[0],"edgeType":"PROJECTION"},{"sources":[7],"targets":[1],"edgeType":"PROJECTION"},{"sources":[8],"targets":[2,3],"edgeType":"PROJECTION"},{"sources":[9],"targets":[4],"edgeType":"PROJECTION"},{"sources":[10],"targets":[5],"edgeType":"PROJECTION"},{"sources":[8,11],"targets":[0,1,2,3,4,5],"expression":"(e.emp_id is not null and e.dept_id is not null)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"(
 e.emp_id = m.emp_id)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"m.emp_id is not null","edgeType":"PREDICATE"},{"sources":[11,12,9],"targets":[0,1,2,3,4,5],"expression":"(e.dept_id = d.dept_id AND e.dept_id = p.project_id)","edgeType":"PREDICATE"},{"sources":[12],"targets":[0,1,2,3,4,5],"expression":"d.dept_id is not null","edgeType":"PREDICATE"},{"sources":[9],"targets":[0,1,2,3,4,5],"expression":"p.project_id is not null","edgeType":"PREDICATE"},{"sources":[6],"targets":[0],"expression":"compute_stats(default.dept_n10.dept_name, 'hll')","edgeType":"PROJECTION"},{"sources":[7],"targets":[1],"expression":"compute_stats(default.emp.name, 'hll')","edgeType":"PROJECTION"},{"sources":[8],"targets":[2,3],"expression":"compute_stats(default.emp.emp_id, 'hll')","edgeType":"PROJECTION"},{"sources":[9],"targets":[4],"expression":"compute_stats(default.project_n10.project_id, 'hll')","edgeType":"PROJECTION"},{"sources":[10],"targets":[5],"expression":"compute
 _stats(default.project_n10.project_name, 'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.tgt_n10.dept_name"},{"id":1,"vertexType":"COLUMN","vertexId":"default.tgt_n10.name"},{"id":2,"vertexType":"COLUMN","vertexId":"default.tgt_n10.emp_id"},{"id":3,"vertexType":"COLUMN","vertexId":"default.tgt_n10.mgr_id"},{"id":4,"vertexType":"COLUMN","vertexId":"default.tgt_n10.proj_id"},{"id":5,"vertexType":"COLUMN","vertexId":"default.tgt_n10.proj_name"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dept_n10.dept_name"},{"id":7,"vertexType":"COLUMN","vertexId":"default.emp.name"},{"id":8,"vertexType":"COLUMN","vertexId":"default.emp.emp_id"},{"id":9,"vertexType":"COLUMN","vertexId":"default.project_n10.project_id"},{"id":10,"vertexType":"COLUMN","vertexId":"default.project_n10.project_name"},{"id":11,"vertexType":"COLUMN","vertexId":"default.emp.dept_id"},{"id":12,"vertexType":"COLUMN","vertexId":"default.dept_n10.dept_id"}]}
 PREHOOK: query: drop table if exists dest_l2
 PREHOOK: type: DROPTABLE
 PREHOOK: query: create table dest_l2 (id int, c1 tinyint, c2 int, c3 bigint) stored as textfile

http://git-wip-us.apache.org/repos/asf/hive/blob/4a7de47a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_7.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_7.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_7.q.out
index 6f00a5c..d6685c8 100644
--- a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_7.q.out
+++ b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_7.q.out
@@ -267,11 +267,11 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Map 10 <- Union 5 (CONTAINS)
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE)
-        Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
-        Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS)
-        Reducer 6 <- Union 5 (SIMPLE_EDGE)
+        Map 10 <- Union 4 (CONTAINS)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Union 4 (CONTAINS)
+        Reducer 5 <- Union 4 (SIMPLE_EDGE)
+        Reducer 8 <- Map 7 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -316,28 +316,7 @@ STAGE PLANS:
                         Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
-        Map 7 
-            Map Operator Tree:
-                TableScan
-                  alias: depts_n6
-                  filterExpr: ((deptno > 10) and (deptno <= 11) and name is not null) (type: boolean)
-                  Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE
-                  Filter Operator
-                    predicate: ((deptno <= 11) and (deptno > 10) and name is not null) (type: boolean)
-                    Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE
-                    Select Operator
-                      expressions: deptno (type: int), name (type: varchar(256))
-                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        key expressions: _col1 (type: varchar(256))
-                        sort order: +
-                        Map-reduce partition columns: _col1 (type: varchar(256))
-                        Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE
-                        value expressions: _col0 (type: int)
-            Execution mode: vectorized, llap
-            LLAP IO: may be used (ACID table)
-        Map 8 
+        Map 6 
             Map Operator Tree:
                 TableScan
                   alias: locations_n4
@@ -357,6 +336,27 @@ STAGE PLANS:
                         Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: vectorized, llap
             LLAP IO: may be used (ACID table)
+        Map 7 
+            Map Operator Tree:
+                TableScan
+                  alias: depts_n6
+                  filterExpr: ((deptno > 10) and (deptno <= 11) and name is not null) (type: boolean)
+                  Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: ((deptno <= 11) and (deptno > 10) and name is not null) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: deptno (type: int), name (type: varchar(256))
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: varchar(256))
+            Execution mode: vectorized, llap
+            LLAP IO: may be used (ACID table)
         Map 9 
             Map Operator Tree:
                 TableScan
@@ -386,33 +386,12 @@ STAGE PLANS:
                      Inner Join 0 to 2
                 keys:
                   0 _col1 (type: varchar(256))
-                  1 _col1 (type: varchar(256))
-                  2 _col0 (type: varchar(256))
-                outputColumnNames: _col0, _col2
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                Select Operator
-                  expressions: _col2 (type: int), _col0 (type: int)
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: int)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: int)
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                    value expressions: _col1 (type: int)
-        Reducer 3 
-            Execution mode: llap
-            Reduce Operator Tree:
-              Merge Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 _col0 (type: int)
-                  1 _col0 (type: int)
-                outputColumnNames: _col0, _col1
+                  1 _col0 (type: varchar(256))
+                  2 _col1 (type: varchar(256))
+                outputColumnNames: _col0, _col3
                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                 Group By Operator
-                  keys: _col1 (type: int), _col0 (type: int)
+                  keys: _col0 (type: int), _col3 (type: int)
                   mode: hash
                   outputColumnNames: _col0, _col1
                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
@@ -421,7 +400,7 @@ STAGE PLANS:
                     sort order: ++
                     Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
                     Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-        Reducer 4 
+        Reducer 3 
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator
@@ -439,7 +418,7 @@ STAGE PLANS:
                     sort order: ++
                     Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
                     Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-        Reducer 6 
+        Reducer 5 
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator
@@ -454,8 +433,25 @@ STAGE PLANS:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-        Union 5 
-            Vertex: Union 5
+        Reducer 8 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col1 (type: varchar(256))
+                  sort order: +
+                  Map-reduce partition columns: _col1 (type: varchar(256))
+                  Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE
+                  value expressions: _col0 (type: int)
+        Union 4 
+            Vertex: Union 4
 
   Stage: Stage-0
     Fetch Operator
@@ -578,11 +574,11 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Reducer 11 <- Map 10 (SIMPLE_EDGE), Union 5 (CONTAINS)
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE)
-        Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
-        Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS)
-        Reducer 6 <- Union 5 (SIMPLE_EDGE)
+        Reducer 11 <- Map 10 (SIMPLE_EDGE), Union 4 (CONTAINS)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Union 4 (CONTAINS)
+        Reducer 5 <- Union 4 (SIMPLE_EDGE)
+        Reducer 8 <- Map 7 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -629,28 +625,7 @@ STAGE PLANS:
                         value expressions: _col1 (type: bigint)
             Execution mode: llap
             LLAP IO: all inputs
-        Map 7 
-            Map Operator Tree:
-                TableScan
-                  alias: depts_n6
-                  filterExpr: ((deptno > 10) and (deptno < 20) and ((deptno <= 11) or (deptno >= 19)) and name is not null) (type: boolean)
-                  Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE
-                  Filter Operator
-                    predicate: (((deptno <= 11) or (deptno >= 19)) and (deptno < 20) and (deptno > 10) and name is not null) (type: boolean)
-                    Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE
-                    Select Operator
-                      expressions: deptno (type: int), name (type: varchar(256))
-                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        key expressions: _col1 (type: varchar(256))
-                        sort order: +
-                        Map-reduce partition columns: _col1 (type: varchar(256))
-                        Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE
-                        value expressions: _col0 (type: int)
-            Execution mode: vectorized, llap
-            LLAP IO: may be used (ACID table)
-        Map 8 
+        Map 6 
             Map Operator Tree:
                 TableScan
                   alias: locations_n4
@@ -670,7 +645,7 @@ STAGE PLANS:
                         Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: vectorized, llap
             LLAP IO: may be used (ACID table)
-        Map 9 
+        Map 7 
             Map Operator Tree:
                 TableScan
                   alias: emps_n8
@@ -691,6 +666,27 @@ STAGE PLANS:
                         value expressions: _col1 (type: float)
             Execution mode: vectorized, llap
             LLAP IO: may be used (ACID table)
+        Map 9 
+            Map Operator Tree:
+                TableScan
+                  alias: depts_n6
+                  filterExpr: ((deptno > 10) and (deptno < 20) and ((deptno <= 11) or (deptno >= 19)) and name is not null) (type: boolean)
+                  Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: (((deptno <= 11) or (deptno >= 19)) and (deptno < 20) and (deptno > 10) and name is not null) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: deptno (type: int), name (type: varchar(256))
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: varchar(256))
+            Execution mode: vectorized, llap
+            LLAP IO: may be used (ACID table)
         Reducer 11 
             Execution mode: llap
             Reduce Operator Tree:
@@ -721,34 +717,13 @@ STAGE PLANS:
                      Inner Join 0 to 2
                 keys:
                   0 _col1 (type: varchar(256))
-                  1 _col1 (type: varchar(256))
-                  2 _col0 (type: varchar(256))
-                outputColumnNames: _col0, _col2
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                Select Operator
-                  expressions: _col2 (type: int), _col0 (type: int)
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: int)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: int)
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                    value expressions: _col1 (type: int)
-        Reducer 3 
-            Execution mode: llap
-            Reduce Operator Tree:
-              Merge Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 _col0 (type: int)
-                  1 _col0 (type: int)
-                outputColumnNames: _col1, _col3
+                  1 _col0 (type: varchar(256))
+                  2 _col3 (type: varchar(256))
+                outputColumnNames: _col0, _col4
                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                 Group By Operator
-                  aggregations: count(_col3)
-                  keys: _col1 (type: int)
+                  aggregations: count(_col4)
+                  keys: _col0 (type: int)
                   mode: hash
                   outputColumnNames: _col0, _col1
                   Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
@@ -758,7 +733,7 @@ STAGE PLANS:
                     Map-reduce partition columns: _col0 (type: int)
                     Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                     value expressions: _col1 (type: bigint)
-        Reducer 4 
+        Reducer 3 
             Execution mode: llap
             Reduce Operator Tree:
               Group By Operator
@@ -779,7 +754,7 @@ STAGE PLANS:
                     Map-reduce partition columns: _col0 (type: int)
                     Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                     value expressions: _col1 (type: bigint)
-        Reducer 6 
+        Reducer 5 
             Execution mode: llap
             Reduce Operator Tree:
               Group By Operator
@@ -799,8 +774,25 @@ STAGE PLANS:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-        Union 5 
-            Vertex: Union 5
+        Reducer 8 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+                outputColumnNames: _col1, _col3
+                Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col3 (type: varchar(256))
+                  sort order: +
+                  Map-reduce partition columns: _col3 (type: varchar(256))
+                  Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE
+                  value expressions: _col1 (type: float)
+        Union 4 
+            Vertex: Union 4
 
   Stage: Stage-0
     Fetch Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/4a7de47a/ql/src/test/results/clientpositive/llap/multiMapJoin1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/multiMapJoin1.q.out b/ql/src/test/results/clientpositive/llap/multiMapJoin1.q.out
index ae821f6..540ff08 100644
--- a/ql/src/test/results/clientpositive/llap/multiMapJoin1.q.out
+++ b/ql/src/test/results/clientpositive/llap/multiMapJoin1.q.out
@@ -881,49 +881,45 @@ STAGE PLANS:
                           condition map:
                                Inner Join 0 to 1
                           keys:
-                            0 _col2 (type: string)
-                            1 _col1 (type: string)
+                            0 _col1 (type: string)
+                            1 _col0 (type: string)
                           outputColumnNames: _col0, _col1, _col2, _col3, _col4
                           input vertices:
                             1 Map 4
                           Statistics: Num rows: 5142 Data size: 2697293 Basic stats: COMPLETE Column stats: NONE
-                          Select Operator
-                            expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string), _col2 (type: string), _col2 (type: string)
+                          Map Join Operator
+                            condition map:
+                                 Inner Join 0 to 1
+                            keys:
+                              0 _col2 (type: string)
+                              1 _col1 (type: string)
                             outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                            Statistics: Num rows: 5142 Data size: 2697293 Basic stats: COMPLETE Column stats: NONE
+                            input vertices:
+                              1 Map 5
+                            Statistics: Num rows: 5656 Data size: 2967022 Basic stats: COMPLETE Column stats: NONE
                             Map Join Operator
                               condition map:
                                    Inner Join 0 to 1
                               keys:
-                                0 _col1 (type: string)
+                                0 _col3 (type: string)
                                 1 _col0 (type: string)
-                              outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+                              outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col7
                               input vertices:
-                                1 Map 5
-                              Statistics: Num rows: 5656 Data size: 2967022 Basic stats: COMPLETE Column stats: NONE
-                              Map Join Operator
-                                condition map:
-                                     Inner Join 0 to 1
-                                keys:
-                                  0 _col2 (type: string)
-                                  1 _col0 (type: string)
+                                1 Map 6
+                              Statistics: Num rows: 6221 Data size: 3263724 Basic stats: COMPLETE Column stats: NONE
+                              Select Operator
+                                expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col3) (type: int), hash(_col5) (type: int), hash(_col4) (type: int), hash(_col7) (type: int), hash(_col2) (type: int), hash(_col2) (type: int)
                                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
-                                input vertices:
-                                  1 Map 6
                                 Statistics: Num rows: 6221 Data size: 3263724 Basic stats: COMPLETE Column stats: NONE
-                                Select Operator
-                                  expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int), hash(_col6) (type: int), hash(_col7) (type: int), hash(_col4) (type: int), hash(_col5) (type: int)
+                                Group By Operator
+                                  aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3), sum(_col4), sum(_col5), sum(_col6), sum(_col7)
+                                  mode: hash
                                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
-                                  Statistics: Num rows: 6221 Data size: 3263724 Basic stats: COMPLETE Column stats: NONE
-                                  Group By Operator
-                                    aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3), sum(_col4), sum(_col5), sum(_col6), sum(_col7)
-                                    mode: hash
-                                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+                                  Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE
+                                  Reduce Output Operator
+                                    sort order: 
                                     Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE
-                                    Reduce Output Operator
-                                      sort order: 
-                                      Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE
-                                      value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint)
+                                    value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: no inputs
         Map 3 
@@ -949,6 +945,26 @@ STAGE PLANS:
         Map 4 
             Map Operator Tree:
                 TableScan
+                  alias: smalltbl3
+                  filterExpr: key is not null (type: boolean)
+                  Statistics: Num rows: 10 Data size: 1840 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 10 Data size: 1840 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 10 Data size: 1840 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 10 Data size: 1840 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+        Map 5 
+            Map Operator Tree:
+                TableScan
                   alias: smalltbl2
                   filterExpr: value is not null (type: boolean)
                   Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE
@@ -967,26 +983,6 @@ STAGE PLANS:
                         value expressions: _col0 (type: string)
             Execution mode: vectorized, llap
             LLAP IO: no inputs
-        Map 5 
-            Map Operator Tree:
-                TableScan
-                  alias: smalltbl3
-                  filterExpr: key is not null (type: boolean)
-                  Statistics: Num rows: 10 Data size: 1840 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 10 Data size: 1840 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: string)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 10 Data size: 1840 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: string)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 10 Data size: 1840 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: vectorized, llap
-            LLAP IO: no inputs
         Map 6 
             Map Operator Tree:
                 TableScan
@@ -1225,49 +1221,45 @@ STAGE PLANS:
                           condition map:
                                Inner Join 0 to 1
                           keys:
-                            0 _col2 (type: string)
-                            1 _col1 (type: string)
+                            0 _col1 (type: string)
+                            1 _col0 (type: string)
                           outputColumnNames: _col0, _col1, _col2, _col3, _col4
                           input vertices:
                             1 Map 4
                           Statistics: Num rows: 5142 Data size: 2697293 Basic stats: COMPLETE Column stats: NONE
-                          Select Operator
-                            expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string), _col2 (type: string), _col2 (type: string)
+                          Map Join Operator
+                            condition map:
+                                 Inner Join 0 to 1
+                            keys:
+                              0 _col2 (type: string)
+                              1 _col1 (type: string)
                             outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                            Statistics: Num rows: 5142 Data size: 2697293 Basic stats: COMPLETE Column stats: NONE
+                            input vertices:
+                              1 Map 5
+                            Statistics: Num rows: 5656 Data size: 2967022 Basic stats: COMPLETE Column stats: NONE
                             Map Join Operator
                               condition map:
                                    Inner Join 0 to 1
                               keys:
-                                0 _col1 (type: string)
+                                0 _col3 (type: string)
                                 1 _col0 (type: string)
-                              outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+                              outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col7
                               input vertices:
-                                1 Map 5
-                              Statistics: Num rows: 5656 Data size: 2967022 Basic stats: COMPLETE Column stats: NONE
-                              Map Join Operator
-                                condition map:
-                                     Inner Join 0 to 1
-                                keys:
-                                  0 _col2 (type: string)
-                                  1 _col0 (type: string)
+                                1 Map 6
+                              Statistics: Num rows: 6221 Data size: 3263724 Basic stats: COMPLETE Column stats: NONE
+                              Select Operator
+                                expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col3) (type: int), hash(_col5) (type: int), hash(_col4) (type: int), hash(_col7) (type: int), hash(_col2) (type: int), hash(_col2) (type: int)
                                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
-                                input vertices:
-                                  1 Map 6
                                 Statistics: Num rows: 6221 Data size: 3263724 Basic stats: COMPLETE Column stats: NONE
-                                Select Operator
-                                  expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int), hash(_col6) (type: int), hash(_col7) (type: int), hash(_col4) (type: int), hash(_col5) (type: int)
+                                Group By Operator
+                                  aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3), sum(_col4), sum(_col5), sum(_col6), sum(_col7)
+                                  mode: hash
                                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
-                                  Statistics: Num rows: 6221 Data size: 3263724 Basic stats: COMPLETE Column stats: NONE
-                                  Group By Operator
-                                    aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3), sum(_col4), sum(_col5), sum(_col6), sum(_col7)
-                                    mode: hash
-                                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+                                  Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE
+                                  Reduce Output Operator
+                                    sort order: 
                                     Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE
-                                    Reduce Output Operator
-                                      sort order: 
-                                      Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE
-                                      value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint)
+                                    value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: no inputs
         Map 3 
@@ -1293,6 +1285,26 @@ STAGE PLANS:
         Map 4 
             Map Operator Tree:
                 TableScan
+                  alias: smalltbl3
+                  filterExpr: key is not null (type: boolean)
+                  Statistics: Num rows: 10 Data size: 1840 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 10 Data size: 1840 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 10 Data size: 1840 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 10 Data size: 1840 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+        Map 5 
+            Map Operator Tree:
+                TableScan
                   alias: smalltbl2
                   filterExpr: value is not null (type: boolean)
                   Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE
@@ -1311,26 +1323,6 @@ STAGE PLANS:
                         value expressions: _col0 (type: string)
             Execution mode: vectorized, llap
             LLAP IO: no inputs
-        Map 5 
-            Map Operator Tree:
-                TableScan
-                  alias: smalltbl3
-                  filterExpr: key is not null (type: boolean)
-                  Statistics: Num rows: 10 Data size: 1840 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 10 Data size: 1840 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: string)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 10 Data size: 1840 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: string)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 10 Data size: 1840 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: vectorized, llap
-            LLAP IO: no inputs
         Map 6 
             Map Operator Tree:
                 TableScan
@@ -1609,6 +1601,26 @@ STAGE PLANS:
         Map 8 
             Map Operator Tree:
                 TableScan
+                  alias: smalltbl3
+                  filterExpr: key is not null (type: boolean)
+                  Statistics: Num rows: 10 Data size: 1840 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 10 Data size: 1840 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 10 Data size: 1840 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 10 Data size: 1840 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+        Map 9 
+            Map Operator Tree:
+                TableScan
                   alias: smalltbl2
                   filterExpr: value is not null (type: boolean)
                   Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE
@@ -1627,26 +1639,6 @@ STAGE PLANS:
                         value expressions: _col0 (type: string)
             Execution mode: vectorized, llap
             LLAP IO: no inputs
-        Map 9 
-            Map Operator Tree:
-                TableScan
-                  alias: smalltbl3
-                  filterExpr: key is not null (type: boolean)
-                  Statistics: Num rows: 10 Data size: 1840 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 10 Data size: 1840 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: string)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 10 Data size: 1840 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: string)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 10 Data size: 1840 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: vectorized, llap
-            LLAP IO: no inputs
         Reducer 2 
             Execution mode: llap
             Reduce Operator Tree:
@@ -1659,11 +1651,11 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col1, _col2, _col3
                 Statistics: Num rows: 4675 Data size: 2452085 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
-                  key expressions: _col2 (type: string)
+                  key expressions: _col1 (type: string)
                   sort order: +
-                  Map-reduce partition columns: _col2 (type: string)
+                  Map-reduce partition columns: _col1 (type: string)
                   Statistics: Num rows: 4675 Data size: 2452085 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string)
+                  value expressions: _col0 (type: string), _col2 (type: string), _col3 (type: string)
         Reducer 3 
             Execution mode: llap
             Reduce Operator Tree:
@@ -1671,20 +1663,16 @@ STAGE PLANS:
                 condition map:
                      Inner Join 0 to 1
                 keys:
-                  0 _col2 (type: string)
-                  1 _col1 (type: string)
+                  0 _col1 (type: string)
+                  1 _col0 (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4
                 Statistics: Num rows: 5142 Data size: 2697293 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string), _col2 (type: string), _col2 (type: string)
-                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Reduce Output Operator
+                  key expressions: _col2 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col2 (type: string)
                   Statistics: Num rows: 5142 Data size: 2697293 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col1 (type: string)
-                    sort order: +
-                    Map-reduce partition columns: _col1 (type: string)
-                    Statistics: Num rows: 5142 Data size: 2697293 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col0 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string)
+                  value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string)
         Reducer 4 
             Execution mode: llap
             Reduce Operator Tree:
@@ -1692,16 +1680,16 @@ STAGE PLANS:
                 condition map:
                      Inner Join 0 to 1
                 keys:
-                  0 _col1 (type: string)
-                  1 _col0 (type: string)
-                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+                  0 _col2 (type: string)
+                  1 _col1 (type: string)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                 Statistics: Num rows: 5656 Data size: 2967022 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
-                  key expressions: _col2 (type: string)
+                  key expressions: _col3 (type: string)
                   sort order: +
-                  Map-reduce partition columns: _col2 (type: string)
+                  Map-reduce partition columns: _col3 (type: string)
                   Statistics: Num rows: 5656 Data size: 2967022 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string)
+                  value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string)
         Reducer 5 
             Execution mode: llap
             Reduce Operator Tree:
@@ -1709,12 +1697,12 @@ STAGE PLANS:
                 condition map:
                      Inner Join 0 to 1
                 keys:
-                  0 _col2 (type: string)
+                  0 _col3 (type: string)
                   1 _col0 (type: string)
-                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col7
                 Statistics: Num rows: 6221 Data size: 3263724 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int), hash(_col6) (type: int), hash(_col7) (type: int), hash(_col4) (type: int), hash(_col5) (type: int)
+                  expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col3) (type: int), hash(_col5) (type: int), hash(_col4) (type: int), hash(_col7) (type: int), hash(_col2) (type: int), hash(_col2) (type: int)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
                   Statistics: Num rows: 6221 Data size: 3263724 Basic stats: COMPLETE Column stats: NONE
                   Group By Operator
@@ -1984,6 +1972,26 @@ STAGE PLANS:
         Map 8 
             Map Operator Tree:
                 TableScan
+                  alias: smalltbl3
+                  filterExpr: key is not null (type: boolean)
+                  Statistics: Num rows: 10 Data size: 1840 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 10 Data size: 1840 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 10 Data size: 1840 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 10 Data size: 1840 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+        Map 9 
+            Map Operator Tree:
+                TableScan
                   alias: smalltbl2
                   filterExpr: value is not null (type: boolean)
                   Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE
@@ -2002,26 +2010,6 @@ STAGE PLANS:
                         value expressions: _col0 (type: string)
             Execution mode: vectorized, llap
             LLAP IO: no inputs
-        Map 9 
-            Map Operator Tree:
-                TableScan
-                  alias: smalltbl3
-                  filterExpr: key is not null (type: boolean)
-                  Statistics: Num rows: 10 Data size: 1840 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 10 Data size: 1840 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: string)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 10 Data size: 1840 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: string)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 10 Data size: 1840 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: vectorized, llap
-            LLAP IO: no inputs
         Reducer 2 
             Execution mode: llap
             Reduce Operator Tree:
@@ -2034,11 +2022,11 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col1, _col2, _col3
                 Statistics: Num rows: 4675 Data size: 2452085 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
-                  key expressions: _col2 (type: string)
+                  key expressions: _col1 (type: string)
                   sort order: +
-                  Map-reduce partition columns: _col2 (type: string)
+                  Map-reduce partition columns: _col1 (type: string)
                   Statistics: Num rows: 4675 Data size: 2452085 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string)
+                  value expressions: _col0 (type: string), _col2 (type: string), _col3 (type: string)
         Reducer 3 
             Execution mode: llap
             Reduce Operator Tree:
@@ -2046,20 +2034,16 @@ STAGE PLANS:
                 condition map:
                      Inner Join 0 to 1
                 keys:
-                  0 _col2 (type: string)
-                  1 _col1 (type: string)
+                  0 _col1 (type: string)
+                  1 _col0 (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4
                 Statistics: Num rows: 5142 Data size: 2697293 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string), _col2 (type: string), _col2 (type: string)
-                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Reduce Output Operator
+                  key expressions: _col2 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col2 (type: string)
                   Statistics: Num rows: 5142 Data size: 2697293 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col1 (type: string)
-                    sort order: +
-                    Map-reduce partition columns: _col1 (type: string)
-                    Statistics: Num rows: 5142 Data size: 2697293 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col0 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string)
+                  value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string)
         Reducer 4 
             Execution mode: llap
             Reduce Operator Tree:
@@ -2067,16 +2051,16 @@ STAGE PLANS:
                 condition map:
                      Inner Join 0 to 1
                 keys:
-                  0 _col1 (type: string)
-                  1 _col0 (type: string)
-                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+                  0 _col2 (type: string)
+                  1 _col1 (type: string)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                 Statistics: Num rows: 5656 Data size: 2967022 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
-                  key expressions: _col2 (type: string)
+                  key expressions: _col3 (type: string)
                   sort order: +
-                  Map-reduce partition columns: _col2 (type: string)
+                  Map-reduce partition columns: _col3 (type: string)
                   Statistics: Num rows: 5656 Data size: 2967022 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string)
+                  value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string)
         Reducer 5 
             Execution mode: llap
             Reduce Operator Tree:
@@ -2084,12 +2068,12 @@ STAGE PLANS:
                 condition map:
                      Inner Join 0 to 1
                 keys:
-                  0 _col2 (type: string)
+                  0 _col3 (type: string)
                   1 _col0 (type: string)
-                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col7
                 Statistics: Num rows: 6221 Data size: 3263724 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int), hash(_col6) (type: int), hash(_col7) (type: int), hash(_col4) (type: int), hash(_col5) (type: int)
+                  expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col3) (type: int), hash(_col5) (type: int), hash(_col4) (type: int), hash(_col7) (type: int), hash(_col2) (type: int), hash(_col2) (type: int)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
                   Statistics: Num rows: 6221 Data size: 3263724 Basic stats: COMPLETE Column stats: NONE
                   Group By Operator

[2/3] hive git commit: HIVE-20767: Multiple project between join operators may affect join reordering using constraints (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

Posted by jc...@apache.org.

http://git-wip-us.apache.org/repos/asf/hive/blob/4a7de47a/ql/src/test/results/clientpositive/perf/spark/query54.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query54.q.out b/ql/src/test/results/clientpositive/perf/spark/query54.q.out
index f10250f..7d488f9 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query54.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query54.q.out
@@ -1,6 +1,6 @@
-Warning: Shuffle Join JOIN[84][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Work 'Reducer 4' is a cross product
-Warning: Shuffle Join JOIN[115][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 5' is a cross product
-Warning: Map Join MAPJOIN[145][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[111][tables = [$hdt$_1, $hdt$_2]] in Work 'Reducer 3' is a cross product
+Warning: Shuffle Join JOIN[107][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Work 'Reducer 14' is a cross product
+Warning: Shuffle Join JOIN[114][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Work 'Reducer 4' is a cross product
 Warning: Map Join MAPJOIN[144][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
 PREHOOK: query: explain
 with my_customers as (
@@ -133,89 +133,18 @@ POSTHOOK: Input: default@web_sales
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-3 depends on stages: Stage-2
-  Stage-4 depends on stages: Stage-3
-  Stage-1 depends on stages: Stage-4
+  Stage-1 depends on stages: Stage-3
   Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 28 <- Map 27 (GROUP, 2)
-        Reducer 29 <- Reducer 28 (GROUP, 1)
+        Reducer 29 <- Map 28 (GROUP, 2)
+        Reducer 30 <- Reducer 29 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
-        Map 27 
-            Map Operator Tree:
-                TableScan
-                  alias: date_dim
-                  filterExpr: ((d_year = 1999) and (d_moy = 3)) (type: boolean)
-                  Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: ((d_moy = 3) and (d_year = 1999)) (type: boolean)
-                    Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: (d_month_seq + 3) (type: int)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
-                      Group By Operator
-                        keys: _col0 (type: int)
-                        mode: hash
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: int)
-                          sort order: +
-                          Map-reduce partition columns: _col0 (type: int)
-                          Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: vectorized
-        Reducer 28 
-            Execution mode: vectorized
-            Reduce Operator Tree:
-              Group By Operator
-                keys: KEY._col0 (type: int)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      sort order: 
-                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col0 (type: bigint)
-        Reducer 29 
-            Execution mode: vectorized
-            Local Work:
-              Map Reduce Local Work
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: count(VALUE._col0)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                Filter Operator
-                  predicate: (sq_count_check(_col0) <= 1) (type: boolean)
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                    Spark HashTable Sink Operator
-                      keys:
-                        0 
-                        1 
-
-  Stage: Stage-3
-    Spark
-      Edges:
-        Reducer 23 <- Map 22 (GROUP, 2)
-        Reducer 24 <- Reducer 23 (GROUP, 1)
-#### A masked pattern was here ####
-      Vertices:
-        Map 22 
+        Map 28 
             Map Operator Tree:
                 TableScan
                   alias: date_dim
@@ -239,7 +168,7 @@ STAGE PLANS:
                           Map-reduce partition columns: _col0 (type: int)
                           Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized
-        Reducer 23 
+        Reducer 29 
             Execution mode: vectorized
             Reduce Operator Tree:
               Group By Operator
@@ -258,7 +187,7 @@ STAGE PLANS:
                       sort order: 
                       Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col0 (type: bigint)
-        Reducer 24 
+        Reducer 30 
             Execution mode: vectorized
             Local Work:
               Map Reduce Local Work
@@ -278,11 +207,11 @@ STAGE PLANS:
                         0 
                         1 
 
-  Stage: Stage-4
+  Stage: Stage-3
     Spark
 #### A masked pattern was here ####
       Vertices:
-        Map 12 
+        Map 18 
             Map Operator Tree:
                 TableScan
                   alias: store
@@ -306,25 +235,52 @@ STAGE PLANS:
   Stage: Stage-1
     Spark
       Edges:
-        Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 654), Reducer 17 (PARTITION-LEVEL SORT, 654)
-        Reducer 14 <- Map 13 (PARTITION-LEVEL SORT, 458), Map 18 (PARTITION-LEVEL SORT, 458), Map 19 (PARTITION-LEVEL SORT, 458)
-        Reducer 15 <- Map 20 (PARTITION-LEVEL SORT, 505), Reducer 14 (PARTITION-LEVEL SORT, 505)
-        Reducer 16 <- Map 21 (PARTITION-LEVEL SORT, 1009), Reducer 15 (PARTITION-LEVEL SORT, 1009)
-        Reducer 17 <- Reducer 16 (GROUP, 610)
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 9 (PARTITION-LEVEL SORT, 398)
-        Reducer 26 <- Map 25 (GROUP, 2)
-        Reducer 3 <- Reducer 11 (PARTITION-LEVEL SORT, 772), Reducer 2 (PARTITION-LEVEL SORT, 772)
-        Reducer 31 <- Map 30 (GROUP, 2)
-        Reducer 4 <- Reducer 26 (PARTITION-LEVEL SORT, 1), Reducer 3 (PARTITION-LEVEL SORT, 1)
-        Reducer 5 <- Reducer 31 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1)
+        Reducer 10 <- Reducer 9 (GROUP, 1)
+        Reducer 12 <- Map 11 (PARTITION-LEVEL SORT, 398), Map 15 (PARTITION-LEVEL SORT, 398)
+        Reducer 13 <- Reducer 12 (PARTITION-LEVEL SORT, 772), Reducer 17 (PARTITION-LEVEL SORT, 772)
+        Reducer 14 <- Reducer 13 (PARTITION-LEVEL SORT, 1), Reducer 32 (PARTITION-LEVEL SORT, 1)
+        Reducer 17 <- Map 16 (PARTITION-LEVEL SORT, 654), Reducer 23 (PARTITION-LEVEL SORT, 654)
+        Reducer 2 <- Map 1 (GROUP, 2)
+        Reducer 20 <- Map 19 (PARTITION-LEVEL SORT, 458), Map 24 (PARTITION-LEVEL SORT, 458), Map 25 (PARTITION-LEVEL SORT, 458)
+        Reducer 21 <- Map 26 (PARTITION-LEVEL SORT, 505), Reducer 20 (PARTITION-LEVEL SORT, 505)
+        Reducer 22 <- Map 27 (PARTITION-LEVEL SORT, 1009), Reducer 21 (PARTITION-LEVEL SORT, 1009)
+        Reducer 23 <- Reducer 22 (GROUP, 610)
+        Reducer 3 <- Reducer 10 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1)
+        Reducer 32 <- Map 31 (GROUP, 2)
+        Reducer 4 <- Reducer 14 (PARTITION-LEVEL SORT, 1), Reducer 3 (PARTITION-LEVEL SORT, 1)
+        Reducer 5 <- Reducer 4 (GROUP, 1009)
         Reducer 6 <- Reducer 5 (GROUP, 1009)
-        Reducer 7 <- Reducer 6 (GROUP, 1009)
-        Reducer 8 <- Reducer 7 (SORT, 1)
+        Reducer 7 <- Reducer 6 (SORT, 1)
+        Reducer 9 <- Map 1 (GROUP, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
             Map Operator Tree:
                 TableScan
+                  alias: date_dim
+                  filterExpr: ((d_year = 1999) and (d_moy = 3)) (type: boolean)
+                  Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: ((d_moy = 3) and (d_year = 1999)) (type: boolean)
+                    Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: (d_month_seq + 3) (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        keys: _col0 (type: int)
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: int)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: int)
+                          Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized
+        Map 11 
+            Map Operator Tree:
+                TableScan
                   alias: store_sales
                   filterExpr: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean)
                   Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
@@ -342,7 +298,27 @@ STAGE PLANS:
                         Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col1 (type: int), _col2 (type: decimal(7,2))
             Execution mode: vectorized
-        Map 10 
+        Map 15 
+            Map Operator Tree:
+                TableScan
+                  alias: date_dim
+                  filterExpr: d_date_sk is not null (type: boolean)
+                  Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: d_date_sk is not null (type: boolean)
+                    Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: d_date_sk (type: int), d_month_seq (type: int)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col1 (type: int)
+            Execution mode: vectorized
+        Map 16 
             Map Operator Tree:
                 TableScan
                   alias: customer_address
@@ -363,7 +339,7 @@ STAGE PLANS:
                           1 _col0 (type: string), _col1 (type: string)
                         outputColumnNames: _col0
                         input vertices:
-                          1 Map 12
+                          1 Map 18
                         Statistics: Num rows: 44000000 Data size: 44654715780 Basic stats: COMPLETE Column stats: NONE
                         Reduce Output Operator
                           key expressions: _col0 (type: int)
@@ -373,7 +349,7 @@ STAGE PLANS:
             Execution mode: vectorized
             Local Work:
               Map Reduce Local Work
-        Map 13 
+        Map 19 
             Map Operator Tree:
                 TableScan
                   alias: catalog_sales
@@ -393,7 +369,7 @@ STAGE PLANS:
                         Statistics: Num rows: 431992504 Data size: 58579807164 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col1 (type: int), _col2 (type: int)
             Execution mode: vectorized
-        Map 18 
+        Map 24 
             Map Operator Tree:
                 TableScan
                   alias: web_sales
@@ -413,7 +389,7 @@ STAGE PLANS:
                         Statistics: Num rows: 431992504 Data size: 58579807164 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col1 (type: int), _col2 (type: int)
             Execution mode: vectorized
-        Map 19 
+        Map 25 
             Map Operator Tree:
                 TableScan
                   alias: date_dim
@@ -432,7 +408,7 @@ STAGE PLANS:
                         Map-reduce partition columns: _col0 (type: int)
                         Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized
-        Map 20 
+        Map 26 
             Map Operator Tree:
                 TableScan
                   alias: item
@@ -451,7 +427,7 @@ STAGE PLANS:
                         Map-reduce partition columns: _col0 (type: int)
                         Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized
-        Map 21 
+        Map 27 
             Map Operator Tree:
                 TableScan
                   alias: customer
@@ -471,7 +447,7 @@ STAGE PLANS:
                         Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col1 (type: int)
             Execution mode: vectorized
-        Map 25 
+        Map 31 
             Map Operator Tree:
                 TableScan
                   alias: date_dim
@@ -495,51 +471,83 @@ STAGE PLANS:
                           Map-reduce partition columns: _col0 (type: int)
                           Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized
-        Map 30 
-            Map Operator Tree:
-                TableScan
-                  alias: date_dim
-                  filterExpr: ((d_year = 1999) and (d_moy = 3)) (type: boolean)
-                  Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: ((d_moy = 3) and (d_year = 1999)) (type: boolean)
-                    Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: (d_month_seq + 3) (type: int)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
-                      Group By Operator
-                        keys: _col0 (type: int)
-                        mode: hash
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: int)
-                          sort order: +
-                          Map-reduce partition columns: _col0 (type: int)
-                          Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: vectorized
-        Map 9 
-            Map Operator Tree:
-                TableScan
-                  alias: date_dim
-                  filterExpr: d_date_sk is not null (type: boolean)
-                  Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: d_date_sk is not null (type: boolean)
-                    Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: d_date_sk (type: int), d_month_seq (type: int)
-                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col1 (type: int)
+        Reducer 10 
             Execution mode: vectorized
-        Reducer 11 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: (sq_count_check(_col0) <= 1) (type: boolean)
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+        Reducer 12 
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+                outputColumnNames: _col1, _col2, _col4
+                Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col1 (type: int)
+                  sort order: +
+                  Map-reduce partition columns: _col1 (type: int)
+                  Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col2 (type: decimal(7,2)), _col4 (type: int)
+        Reducer 13 
+            Local Work:
+              Map Reduce Local Work
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col1 (type: int)
+                  1 _col5 (type: int)
+                outputColumnNames: _col2, _col4, _col10
+                Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+                Map Join Operator
+                  condition map:
+                       Inner Join 0 to 1
+                  keys:
+                    0 
+                    1 
+                  outputColumnNames: _col2, _col4, _col10
+                  input vertices:
+                    1 Reducer 30
+                  Statistics: Num rows: 696954748 Data size: 67758142923 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 696954748 Data size: 67758142923 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col2 (type: decimal(7,2)), _col4 (type: int), _col10 (type: int)
+        Reducer 14 
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Left Outer Join 0 to 1
+                keys:
+                  0 
+                  1 
+                outputColumnNames: _col2, _col4, _col10, _col13
+                Statistics: Num rows: 6363893803988 Data size: 7746260663496473 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col10 (type: int), _col2 (type: decimal(7,2)), _col4 (type: int), _col13 (type: int)
+                  outputColumnNames: _col0, _col4, _col11, _col13
+                  Statistics: Num rows: 6363893803988 Data size: 7746260663496473 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 6363893803988 Data size: 7746260663496473 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: int), _col4 (type: decimal(7,2)), _col11 (type: int), _col13 (type: int)
+        Reducer 17 
             Reduce Operator Tree:
               Join Operator
                 condition map:
@@ -554,7 +562,19 @@ STAGE PLANS:
                   sort order: +
                   Map-reduce partition columns: _col5 (type: int)
                   Statistics: Num rows: 316240138 Data size: 42883351551 Basic stats: COMPLETE Column stats: NONE
-        Reducer 14 
+        Reducer 2 
+            Execution mode: vectorized
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: int)
+        Reducer 20 
             Reduce Operator Tree:
               Join Operator
                 condition map:
@@ -570,7 +590,7 @@ STAGE PLANS:
                   Map-reduce partition columns: _col2 (type: int)
                   Statistics: Num rows: 475191764 Data size: 64437789277 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col1 (type: int)
-        Reducer 15 
+        Reducer 21 
             Reduce Operator Tree:
               Join Operator
                 condition map:
@@ -585,7 +605,7 @@ STAGE PLANS:
                   sort order: +
                   Map-reduce partition columns: _col1 (type: int)
                   Statistics: Num rows: 522710951 Data size: 70881569741 Basic stats: COMPLETE Column stats: NONE
-        Reducer 16 
+        Reducer 22 
             Reduce Operator Tree:
               Join Operator
                 condition map:
@@ -605,7 +625,7 @@ STAGE PLANS:
                     sort order: ++
                     Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
                     Statistics: Num rows: 574982058 Data size: 77969728405 Basic stats: COMPLETE Column stats: NONE
-        Reducer 17 
+        Reducer 23 
             Execution mode: vectorized
             Reduce Operator Tree:
               Group By Operator
@@ -623,61 +643,24 @@ STAGE PLANS:
                     Map-reduce partition columns: _col1 (type: int)
                     Statistics: Num rows: 287491029 Data size: 38984864202 Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col0 (type: int)
-        Reducer 2 
+        Reducer 3 
             Reduce Operator Tree:
               Join Operator
                 condition map:
-                     Inner Join 0 to 1
+                     Right Outer Join 0 to 1
+                filter predicates:
+                  0 
+                  1 {true}
                 keys:
-                  0 _col0 (type: int)
-                  1 _col0 (type: int)
-                outputColumnNames: _col1, _col2, _col4
-                Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col1 (type: int)
-                  sort order: +
-                  Map-reduce partition columns: _col1 (type: int)
-                  Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col2 (type: decimal(7,2)), _col4 (type: int)
-        Reducer 26 
-            Execution mode: vectorized
-            Reduce Operator Tree:
-              Group By Operator
-                keys: KEY._col0 (type: int)
-                mode: mergepartial
+                  0 
+                  1 
                 outputColumnNames: _col0
-                Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 9131 Data size: 10299768 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   sort order: 
-                  Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 9131 Data size: 10299768 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col0 (type: int)
-        Reducer 3 
-            Local Work:
-              Map Reduce Local Work
-            Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 _col1 (type: int)
-                  1 _col5 (type: int)
-                outputColumnNames: _col2, _col4, _col10
-                Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
-                Map Join Operator
-                  condition map:
-                       Inner Join 0 to 1
-                  keys:
-                    0 
-                    1 
-                  outputColumnNames: _col2, _col4, _col10
-                  input vertices:
-                    1 Reducer 24
-                  Statistics: Num rows: 696954748 Data size: 67758142923 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    sort order: 
-                    Statistics: Num rows: 696954748 Data size: 67758142923 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col2 (type: decimal(7,2)), _col4 (type: int), _col10 (type: int)
-        Reducer 31 
+        Reducer 32 
             Execution mode: vectorized
             Reduce Operator Tree:
               Group By Operator
@@ -690,69 +673,39 @@ STAGE PLANS:
                   Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col0 (type: int)
         Reducer 4 
-            Local Work:
-              Map Reduce Local Work
-            Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Left Outer Join 0 to 1
-                keys:
-                  0 
-                  1 
-                outputColumnNames: _col2, _col4, _col10, _col13
-                Statistics: Num rows: 6363893803988 Data size: 7746260663496473 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col10 (type: int), _col2 (type: decimal(7,2)), _col4 (type: int), _col13 (type: int)
-                  outputColumnNames: _col0, _col4, _col11, _col13
-                  Statistics: Num rows: 6363893803988 Data size: 7746260663496473 Basic stats: COMPLETE Column stats: NONE
-                  Map Join Operator
-                    condition map:
-                         Inner Join 0 to 1
-                    keys:
-                      0 
-                      1 
-                    outputColumnNames: _col0, _col4, _col11, _col13
-                    input vertices:
-                      1 Reducer 29
-                    Statistics: Num rows: 6363893803988 Data size: 7803535707732365 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: _col0 (type: int), _col4 (type: decimal(7,2)), _col11 (type: int), _col13 (type: int)
-                      outputColumnNames: _col0, _col1, _col2, _col3
-                      Statistics: Num rows: 6363893803988 Data size: 7803535707732365 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        sort order: 
-                        Statistics: Num rows: 6363893803988 Data size: 7803535707732365 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col0 (type: int), _col1 (type: decimal(7,2)), _col2 (type: int), _col3 (type: int)
-        Reducer 5 
             Reduce Operator Tree:
               Join Operator
                 condition map:
-                     Left Outer Join 0 to 1
+                     Inner Join 0 to 1
                 keys:
                   0 
                   1 
-                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                outputColumnNames: _col0, _col2, _col6, _col13, _col15
                 Statistics: Num rows: 58108714324214428 Data size: 9223372036854775807 Basic stats: COMPLETE Column stats: NONE
-                Filter Operator
-                  predicate: _col2 BETWEEN _col3 AND _col4 (type: boolean)
-                  Statistics: Num rows: 6456523813801603 Data size: 1024819115206086144 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: _col0 (type: int), _col1 (type: decimal(7,2))
-                    outputColumnNames: _col0, _col1
+                Select Operator
+                  expressions: _col2 (type: int), _col6 (type: decimal(7,2)), _col13 (type: int), _col15 (type: int), _col0 (type: int)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                  Statistics: Num rows: 58108714324214428 Data size: 9223372036854775807 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: _col2 BETWEEN _col3 AND _col4 (type: boolean)
                     Statistics: Num rows: 6456523813801603 Data size: 1024819115206086144 Basic stats: COMPLETE Column stats: NONE
-                    Group By Operator
-                      aggregations: sum(_col1)
-                      keys: _col0 (type: int)
-                      mode: hash
+                    Select Operator
+                      expressions: _col0 (type: int), _col1 (type: decimal(7,2))
                       outputColumnNames: _col0, _col1
                       Statistics: Num rows: 6456523813801603 Data size: 1024819115206086144 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
+                      Group By Operator
+                        aggregations: sum(_col1)
+                        keys: _col0 (type: int)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
                         Statistics: Num rows: 6456523813801603 Data size: 1024819115206086144 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col1 (type: decimal(17,2))
-        Reducer 6 
+                        Reduce Output Operator
+                          key expressions: _col0 (type: int)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: int)
+                          Statistics: Num rows: 6456523813801603 Data size: 1024819115206086144 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col1 (type: decimal(17,2))
+        Reducer 5 
             Execution mode: vectorized
             Reduce Operator Tree:
               Group By Operator
@@ -777,7 +730,7 @@ STAGE PLANS:
                       Map-reduce partition columns: _col0 (type: int)
                       Statistics: Num rows: 3228261906900801 Data size: 512409557603043008 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col1 (type: bigint)
-        Reducer 7 
+        Reducer 6 
             Execution mode: vectorized
             Reduce Operator Tree:
               Group By Operator
@@ -796,7 +749,7 @@ STAGE PLANS:
                     Statistics: Num rows: 1614130953450400 Data size: 256204778801521408 Basic stats: COMPLETE Column stats: NONE
                     TopN Hash Memory Usage: 0.1
                     value expressions: _col2 (type: int)
-        Reducer 8 
+        Reducer 7 
             Execution mode: vectorized
             Reduce Operator Tree:
               Select Operator
@@ -813,6 +766,25 @@ STAGE PLANS:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 9 
+            Execution mode: vectorized
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    aggregations: count()
+                    mode: hash
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: bigint)
 
   Stage: Stage-0
     Fetch Operator