You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2018/08/01 04:37:50 UTC
[1/2] hive git commit: HIVE-18201 : Disable XPROD_EDGE for
sq_count_check() created for scalar subqueries (Ashutosh Chauhan via Jesus
Camacho Rodriguez)
Repository: hive
Updated Branches:
refs/heads/master e9e1f8f6e -> 4d436953e
http://git-wip-us.apache.org/repos/asf/hive/blob/4d436953/ql/src/test/results/clientpositive/llap/vector_join_filters.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_join_filters.q.out b/ql/src/test/results/clientpositive/llap/vector_join_filters.q.out
index a111cd5..7c1780b 100644
--- a/ql/src/test/results/clientpositive/llap/vector_join_filters.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_join_filters.q.out
@@ -26,7 +26,7 @@ POSTHOOK: Output: database:default
POSTHOOK: Output: default@myinput1_n1
POSTHOOK: Lineage: myinput1_n1.key SIMPLE [(myinput1_txt_n0)myinput1_txt_n0.FieldSchema(name:key, type:int, comment:null), ]
POSTHOOK: Lineage: myinput1_n1.value SIMPLE [(myinput1_txt_n0)myinput1_txt_n0.FieldSchema(name:value, type:int, comment:null), ]
-Warning: Shuffle Join MERGEJOIN[18][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+Warning: Map Join MAPJOIN[18][bigTable=?] in task 'Map 1' is a cross product
PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a JOIN myinput1_n1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
PREHOOK: type: QUERY
PREHOOK: Input: default@myinput1_n1
http://git-wip-us.apache.org/repos/asf/hive/blob/4d436953/ql/src/test/results/clientpositive/llap/vectorized_multi_output_select.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_multi_output_select.q.out b/ql/src/test/results/clientpositive/llap/vectorized_multi_output_select.q.out
index d6bad24..43661fa 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_multi_output_select.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_multi_output_select.q.out
@@ -1,4 +1,4 @@
-Warning: Shuffle Join MERGEJOIN[63][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product
+Warning: Map Join MAPJOIN[63][bigTable=?] in task 'Reducer 2' is a cross product
PREHOOK: query: explain
select * from (
select count(*) as h8_30_to_9
@@ -32,10 +32,9 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Map 5 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE)
- Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
- Reducer 3 <- Reducer 2 (XPROD_EDGE), Reducer 4 (XPROD_EDGE)
- Reducer 4 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+ Map 1 <- Map 4 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 3 (BROADCAST_EDGE)
+ Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -58,7 +57,7 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
input vertices:
- 1 Map 5
+ 1 Map 4
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
@@ -76,7 +75,7 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
input vertices:
- 1 Map 6
+ 1 Map 5
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
@@ -89,7 +88,7 @@ STAGE PLANS:
value expressions: _col0 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: no inputs
- Map 5
+ Map 4
Map Operator Tree:
TableScan
alias: src1
@@ -109,7 +108,7 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 177 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized, llap
LLAP IO: no inputs
- Map 6
+ Map 5
Map Operator Tree:
TableScan
alias: src1
@@ -137,29 +136,24 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: bigint)
- Reducer 3
- Execution mode: llap
- Reduce Operator Tree:
- Merge Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0
- 1
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1
+ input vertices:
+ 1 Reducer 3
Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 4
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 3
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
@@ -178,7 +172,7 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Shuffle Join MERGEJOIN[63][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product
+Warning: Map Join MAPJOIN[63][bigTable=?] in task 'Reducer 2' is a cross product
PREHOOK: query: select * from (
select count(*) as h8_30_to_9
from src
http://git-wip-us.apache.org/repos/asf/hive/blob/4d436953/ql/src/test/results/clientpositive/perf/tez/query6.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query6.q.out b/ql/src/test/results/clientpositive/perf/tez/query6.q.out
index caa0e0e..a57f72c 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query6.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query6.q.out
@@ -1,4 +1,4 @@
-Warning: Shuffle Join MERGEJOIN[171][tables = [$hdt$_5, $hdt$_6]] in Stage 'Reducer 13' is a cross product
+Warning: Map Join MAPJOIN[171][bigTable=?] in task 'Reducer 19' is a cross product
PREHOOK: query: explain
select a.ca_state state, count(*) cnt
from customer_address a
@@ -52,20 +52,19 @@ POSTHOOK: type: QUERY
Plan optimized by CBO.
Vertex dependency in root stage
-Map 16 <- Reducer 15 (BROADCAST_EDGE), Reducer 19 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE)
+Map 13 <- Reducer 16 (BROADCAST_EDGE), Reducer 21 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE)
Reducer 10 <- Map 9 (SIMPLE_EDGE)
Reducer 11 <- Map 9 (SIMPLE_EDGE)
Reducer 12 <- Reducer 11 (CUSTOM_SIMPLE_EDGE)
-Reducer 13 <- Reducer 12 (CUSTOM_SIMPLE_EDGE), Reducer 22 (CUSTOM_SIMPLE_EDGE)
-Reducer 14 <- Map 23 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE)
-Reducer 15 <- Reducer 14 (CUSTOM_SIMPLE_EDGE)
-Reducer 18 <- Map 17 (SIMPLE_EDGE), Map 20 (SIMPLE_EDGE)
-Reducer 19 <- Reducer 18 (CUSTOM_SIMPLE_EDGE)
+Reducer 15 <- Map 14 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE)
+Reducer 16 <- Reducer 15 (CUSTOM_SIMPLE_EDGE)
+Reducer 19 <- Map 18 (SIMPLE_EDGE), Reducer 12 (BROADCAST_EDGE)
Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 10 (ONE_TO_ONE_EDGE)
-Reducer 22 <- Map 21 (SIMPLE_EDGE)
-Reducer 3 <- Map 16 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
-Reducer 4 <- Reducer 18 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
-Reducer 5 <- Reducer 14 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
+Reducer 20 <- Map 22 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE)
+Reducer 21 <- Reducer 20 (CUSTOM_SIMPLE_EDGE)
+Reducer 3 <- Map 13 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+Reducer 4 <- Reducer 15 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
+Reducer 5 <- Reducer 20 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
Reducer 6 <- Reducer 5 (SIMPLE_EDGE)
Reducer 7 <- Reducer 6 (SIMPLE_EDGE)
Reducer 8 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
@@ -75,16 +74,16 @@ Stage-0
limit:100
Stage-1
Reducer 7 vectorized
- File Output Operator [FS_225]
- Limit [LIM_224] (rows=100 width=88)
+ File Output Operator [FS_227]
+ Limit [LIM_226] (rows=100 width=88)
Number of rows:100
- Select Operator [SEL_223] (rows=127775039 width=88)
+ Select Operator [SEL_225] (rows=127775039 width=88)
Output:["_col0","_col1"]
<-Reducer 6 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_222]
- Filter Operator [FIL_221] (rows=127775039 width=88)
+ SHUFFLE [RS_224]
+ Filter Operator [FIL_223] (rows=127775039 width=88)
predicate:(_col1 >= 10L)
- Group By Operator [GBY_220] (rows=383325119 width=88)
+ Group By Operator [GBY_222] (rows=383325119 width=88)
Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0
<-Reducer 5 [SIMPLE_EDGE]
SHUFFLE [RS_69]
@@ -93,7 +92,7 @@ Stage-0
Output:["_col0","_col1"],aggregations:["count()"],keys:_col9
Merge Join Operator [MERGEJOIN_174] (rows=766650239 width=88)
Conds:RS_64._col4=RS_65._col0(Inner),Output:["_col9"]
- <-Reducer 14 [SIMPLE_EDGE]
+ <-Reducer 20 [SIMPLE_EDGE]
SHUFFLE [RS_65]
PartitionCols:_col0
Select Operator [SEL_54] (rows=169400 width=1436)
@@ -101,86 +100,84 @@ Stage-0
Filter Operator [FIL_53] (rows=169400 width=1436)
predicate:(_col4 > (1.2 * CAST( _col0 AS decimal(16,6))))
Merge Join Operator [MERGEJOIN_172] (rows=508200 width=1436)
- Conds:RS_50._col1=RS_214._col2(Inner),Output:["_col0","_col3","_col4"]
- <-Map 23 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_214]
+ Conds:RS_213._col1=RS_216._col2(Inner),Output:["_col0","_col3","_col4"]
+ <-Map 22 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_216]
PartitionCols:_col2
- Select Operator [SEL_213] (rows=462000 width=1436)
+ Select Operator [SEL_215] (rows=462000 width=1436)
Output:["_col0","_col1","_col2"]
- Filter Operator [FIL_212] (rows=462000 width=1436)
+ Filter Operator [FIL_214] (rows=462000 width=1436)
predicate:(i_category is not null and i_item_sk is not null)
TableScan [TS_44] (rows=462000 width=1436)
default@item,i,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_current_price","i_category"]
- <-Reducer 13 [SIMPLE_EDGE]
- SHUFFLE [RS_50]
+ <-Reducer 19 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_213]
PartitionCols:_col1
- Merge Join Operator [MERGEJOIN_171] (rows=231000 width=1445)
+ Map Join Operator [MAPJOIN_212] (rows=231000 width=1445)
Conds:(Inner),Output:["_col0","_col1"]
- <-Reducer 12 [CUSTOM_SIMPLE_EDGE] vectorized
- PARTITION_ONLY_SHUFFLE [RS_205]
- Select Operator [SEL_204] (rows=1 width=8)
- Filter Operator [FIL_203] (rows=1 width=8)
+ <-Reducer 12 [BROADCAST_EDGE] vectorized
+ BROADCAST [RS_209]
+ Select Operator [SEL_208] (rows=1 width=8)
+ Filter Operator [FIL_207] (rows=1 width=8)
predicate:(sq_count_check(_col0) <= 1)
- Group By Operator [GBY_202] (rows=1 width=8)
+ Group By Operator [GBY_206] (rows=1 width=8)
Output:["_col0"],aggregations:["count(VALUE._col0)"]
<-Reducer 11 [CUSTOM_SIMPLE_EDGE] vectorized
- PARTITION_ONLY_SHUFFLE [RS_201]
- Group By Operator [GBY_200] (rows=1 width=8)
+ PARTITION_ONLY_SHUFFLE [RS_205]
+ Group By Operator [GBY_204] (rows=1 width=8)
Output:["_col0"],aggregations:["count()"]
- Select Operator [SEL_199] (rows=9131 width=1119)
- Group By Operator [GBY_198] (rows=9131 width=1119)
+ Select Operator [SEL_203] (rows=9131 width=1119)
+ Group By Operator [GBY_202] (rows=9131 width=1119)
Output:["_col0"],keys:KEY._col0
<-Map 9 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_185]
+ SHUFFLE [RS_186]
PartitionCols:_col0
- Group By Operator [GBY_183] (rows=18262 width=1119)
+ Group By Operator [GBY_184] (rows=18262 width=1119)
Output:["_col0"],keys:d_month_seq
- Select Operator [SEL_181] (rows=18262 width=1119)
+ Select Operator [SEL_182] (rows=18262 width=1119)
Output:["d_month_seq"]
- Filter Operator [FIL_179] (rows=18262 width=1119)
+ Filter Operator [FIL_180] (rows=18262 width=1119)
predicate:((d_moy = 2) and (d_year = 2000))
TableScan [TS_3] (rows=73049 width=1119)
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_month_seq","d_year","d_moy"]
- <-Reducer 22 [CUSTOM_SIMPLE_EDGE] vectorized
- PARTITION_ONLY_SHUFFLE [RS_211]
- Select Operator [SEL_210] (rows=231000 width=1436)
- Output:["_col0","_col1"]
- Group By Operator [GBY_209] (rows=231000 width=1436)
- Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0
- <-Map 21 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_208]
- PartitionCols:_col0
- Group By Operator [GBY_207] (rows=462000 width=1436)
- Output:["_col0","_col1","_col2"],aggregations:["sum(i_current_price)","count(i_current_price)"],keys:i_category
- Filter Operator [FIL_206] (rows=462000 width=1436)
- predicate:i_category is not null
- TableScan [TS_23] (rows=462000 width=1436)
- default@item,j,Tbl:COMPLETE,Col:NONE,Output:["i_current_price","i_category"]
+ <-Select Operator [SEL_211] (rows=231000 width=1436)
+ Output:["_col0","_col1"]
+ Group By Operator [GBY_210] (rows=231000 width=1436)
+ Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0
+ <-Map 18 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_201]
+ PartitionCols:_col0
+ Group By Operator [GBY_200] (rows=462000 width=1436)
+ Output:["_col0","_col1","_col2"],aggregations:["sum(i_current_price)","count(i_current_price)"],keys:i_category
+ Filter Operator [FIL_199] (rows=462000 width=1436)
+ predicate:i_category is not null
+ TableScan [TS_23] (rows=462000 width=1436)
+ default@item,j,Tbl:COMPLETE,Col:NONE,Output:["i_current_price","i_category"]
<-Reducer 4 [SIMPLE_EDGE]
SHUFFLE [RS_64]
PartitionCols:_col4
Merge Join Operator [MERGEJOIN_173] (rows=696954748 width=88)
Conds:RS_61._col5=RS_62._col0(Inner),Output:["_col4","_col9"]
- <-Reducer 18 [SIMPLE_EDGE]
+ <-Reducer 15 [SIMPLE_EDGE]
SHUFFLE [RS_62]
PartitionCols:_col0
Merge Join Operator [MERGEJOIN_170] (rows=88000001 width=860)
- Conds:RS_192._col1=RS_195._col0(Inner),Output:["_col0","_col3"]
- <-Map 17 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_192]
+ Conds:RS_193._col1=RS_196._col0(Inner),Output:["_col0","_col3"]
+ <-Map 14 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_193]
PartitionCols:_col1
- Select Operator [SEL_191] (rows=80000000 width=860)
+ Select Operator [SEL_192] (rows=80000000 width=860)
Output:["_col0","_col1"]
- Filter Operator [FIL_190] (rows=80000000 width=860)
+ Filter Operator [FIL_191] (rows=80000000 width=860)
predicate:(c_current_addr_sk is not null and c_customer_sk is not null)
TableScan [TS_13] (rows=80000000 width=860)
default@customer,c,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_addr_sk"]
- <-Map 20 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_195]
+ <-Map 17 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_196]
PartitionCols:_col0
- Select Operator [SEL_194] (rows=40000000 width=1014)
+ Select Operator [SEL_195] (rows=40000000 width=1014)
Output:["_col0","_col1"]
- Filter Operator [FIL_193] (rows=40000000 width=1014)
+ Filter Operator [FIL_194] (rows=40000000 width=1014)
predicate:ca_address_sk is not null
TableScan [TS_16] (rows=40000000 width=1014)
default@customer_address,a,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"]
@@ -188,70 +185,70 @@ Stage-0
SHUFFLE [RS_61]
PartitionCols:_col5
Merge Join Operator [MERGEJOIN_169] (rows=633595212 width=88)
- Conds:RS_58._col0=RS_219._col0(Inner),Output:["_col4","_col5"]
+ Conds:RS_58._col0=RS_221._col0(Inner),Output:["_col4","_col5"]
<-Reducer 2 [SIMPLE_EDGE]
PARTITION_ONLY_SHUFFLE [RS_58]
PartitionCols:_col0
Merge Join Operator [MERGEJOIN_168] (rows=80353 width=1119)
- Conds:RS_177._col1=RS_187._col0(Inner),Output:["_col0"]
+ Conds:RS_178._col1=RS_188._col0(Inner),Output:["_col0"]
<-Map 1 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_177]
+ SHUFFLE [RS_178]
PartitionCols:_col1
- Select Operator [SEL_176] (rows=73049 width=1119)
+ Select Operator [SEL_177] (rows=73049 width=1119)
Output:["_col0","_col1"]
- Filter Operator [FIL_175] (rows=73049 width=1119)
+ Filter Operator [FIL_176] (rows=73049 width=1119)
predicate:(d_date_sk is not null and d_month_seq is not null)
TableScan [TS_0] (rows=73049 width=1119)
default@date_dim,d,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"]
<-Reducer 10 [ONE_TO_ONE_EDGE] vectorized
- FORWARD [RS_187]
+ FORWARD [RS_188]
PartitionCols:_col0
- Group By Operator [GBY_186] (rows=9131 width=1119)
+ Group By Operator [GBY_187] (rows=9131 width=1119)
Output:["_col0"],keys:KEY._col0
<-Map 9 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_184]
+ SHUFFLE [RS_185]
PartitionCols:_col0
- Group By Operator [GBY_182] (rows=18262 width=1119)
+ Group By Operator [GBY_183] (rows=18262 width=1119)
Output:["_col0"],keys:d_month_seq
- Select Operator [SEL_180] (rows=18262 width=1119)
+ Select Operator [SEL_181] (rows=18262 width=1119)
Output:["d_month_seq"]
- Filter Operator [FIL_178] (rows=18262 width=1119)
+ Filter Operator [FIL_179] (rows=18262 width=1119)
predicate:((d_moy = 2) and (d_year = 2000) and d_month_seq is not null)
Please refer to the previous TableScan [TS_3]
- <-Map 16 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_219]
+ <-Map 13 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_221]
PartitionCols:_col0
- Select Operator [SEL_218] (rows=575995635 width=88)
+ Select Operator [SEL_220] (rows=575995635 width=88)
Output:["_col0","_col1","_col2"]
- Filter Operator [FIL_217] (rows=575995635 width=88)
+ Filter Operator [FIL_219] (rows=575995635 width=88)
predicate:((ss_customer_sk BETWEEN DynamicValue(RS_62_c_c_customer_sk_min) AND DynamicValue(RS_62_c_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_62_c_c_customer_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_65_i_i_item_sk_min) AND DynamicValue(RS_65_i_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_65_i_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_58_d_d_date_sk_min) AND DynamicValue(RS_58_d_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_58_d_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null)
TableScan [TS_10] (rows=575995635 width=88)
default@store_sales,s,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk"]
- <-Reducer 15 [BROADCAST_EDGE] vectorized
- BROADCAST [RS_216]
- Group By Operator [GBY_215] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
- <-Reducer 14 [CUSTOM_SIMPLE_EDGE]
- SHUFFLE [RS_135]
- Group By Operator [GBY_134] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_133] (rows=169400 width=1436)
- Output:["_col0"]
- Please refer to the previous Select Operator [SEL_54]
- <-Reducer 19 [BROADCAST_EDGE] vectorized
- BROADCAST [RS_197]
- Group By Operator [GBY_196] (rows=1 width=12)
+ <-Reducer 16 [BROADCAST_EDGE] vectorized
+ BROADCAST [RS_198]
+ Group By Operator [GBY_197] (rows=1 width=12)
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=88000000)"]
- <-Reducer 18 [CUSTOM_SIMPLE_EDGE]
+ <-Reducer 15 [CUSTOM_SIMPLE_EDGE]
SHUFFLE [RS_130]
Group By Operator [GBY_129] (rows=1 width=12)
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=88000000)"]
Select Operator [SEL_128] (rows=88000001 width=860)
Output:["_col0"]
Please refer to the previous Merge Join Operator [MERGEJOIN_170]
+ <-Reducer 21 [BROADCAST_EDGE] vectorized
+ BROADCAST [RS_218]
+ Group By Operator [GBY_217] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
+ <-Reducer 20 [CUSTOM_SIMPLE_EDGE]
+ SHUFFLE [RS_135]
+ Group By Operator [GBY_134] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
+ Select Operator [SEL_133] (rows=169400 width=1436)
+ Output:["_col0"]
+ Please refer to the previous Select Operator [SEL_54]
<-Reducer 8 [BROADCAST_EDGE] vectorized
- BROADCAST [RS_189]
- Group By Operator [GBY_188] (rows=1 width=12)
+ BROADCAST [RS_190]
+ Group By Operator [GBY_189] (rows=1 width=12)
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
<-Reducer 2 [CUSTOM_SIMPLE_EDGE]
PARTITION_ONLY_SHUFFLE [RS_125]
[2/2] hive git commit: HIVE-18201 : Disable XPROD_EDGE for
sq_count_check() created for scalar subqueries (Ashutosh Chauhan via Jesus
Camacho Rodriguez)
Posted by ha...@apache.org.
HIVE-18201 : Disable XPROD_EDGE for sq_count_check() created for scalar subqueries (Ashutosh Chauhan via Jesus Camacho Rodriguez)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/4d436953
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/4d436953
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/4d436953
Branch: refs/heads/master
Commit: 4d436953e6be1302a0867aa16a8c5ecd2804eed7
Parents: e9e1f8f
Author: Ashutosh Chauhan <ha...@apache.org>
Authored: Tue Dec 12 15:15:00 2017 -0800
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Tue Jul 31 21:36:09 2018 -0700
----------------------------------------------------------------------
.../org/apache/hadoop/hive/conf/HiveConf.java | 2 +
.../hive/ql/optimizer/ConvertJoinMapJoin.java | 25 +-
.../test/queries/clientpositive/perf/query6.q | 2 +
.../queries/clientpositive/subquery_in_having.q | 4 +-
.../clientpositive/llap/auto_join_filters.q.out | 4 +-
.../clientpositive/llap/auto_join_nulls.q.out | 2 +-
.../results/clientpositive/llap/mapjoin2.q.out | 2 +-
.../clientpositive/llap/mapjoin_hint.q.out | 62 ++-
.../llap/subquery_in_having.q.out | 427 ++++++++-----------
.../llap/tez_fixed_bucket_pruning.q.out | 252 +++++------
.../llap/vector_complex_all.q.out | 94 ++--
.../llap/vector_groupby_mapjoin.q.out | 113 ++---
.../llap/vector_join_filters.q.out | 2 +-
.../llap/vectorized_multi_output_select.q.out | 58 ++-
.../clientpositive/perf/tez/query6.q.out | 189 ++++----
15 files changed, 592 insertions(+), 646 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/4d436953/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index cce908f..093b4a7 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2027,6 +2027,8 @@ public class HiveConf extends Configuration {
"However, if it is on, and the predicted number of entries in hashtable for a given join \n" +
"input is larger than this number, the join will not be converted to a mapjoin. \n" +
"The value \"-1\" means no limit."),
+ XPRODSMALLTABLEROWSTHRESHOLD("hive.xprod.mapjoin.small.table.rows", 1,"Maximum number of rows on build side"
+ + " of map join before it switches over to cross product edge"),
HIVECONVERTJOINMAXSHUFFLESIZE("hive.auto.convert.join.shuffle.max.size", 10000000000L,
"If hive.auto.convert.join.noconditionaltask is off, this parameter does not take affect. \n" +
"However, if it is on, and the predicted size of the larger input for a given join is greater \n" +
http://git-wip-us.apache.org/repos/asf/hive/blob/4d436953/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
index 011dadf..4145baf 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
@@ -113,14 +113,6 @@ public class ConvertJoinMapJoin implements NodeProcessor {
MemoryMonitorInfo memoryMonitorInfo = getMemoryMonitorInfo(maxSize, context.conf, llapInfo);
joinOp.getConf().setMemoryMonitorInfo(memoryMonitorInfo);
- // not use map join in case of cross product
- boolean cartesianProductEdgeEnabled =
- HiveConf.getBoolVar(context.conf, HiveConf.ConfVars.TEZ_CARTESIAN_PRODUCT_EDGE_ENABLED);
- if (cartesianProductEdgeEnabled && !hasOuterJoin(joinOp) && isCrossProduct(joinOp)) {
- fallbackToMergeJoin(joinOp, context);
- return null;
- }
-
TezBucketJoinProcCtx tezBucketJoinProcCtx = new TezBucketJoinProcCtx(context.conf);
boolean hiveConvertJoin = context.conf.getBoolVar(HiveConf.ConfVars.HIVECONVERTJOIN) &
!context.parseContext.getDisableMapJoin();
@@ -988,6 +980,23 @@ public class ConvertJoinMapJoin implements NodeProcessor {
return -1;
}
+ // only allow cross product in map joins if build side is 'small'
+ boolean cartesianProductEdgeEnabled =
+ HiveConf.getBoolVar(context.conf, HiveConf.ConfVars.TEZ_CARTESIAN_PRODUCT_EDGE_ENABLED);
+ if (cartesianProductEdgeEnabled && !hasOuterJoin(joinOp) && isCrossProduct(joinOp)) {
+ for (int i = 0 ; i < joinOp.getParentOperators().size(); i ++) {
+ if (i != bigTablePosition) {
+ Statistics parentStats = joinOp.getParentOperators().get(i).getStatistics();
+ if (parentStats.getNumRows() >
+ HiveConf.getIntVar(context.conf, HiveConf.ConfVars.XPRODSMALLTABLEROWSTHRESHOLD)) {
+ // if any of the smaller sides is estimated to generate more rows than
+ // the threshold, we disable the map join
+ return -1;
+ }
+ }
+ }
+ }
+
// We store the total memory that this MapJoin is going to use,
// which is calculated as totalSize/buckets, with totalSize
// equal to sum of small tables size.
http://git-wip-us.apache.org/repos/asf/hive/blob/4d436953/ql/src/test/queries/clientpositive/perf/query6.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/perf/query6.q b/ql/src/test/queries/clientpositive/perf/query6.q
index d45045d..aabce52 100644
--- a/ql/src/test/queries/clientpositive/perf/query6.q
+++ b/ql/src/test/queries/clientpositive/perf/query6.q
@@ -1,3 +1,5 @@
+set hive.auto.convert.join=true;
+set hive.tez.cartesian-product.enabled=true;
set hive.mapred.mode=nonstrict;
-- start query 1 in stream 0 using template query6.tpl and seed 1819994127
explain
http://git-wip-us.apache.org/repos/asf/hive/blob/4d436953/ql/src/test/queries/clientpositive/subquery_in_having.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/subquery_in_having.q b/ql/src/test/queries/clientpositive/subquery_in_having.q
index ec6981b..8b6d1a7 100644
--- a/ql/src/test/queries/clientpositive/subquery_in_having.q
+++ b/ql/src/test/queries/clientpositive/subquery_in_having.q
@@ -1,5 +1,6 @@
--! qt:dataset:src
set hive.mapred.mode=nonstrict;
+set hive.optimize.shared.work.extended=false;
-- SORT_QUERY_RESULTS
-- data setup
@@ -154,4 +155,5 @@ group by key, value
having count(*) not in (select count(*) from src_null_n4 s1 where s1.key > '9' and s1.value <> b.value group by s1.key );
DROP TABLE src_null_n4;
-DROP TABLE part_subq;
\ No newline at end of file
+DROP TABLE part_subq;
+reset hive.optimize.shared.work.extended;
http://git-wip-us.apache.org/repos/asf/hive/blob/4d436953/ql/src/test/results/clientpositive/llap/auto_join_filters.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/auto_join_filters.q.out b/ql/src/test/results/clientpositive/llap/auto_join_filters.q.out
index 7a271fc..a639792 100644
--- a/ql/src/test/results/clientpositive/llap/auto_join_filters.q.out
+++ b/ql/src/test/results/clientpositive/llap/auto_join_filters.q.out
@@ -14,7 +14,7 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in3.txt' INTO TABLE my
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@myinput1_n5
-Warning: Shuffle Join MERGEJOIN[18][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+Warning: Map Join MAPJOIN[18][bigTable=?] in task 'Map 1' is a cross product
PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a JOIN myinput1_n5 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
PREHOOK: type: QUERY
PREHOOK: Input: default@myinput1_n5
@@ -300,7 +300,7 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in/000001_0' into tabl
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@smb_input2_n0
-Warning: Shuffle Join MERGEJOIN[18][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+Warning: Map Join MAPJOIN[18][bigTable=?] in task 'Map 1' is a cross product
PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a JOIN myinput1_n5 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
PREHOOK: type: QUERY
PREHOOK: Input: default@myinput1_n5
http://git-wip-us.apache.org/repos/asf/hive/blob/4d436953/ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out b/ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out
index c7bb127..194fc5d 100644
--- a/ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out
+++ b/ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out
@@ -14,7 +14,7 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in1.txt' INTO TABLE my
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@myinput1_n2
-Warning: Shuffle Join MERGEJOIN[14][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+Warning: Map Join MAPJOIN[14][bigTable=?] in task 'Map 1' is a cross product
PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n2 a JOIN myinput1_n2 b
PREHOOK: type: QUERY
PREHOOK: Input: default@myinput1_n2
http://git-wip-us.apache.org/repos/asf/hive/blob/4d436953/ql/src/test/results/clientpositive/llap/mapjoin2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/mapjoin2.q.out b/ql/src/test/results/clientpositive/llap/mapjoin2.q.out
index 4638fce..872f918 100644
--- a/ql/src/test/results/clientpositive/llap/mapjoin2.q.out
+++ b/ql/src/test/results/clientpositive/llap/mapjoin2.q.out
@@ -57,7 +57,7 @@ POSTHOOK: Input: default@tbl_n1
#### A masked pattern was here ####
false false true true
true true false false
-Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Map 1' is a cross product
PREHOOK: query: select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
http://git-wip-us.apache.org/repos/asf/hive/blob/4d436953/ql/src/test/results/clientpositive/llap/mapjoin_hint.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/mapjoin_hint.q.out b/ql/src/test/results/clientpositive/llap/mapjoin_hint.q.out
index 5cccce9..3c6270a 100644
--- a/ql/src/test/results/clientpositive/llap/mapjoin_hint.q.out
+++ b/ql/src/test/results/clientpositive/llap/mapjoin_hint.q.out
@@ -527,7 +527,7 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Shuffle Join MERGEJOIN[24][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product
+Warning: Map Join MAPJOIN[24][bigTable=?] in task 'Map 1' is a cross product
PREHOOK: query: explain select * from part where p_name = (select p_name from part_null_n1 where p_name is null)
PREHOOK: type: QUERY
POSTHOOK: query: explain select * from part where p_name = (select p_name from part_null_n1 where p_name is null)
@@ -541,8 +541,8 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (XPROD_EDGE), Map 3 (XPROD_EDGE), Reducer 4 (XPROD_EDGE)
- Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE)
+ Map 1 <- Map 2 (BROADCAST_EDGE), Reducer 3 (BROADCAST_EDGE)
+ Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -558,13 +558,33 @@ STAGE PLANS:
expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, _col7, _col8
Statistics: Num rows: 1 Data size: 582 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 582 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 0 to 2
+ keys:
+ 0
+ 1
+ 2
+ outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ input vertices:
+ 1 Reducer 3
+ 2 Map 2
+ Statistics: Num rows: 1 Data size: 959 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), null (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 1 Data size: 959 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 959 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: vectorized, llap
LLAP IO: no inputs
- Map 3
+ Map 2
Map Operator Tree:
TableScan
alias: part_null_n1
@@ -589,31 +609,7 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized, llap
LLAP IO: no inputs
- Reducer 2
- Execution mode: llap
- Reduce Operator Tree:
- Merge Join Operator
- condition map:
- Inner Join 0 to 1
- Inner Join 0 to 2
- keys:
- 0
- 1
- 2
- outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 959 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: int), null (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 959 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 959 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 4
+ Reducer 3
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/4d436953/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out b/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out
index 20428e1..af8e23a 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out
@@ -1570,10 +1570,9 @@ POSTHOOK: Output: default@src_null_n4
POSTHOOK: Lineage: src_null_n4.key SCRIPT []
POSTHOOK: Lineage: src_null_n4.value EXPRESSION []
Warning: Map Join MAPJOIN[131][bigTable=?] in task 'Map 1' is a cross product
-Warning: Map Join MAPJOIN[132][bigTable=?] in task 'Map 1' is a cross product
-Warning: Map Join MAPJOIN[134][bigTable=?] in task 'Map 1' is a cross product
-Warning: Shuffle Join MERGEJOIN[133][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product
-Warning: Shuffle Join MERGEJOIN[135][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 6' is a cross product
+Warning: Map Join MAPJOIN[132][bigTable=?] in task 'Map 6' is a cross product
+Warning: Map Join MAPJOIN[133][bigTable=?] in task 'Reducer 7' is a cross product
+Warning: Map Join MAPJOIN[135][bigTable=?] in task 'Reducer 9' is a cross product
PREHOOK: query: explain
select key, value, count(*)
from src_null_n4 b
@@ -1597,18 +1596,16 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Reducer 11 (BROADCAST_EDGE), Reducer 12 (BROADCAST_EDGE), Reducer 13 (BROADCAST_EDGE)
- Reducer 11 <- Map 10 (SIMPLE_EDGE)
- Reducer 12 <- Map 10 (SIMPLE_EDGE)
- Reducer 13 <- Map 10 (SIMPLE_EDGE)
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE)
- Reducer 3 <- Map 1 (XPROD_EDGE), Reducer 5 (XPROD_EDGE)
- Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
- Reducer 5 <- Map 1 (SIMPLE_EDGE)
- Reducer 6 <- Map 1 (XPROD_EDGE), Reducer 9 (XPROD_EDGE)
- Reducer 7 <- Reducer 6 (SIMPLE_EDGE)
+ Map 1 <- Reducer 4 (BROADCAST_EDGE)
+ Map 6 <- Reducer 5 (BROADCAST_EDGE)
+ Reducer 10 <- Reducer 9 (SIMPLE_EDGE)
+ Reducer 11 <- Reducer 10 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 11 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE)
+ Reducer 4 <- Map 3 (SIMPLE_EDGE)
+ Reducer 5 <- Map 3 (SIMPLE_EDGE)
+ Reducer 7 <- Map 1 (BROADCAST_EDGE), Map 6 (SIMPLE_EDGE)
Reducer 8 <- Reducer 7 (SIMPLE_EDGE)
- Reducer 9 <- Map 1 (SIMPLE_EDGE)
+ Reducer 9 <- Map 1 (BROADCAST_EDGE), Map 6 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -1628,7 +1625,7 @@ STAGE PLANS:
1
outputColumnNames: _col0, _col1, _col2, _col3
input vertices:
- 1 Reducer 11
+ 1 Reducer 4
residual filter predicates: {(_col2 <> _col1)}
Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE
Filter Operator
@@ -1661,38 +1658,6 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: string), _col1 (type: string)
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Left Outer Join 0 to 1
- keys:
- 0
- 1
- outputColumnNames: _col0, _col1, _col2, _col3
- input vertices:
- 1 Reducer 12
- residual filter predicates: {(_col2 <> _col1)}
- Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: _col3 is null (type: boolean)
- Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col1 (type: string), _col0 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string)
- sort order: ++
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (key > '9') (type: boolean)
Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
@@ -1704,6 +1669,39 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: string), _col1 (type: string)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: src_null_n4
+ filterExpr: value is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: value is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: value (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
@@ -1716,7 +1714,7 @@ STAGE PLANS:
1
outputColumnNames: _col0, _col1, _col2, _col3
input vertices:
- 1 Reducer 13
+ 1 Reducer 5
residual filter predicates: {(_col2 <> _col1)}
Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE
Filter Operator
@@ -1736,87 +1734,61 @@ STAGE PLANS:
sort order: ++
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE
Execution mode: llap
LLAP IO: no inputs
- Map 10
- Map Operator Tree:
- TableScan
- alias: src_null_n4
- filterExpr: value is not null (type: boolean)
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: value is not null (type: boolean)
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: value (type: string)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
- Execution mode: vectorized, llap
- LLAP IO: no inputs
- Reducer 11
+ Reducer 10
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
- keys: KEY._col0 (type: string)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), true (type: boolean)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string), _col1 (type: boolean)
- Reducer 12
- Execution mode: vectorized, llap
- Reduce Operator Tree:
- Group By Operator
- keys: KEY._col0 (type: string)
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), true (type: boolean)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string), _col1 (type: boolean)
- Reducer 13
+ expressions: _col0 (type: string), _col2 (type: bigint)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: _col2 is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col2 (type: bigint)
+ outputColumnNames: _col1, _col2
+ Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col1 (type: string), _col2 (type: bigint)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: bigint)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint)
+ Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
+ Reducer 11
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
- keys: KEY._col0 (type: string)
+ keys: KEY._col0 (type: string), KEY._col1 (type: bigint)
mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), true (type: boolean)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ expressions: _col1 (type: bigint), _col0 (type: string), true (type: boolean)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string), _col1 (type: boolean)
+ key expressions: _col1 (type: string), _col0 (type: bigint)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: string), _col0 (type: bigint)
+ Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: boolean)
Reducer 2
Execution mode: vectorized, llap
Reduce Operator Tree:
@@ -1838,7 +1810,7 @@ STAGE PLANS:
1 _col0 (type: string)
outputColumnNames: _col0, _col1, _col2, _col4, _col5
input vertices:
- 1 Reducer 4
+ 1 Reducer 8
Statistics: Num rows: 1 Data size: 608 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
@@ -1848,7 +1820,7 @@ STAGE PLANS:
1 _col1 (type: string), _col0 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col8
input vertices:
- 1 Reducer 8
+ 1 Reducer 11
Statistics: Num rows: 1 Data size: 668 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: CASE WHEN ((_col4 = 0L)) THEN (true) WHEN (_col4 is null) THEN (true) WHEN (_col8 is not null) THEN (false) WHEN (_col2 is null) THEN (null) WHEN ((_col5 < _col4)) THEN (false) ELSE (true) END (type: boolean)
@@ -1864,63 +1836,42 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 3
- Execution mode: llap
- Reduce Operator Tree:
- Merge Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0
- 1
- outputColumnNames: _col0, _col1, _col2
- residual filter predicates: {(_col1 <> _col2)}
- Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col2 (type: string)
- outputColumnNames: _col0, _col2
- Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- keys: _col2 (type: string), _col0 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string)
- sort order: ++
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col2 (type: bigint)
Reducer 4
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
- aggregations: count(VALUE._col0)
- keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ keys: KEY._col0 (type: string)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _col2 (type: bigint)
- outputColumnNames: _col1, _col2
- Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count(), count(_col2)
- keys: _col1 (type: string)
- mode: complete
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint), _col2 (type: bigint)
+ expressions: _col0 (type: string), true (type: boolean)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: boolean)
Reducer 5
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), true (type: boolean)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: boolean)
+ Reducer 7
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
keys: KEY._col0 (type: string), KEY._col1 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
@@ -1934,39 +1885,34 @@ STAGE PLANS:
mode: complete
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string)
- Reducer 6
- Execution mode: llap
- Reduce Operator Tree:
- Merge Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0
- 1
- outputColumnNames: _col0, _col1, _col2
- residual filter predicates: {(_col1 <> _col2)}
- Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col2 (type: string)
- outputColumnNames: _col0, _col2
- Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- keys: _col2 (type: string), _col0 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string)
- sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1, _col2
+ input vertices:
+ 0 Map 1
+ residual filter predicates: {(_col1 <> _col2)}
Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col2 (type: bigint)
- Reducer 7
+ Select Operator
+ expressions: _col0 (type: string), _col2 (type: string)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: _col2 (type: string), _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: bigint)
+ Reducer 8
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
@@ -1977,45 +1923,22 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col2 (type: bigint)
- outputColumnNames: _col0, _col2
+ outputColumnNames: _col1, _col2
Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: _col2 is not null (type: boolean)
+ Group By Operator
+ aggregations: count(), count(_col2)
+ keys: _col1 (type: string)
+ mode: complete
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col2 (type: bigint)
- outputColumnNames: _col1, _col2
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col1 (type: string), _col2 (type: bigint)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: bigint)
- sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint)
- Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
- Reducer 8
- Execution mode: vectorized, llap
- Reduce Operator Tree:
- Group By Operator
- keys: KEY._col0 (type: string), KEY._col1 (type: bigint)
- mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col1 (type: bigint), _col0 (type: string), true (type: boolean)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col1 (type: string), _col0 (type: bigint)
- sort order: ++
- Map-reduce partition columns: _col1 (type: string), _col0 (type: bigint)
- Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col2 (type: boolean)
+ value expressions: _col1 (type: bigint), _col2 (type: bigint)
Reducer 9
- Execution mode: vectorized, llap
+ Execution mode: llap
Reduce Operator Tree:
Group By Operator
keys: KEY._col0 (type: string), KEY._col1 (type: string)
@@ -2031,10 +1954,33 @@ STAGE PLANS:
mode: complete
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string)
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1, _col2
+ input vertices:
+ 0 Map 1
+ residual filter predicates: {(_col1 <> _col2)}
+ Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col2 (type: string)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: _col2 (type: string), _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: bigint)
Stage: Stage-0
Fetch Operator
@@ -2043,10 +1989,9 @@ STAGE PLANS:
ListSink
Warning: Map Join MAPJOIN[131][bigTable=?] in task 'Map 1' is a cross product
-Warning: Map Join MAPJOIN[132][bigTable=?] in task 'Map 1' is a cross product
-Warning: Map Join MAPJOIN[134][bigTable=?] in task 'Map 1' is a cross product
-Warning: Shuffle Join MERGEJOIN[133][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product
-Warning: Shuffle Join MERGEJOIN[135][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 6' is a cross product
+Warning: Map Join MAPJOIN[132][bigTable=?] in task 'Map 6' is a cross product
+Warning: Map Join MAPJOIN[133][bigTable=?] in task 'Reducer 7' is a cross product
+Warning: Map Join MAPJOIN[135][bigTable=?] in task 'Reducer 9' is a cross product
PREHOOK: query: select key, value, count(*)
from src_null_n4 b
where NOT EXISTS (select key from src_null_n4 where src_null_n4.value <> b.value)
http://git-wip-us.apache.org/repos/asf/hive/blob/4d436953/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out b/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out
index 2c38d8c..98b2013 100644
--- a/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out
+++ b/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out
@@ -424,7 +424,7 @@ POSTHOOK: type: ANALYZE_TABLE
POSTHOOK: Input: default@l3_monthly_dw_dimplan
POSTHOOK: Output: default@l3_monthly_dw_dimplan
#### A masked pattern was here ####
-Warning: Shuffle Join MERGEJOIN[48][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product
+Warning: Map Join MAPJOIN[48][bigTable=?] in task 'Map 1' is a cross product
PREHOOK: query: EXPLAIN EXTENDED
SELECT DW.PROJECT_OBJECT_ID, S1.PLAN_KEY as PLAN_KEY, S2.PROJECT_KEY AS PROJECT_KEY
FROM l3_clarity__L3_SNAP_NUMBER_2018022300104 snap inner join
@@ -478,8 +478,8 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (XPROD_EDGE), Map 4 (XPROD_EDGE), Map 5 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE)
- Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+ Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -497,13 +497,59 @@ STAGE PLANS:
expressions: plan_detail_object_id (type: bigint)
outputColumnNames: _col0
Statistics: Num rows: 15 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- null sort order:
- sort order:
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Estimated key counts: Map 3 => 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col2
+ input vertices:
+ 1 Map 3
+ Position of Big Table: 0
Statistics: Num rows: 15 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
- tag: 0
- value expressions: _col0 (type: bigint)
- auto parallelism: false
+ Map Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ Estimated key counts: Map 4 => 90170
+ keys:
+ 0 _col2 (type: bigint), _col0 (type: bigint)
+ 1 _col1 (type: bigint), _col3 (type: bigint)
+ outputColumnNames: _col2, _col5
+ input vertices:
+ 1 Map 4
+ Position of Big Table: 0
+ Statistics: Num rows: 15 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col2 (type: bigint), _col5 (type: bigint)
+ outputColumnNames: _col0, _col5
+ Statistics: Num rows: 15 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ Estimated key counts: Map 5 => 1
+ keys:
+ 0 _col0 (type: bigint)
+ 1 _col1 (type: bigint)
+ outputColumnNames: _col5, _col7
+ input vertices:
+ 1 Map 5
+ Position of Big Table: 0
+ Statistics: Num rows: 15 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col5 (type: bigint), _col7 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 15 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint), _col1 (type: bigint)
+ null sort order: aa
+ sort order: ++
+ Statistics: Num rows: 15 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: -1
+ TopN: 5
+ TopN Hash Memory Usage: 0.1
+ auto parallelism: false
Execution mode: vectorized, llap
LLAP IO: all inputs
Path -> Alias:
@@ -559,7 +605,7 @@ STAGE PLANS:
name: default.l3_clarity__l3_monthly_dw_factplan_dw_stg_2018022300104_1
Truncated Path -> Alias:
/l3_clarity__l3_monthly_dw_factplan_dw_stg_2018022300104_1 [dw]
- Map 4
+ Map 3
Map Operator Tree:
TableScan
alias: snap
@@ -631,7 +677,7 @@ STAGE PLANS:
name: default.l3_clarity__l3_snap_number_2018022300104
Truncated Path -> Alias:
/l3_clarity__l3_snap_number_2018022300104 [snap]
- Map 5
+ Map 4
Map Operator Tree:
TableScan
alias: s1
@@ -712,7 +758,7 @@ STAGE PLANS:
name: default.l3_monthly_dw_dimplan
Truncated Path -> Alias:
/l3_monthly_dw_dimplan [s1]
- Map 6
+ Map 5
Map Operator Tree:
TableScan
alias: s2
@@ -792,60 +838,6 @@ STAGE PLANS:
Truncated Path -> Alias:
/l3_clarity__l3_monthly_dw_factplan_datajoin_1_s2_2018022300104_1 [s2]
Reducer 2
- Execution mode: llap
- Needs Tagging: false
- Reduce Operator Tree:
- Merge Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0
- 1
- outputColumnNames: _col0, _col2
- Position of Big Table: 0
- Statistics: Num rows: 15 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
- condition map:
- Left Outer Join 0 to 1
- Estimated key counts: Map 5 => 90170
- keys:
- 0 _col2 (type: bigint), _col0 (type: bigint)
- 1 _col1 (type: bigint), _col3 (type: bigint)
- outputColumnNames: _col2, _col5
- input vertices:
- 1 Map 5
- Position of Big Table: 0
- Statistics: Num rows: 15 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col2 (type: bigint), _col5 (type: bigint)
- outputColumnNames: _col0, _col5
- Statistics: Num rows: 15 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
- condition map:
- Left Outer Join 0 to 1
- Estimated key counts: Map 6 => 1
- keys:
- 0 _col0 (type: bigint)
- 1 _col1 (type: bigint)
- outputColumnNames: _col5, _col7
- input vertices:
- 1 Map 6
- Position of Big Table: 0
- Statistics: Num rows: 15 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col5 (type: bigint), _col7 (type: bigint)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 15 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: bigint), _col1 (type: bigint)
- null sort order: aa
- sort order: ++
- Statistics: Num rows: 15 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
- tag: -1
- TopN: 5
- TopN Hash Memory Usage: 0.1
- auto parallelism: false
- Reducer 3
Execution mode: vectorized, llap
Needs Tagging: false
Reduce Operator Tree:
@@ -889,7 +881,7 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Shuffle Join MERGEJOIN[48][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product
+Warning: Map Join MAPJOIN[48][bigTable=?] in task 'Map 1' is a cross product
PREHOOK: query: SELECT DW.PROJECT_OBJECT_ID, S1.PLAN_KEY as PLAN_KEY, S2.PROJECT_KEY AS PROJECT_KEY
FROM l3_clarity__L3_SNAP_NUMBER_2018022300104 snap inner join
l3_clarity__L3_MONTHLY_DW_FACTPLAN_DW_STG_2018022300104_1 DW on 1=1
@@ -931,7 +923,7 @@ POSTHOOK: Input: default@l3_monthly_dw_dimplan
7147200 NULL 27114
7147200 NULL 27114
7147200 NULL 27114
-Warning: Shuffle Join MERGEJOIN[48][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product
+Warning: Map Join MAPJOIN[48][bigTable=?] in task 'Map 1' is a cross product
PREHOOK: query: EXPLAIN EXTENDED
SELECT DW.PROJECT_OBJECT_ID, S1.PLAN_KEY as PLAN_KEY, S2.PROJECT_KEY AS PROJECT_KEY
FROM l3_clarity__L3_SNAP_NUMBER_2018022300104 snap inner join
@@ -985,8 +977,8 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (XPROD_EDGE), Map 4 (XPROD_EDGE), Map 5 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE)
- Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+ Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -1004,13 +996,59 @@ STAGE PLANS:
expressions: plan_detail_object_id (type: bigint)
outputColumnNames: _col0
Statistics: Num rows: 15 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- null sort order:
- sort order:
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Estimated key counts: Map 3 => 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col2
+ input vertices:
+ 1 Map 3
+ Position of Big Table: 0
Statistics: Num rows: 15 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
- tag: 0
- value expressions: _col0 (type: bigint)
- auto parallelism: false
+ Map Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ Estimated key counts: Map 4 => 90170
+ keys:
+ 0 _col2 (type: bigint), _col0 (type: bigint)
+ 1 _col1 (type: bigint), _col3 (type: bigint)
+ outputColumnNames: _col2, _col5
+ input vertices:
+ 1 Map 4
+ Position of Big Table: 0
+ Statistics: Num rows: 15 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col2 (type: bigint), _col5 (type: bigint)
+ outputColumnNames: _col0, _col5
+ Statistics: Num rows: 15 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ Estimated key counts: Map 5 => 1
+ keys:
+ 0 _col0 (type: bigint)
+ 1 _col1 (type: bigint)
+ outputColumnNames: _col5, _col7
+ input vertices:
+ 1 Map 5
+ Position of Big Table: 0
+ Statistics: Num rows: 15 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col5 (type: bigint), _col7 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 15 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint), _col1 (type: bigint)
+ null sort order: aa
+ sort order: ++
+ Statistics: Num rows: 15 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: -1
+ TopN: 5
+ TopN Hash Memory Usage: 0.1
+ auto parallelism: false
Execution mode: vectorized, llap
LLAP IO: all inputs
Path -> Alias:
@@ -1066,7 +1104,7 @@ STAGE PLANS:
name: default.l3_clarity__l3_monthly_dw_factplan_dw_stg_2018022300104_1
Truncated Path -> Alias:
/l3_clarity__l3_monthly_dw_factplan_dw_stg_2018022300104_1 [dw]
- Map 4
+ Map 3
Map Operator Tree:
TableScan
alias: snap
@@ -1138,7 +1176,7 @@ STAGE PLANS:
name: default.l3_clarity__l3_snap_number_2018022300104
Truncated Path -> Alias:
/l3_clarity__l3_snap_number_2018022300104 [snap]
- Map 5
+ Map 4
Map Operator Tree:
TableScan
alias: s1
@@ -1220,7 +1258,7 @@ STAGE PLANS:
name: default.l3_monthly_dw_dimplan
Truncated Path -> Alias:
/l3_monthly_dw_dimplan [s1]
- Map 6
+ Map 5
Map Operator Tree:
TableScan
alias: s2
@@ -1300,60 +1338,6 @@ STAGE PLANS:
Truncated Path -> Alias:
/l3_clarity__l3_monthly_dw_factplan_datajoin_1_s2_2018022300104_1 [s2]
Reducer 2
- Execution mode: llap
- Needs Tagging: false
- Reduce Operator Tree:
- Merge Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0
- 1
- outputColumnNames: _col0, _col2
- Position of Big Table: 0
- Statistics: Num rows: 15 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
- condition map:
- Left Outer Join 0 to 1
- Estimated key counts: Map 5 => 90170
- keys:
- 0 _col2 (type: bigint), _col0 (type: bigint)
- 1 _col1 (type: bigint), _col3 (type: bigint)
- outputColumnNames: _col2, _col5
- input vertices:
- 1 Map 5
- Position of Big Table: 0
- Statistics: Num rows: 15 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col2 (type: bigint), _col5 (type: bigint)
- outputColumnNames: _col0, _col5
- Statistics: Num rows: 15 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
- condition map:
- Left Outer Join 0 to 1
- Estimated key counts: Map 6 => 1
- keys:
- 0 _col0 (type: bigint)
- 1 _col1 (type: bigint)
- outputColumnNames: _col5, _col7
- input vertices:
- 1 Map 6
- Position of Big Table: 0
- Statistics: Num rows: 15 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col5 (type: bigint), _col7 (type: bigint)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 15 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: bigint), _col1 (type: bigint)
- null sort order: aa
- sort order: ++
- Statistics: Num rows: 15 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
- tag: -1
- TopN: 5
- TopN Hash Memory Usage: 0.1
- auto parallelism: false
- Reducer 3
Execution mode: vectorized, llap
Needs Tagging: false
Reduce Operator Tree:
@@ -1397,7 +1381,7 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Shuffle Join MERGEJOIN[48][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product
+Warning: Map Join MAPJOIN[48][bigTable=?] in task 'Map 1' is a cross product
PREHOOK: query: SELECT DW.PROJECT_OBJECT_ID, S1.PLAN_KEY as PLAN_KEY, S2.PROJECT_KEY AS PROJECT_KEY
FROM l3_clarity__L3_SNAP_NUMBER_2018022300104 snap inner join
l3_clarity__L3_MONTHLY_DW_FACTPLAN_DW_STG_2018022300104_1 DW on 1=1
http://git-wip-us.apache.org/repos/asf/hive/blob/4d436953/ql/src/test/results/clientpositive/llap/vector_complex_all.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_complex_all.q.out b/ql/src/test/results/clientpositive/llap/vector_complex_all.q.out
index d5ea64f..4e1698d 100644
--- a/ql/src/test/results/clientpositive/llap/vector_complex_all.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_complex_all.q.out
@@ -642,7 +642,7 @@ b str
two line1
four line2
six line3
-Warning: Shuffle Join MERGEJOIN[15][tables = [$hdt$_1, $hdt$_2, $hdt$_3, $hdt$_0]] in Stage 'Reducer 2' is a cross product
+Warning: Map Join MAPJOIN[15][bigTable=?] in task 'Map 4' is a cross product
PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
INSERT INTO TABLE orc_create_complex_n0
SELECT orc_create_staging_n0.*, src1.key FROM orc_create_staging_n0 cross join src src1 cross join orc_create_staging_n0 spam1 cross join orc_create_staging_n0 spam2
@@ -667,7 +667,7 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (XPROD_EDGE), Map 3 (XPROD_EDGE), Map 4 (XPROD_EDGE), Map 5 (XPROD_EDGE)
+ Map 4 <- Map 1 (BROADCAST_EDGE), Map 2 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -712,7 +712,7 @@ STAGE PLANS:
dataColumns: str:string, mp:map<string,string>, lst:array<string>, strct:struct<a:string,b:string>
partitionColumnCount: 0
scratchColumnTypeNames: []
- Map 3
+ Map 2
Map Operator Tree:
TableScan
alias: spam2
@@ -751,7 +751,7 @@ STAGE PLANS:
dataColumns: str:string, mp:map<string,string>, lst:array<string>, strct:struct<a:string,b:string>
partitionColumnCount: 0
scratchColumnTypeNames: []
- Map 4
+ Map 3
Map Operator Tree:
TableScan
alias: spam1
@@ -790,7 +790,7 @@ STAGE PLANS:
dataColumns: str:string, mp:map<string,string>, lst:array<string>, strct:struct<a:string,b:string>
partitionColumnCount: 0
scratchColumnTypeNames: []
- Map 5
+ Map 4
Map Operator Tree:
TableScan
alias: src1
@@ -806,16 +806,47 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [0]
Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- sort order:
- Reduce Sink Vectorization:
- className: VectorReduceSinkEmptyKeyOperator
- keyColumnNums: []
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [0]
- Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: string)
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 0 to 2
+ Inner Join 0 to 3
+ keys:
+ 0
+ 1
+ 2
+ 3
+ Map Join Vectorization:
+ bigTableValueExpressions: col 0:string
+ className: VectorMapJoinOperator
+ native: false
+ nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+ nativeConditionsNotMet: One MapJoin Condition IS false
+ outputColumnNames: _col0, _col1, _col2, _col3, _col6
+ input vertices:
+ 0 Map 1
+ 1 Map 2
+ 2 Map 3
+ Statistics: Num rows: 500 Data size: 1768000 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: map<string,string>), _col2 (type: array<string>), _col3 (type: struct<a:string,b:string>), _col6 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 2, 3, 4]
+ Statistics: Num rows: 500 Data size: 1768000 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 500 Data size: 1768000 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.orc_create_complex_n0
Execution mode: vectorized, llap
Map Vectorization:
enabled: true
@@ -823,7 +854,7 @@ STAGE PLANS:
inputFormatFeatureSupport: [DECIMAL_64]
featureSupportInUse: [DECIMAL_64]
inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
- allNative: true
+ allNative: false
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
@@ -831,34 +862,7 @@ STAGE PLANS:
includeColumns: [0]
dataColumns: key:string, value:string
partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reducer 2
- Execution mode: llap
- Reduce Operator Tree:
- Merge Join Operator
- condition map:
- Inner Join 0 to 1
- Inner Join 0 to 2
- Inner Join 0 to 3
- keys:
- 0
- 1
- 2
- 3
- outputColumnNames: _col0, _col1, _col2, _col3, _col6
- Statistics: Num rows: 500 Data size: 1768000 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: map<string,string>), _col2 (type: array<string>), _col3 (type: struct<a:string,b:string>), _col6 (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 500 Data size: 1768000 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 500 Data size: 1768000 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
- name: default.orc_create_complex_n0
+ scratchColumnTypeNames: [string, map<string,string>, array<string>, struct<a:string,b:string>]
Stage: Stage-2
Dependency Collection
@@ -877,7 +881,7 @@ STAGE PLANS:
Stats Work
Basic Stats Work:
-Warning: Shuffle Join MERGEJOIN[15][tables = [$hdt$_1, $hdt$_2, $hdt$_3, $hdt$_0]] in Stage 'Reducer 2' is a cross product
+Warning: Map Join MAPJOIN[15][bigTable=?] in task 'Map 4' is a cross product
PREHOOK: query: INSERT INTO TABLE orc_create_complex_n0
SELECT orc_create_staging_n0.*, src1.key FROM orc_create_staging_n0 cross join src src1 cross join orc_create_staging_n0 spam1 cross join orc_create_staging_n0 spam2
PREHOOK: type: QUERY
http://git-wip-us.apache.org/repos/asf/hive/blob/4d436953/ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out
index 10abe77..6443678 100644
--- a/ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out
@@ -1,4 +1,4 @@
-Warning: Shuffle Join MERGEJOIN[32][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+Warning: Map Join MAPJOIN[32][bigTable=?] in task 'Map 1' is a cross product
PREHOOK: query: explain vectorization expression
select *
from src
@@ -26,10 +26,10 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 5 (XPROD_EDGE), Reducer 6 (BROADCAST_EDGE)
- Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
- Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE)
- Reducer 6 <- Map 4 (SIMPLE_EDGE)
+ Map 1 <- Reducer 4 (BROADCAST_EDGE), Reducer 5 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE)
+ Reducer 5 <- Map 3 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -47,14 +47,58 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [0, 1]
Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- sort order:
- Reduce Sink Vectorization:
- className: VectorReduceSinkEmptyKeyOperator
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ Map Join Vectorization:
+ className: VectorMapJoinInnerMultiKeyOperator
native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: string), _col1 (type: string)
+ nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+ outputColumnNames: _col0, _col1, _col2, _col3
+ input vertices:
+ 1 Reducer 4
+ Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ Map Join Vectorization:
+ className: VectorMapJoinOuterStringOperator
+ native: true
+ nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true
+ outputColumnNames: _col0, _col1, _col2, _col3, _col5
+ input vertices:
+ 1 Reducer 5
+ Statistics: Num rows: 500 Data size: 98584 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 3:bigint, val 0), FilterExprAndExpr(children: SelectColumnIsNull(col 5:boolean), SelectColumnIsNotNull(col 0:string), FilterLongColGreaterEqualLongColumn(col 4:bigint, col 3:bigint)))
+ predicate: ((_col2 = 0L) or (_col5 is null and _col0 is not null and (_col3 >= _col2))) (type: boolean)
+ Statistics: Num rows: 500 Data size: 98584 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1]
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: string)
Execution mode: vectorized, llap
LLAP IO: no inputs
Map Vectorization:
@@ -66,7 +110,7 @@ STAGE PLANS:
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
- Map 4
+ Map 3
Map Operator Tree:
TableScan
alias: src
@@ -134,39 +178,6 @@ STAGE PLANS:
usesVectorUDFAdaptor: false
vectorized: true
Reducer 2
- Execution mode: llap
- Reduce Operator Tree:
- Merge Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0
- 1
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
- condition map:
- Left Outer Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3, _col5
- input vertices:
- 1 Reducer 6
- Statistics: Num rows: 500 Data size: 98584 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: ((_col2 = 0L) or (_col5 is null and _col0 is not null and (_col3 >= _col2))) (type: boolean)
- Statistics: Num rows: 500 Data size: 98584 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: string)
- Reducer 3
Execution mode: vectorized, llap
Reduce Vectorization:
enabled: true
@@ -193,7 +204,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 5
+ Reducer 4
Execution mode: vectorized, llap
Reduce Vectorization:
enabled: true
@@ -222,7 +233,7 @@ STAGE PLANS:
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: bigint), _col1 (type: bigint)
- Reducer 6
+ Reducer 5
Execution mode: vectorized, llap
Reduce Vectorization:
enabled: true
@@ -269,7 +280,7 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Shuffle Join MERGEJOIN[32][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+Warning: Map Join MAPJOIN[32][bigTable=?] in task 'Map 1' is a cross product
PREHOOK: query: select *
from src
where not key in
@@ -298,7 +309,7 @@ POSTHOOK: Output: database:default
POSTHOOK: Output: default@orcsrc
POSTHOOK: Lineage: orcsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: orcsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-Warning: Shuffle Join MERGEJOIN[32][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+Warning: Map Join MAPJOIN[32][bigTable=?] in task 'Map 1' is a cross product
PREHOOK: query: select *
from orcsrc
where not key in
@@ -315,7 +326,7 @@ order by key
POSTHOOK: type: QUERY
POSTHOOK: Input: default@orcsrc
#### A masked pattern was here ####
-Warning: Shuffle Join MERGEJOIN[32][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+Warning: Map Join MAPJOIN[32][bigTable=?] in task 'Map 1' is a cross product
PREHOOK: query: select *
from orcsrc
where not key in