You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by vg...@apache.org on 2017/11/07 06:27:35 UTC
[16/17] hive git commit: HIVE-17767 Rewrite correlated EXISTS/IN
subqueries into LEFT SEMI JOIN (Vineet Garg, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/aee0eaa0/ql/src/test/results/clientpositive/llap/subquery_exists.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/subquery_exists.q.out b/ql/src/test/results/clientpositive/llap/subquery_exists.q.out
index e206f08..dfe4240 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_exists.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_exists.q.out
@@ -33,15 +33,18 @@ STAGE PLANS:
TableScan
alias: b
Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string)
- sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: ((value > 'val_9') and key is not null) (type: boolean)
+ Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
Map 3
@@ -50,22 +53,22 @@ STAGE PLANS:
alias: a
Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: ((key = key) and (value = value) and (value > 'val_9')) (type: boolean)
- Statistics: Num rows: 41 Data size: 7298 Basic stats: COMPLETE Column stats: COMPLETE
+ predicate: ((value > 'val_9') and key is not null) (type: boolean)
+ Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 41 Data size: 7298 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
keys: _col0 (type: string), _col1 (type: string)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 20 Data size: 3560 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 20 Data size: 3560 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
Reducer 2
@@ -78,10 +81,10 @@ STAGE PLANS:
0 _col0 (type: string), _col1 (type: string)
1 _col0 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 32 Data size: 5696 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 133 Data size: 23674 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 32 Data size: 5696 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 133 Data size: 23674 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -237,16 +240,19 @@ STAGE PLANS:
TableScan
alias: b
Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
+ Filter Operator
+ predicate: value is not null (type: boolean)
Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col1 (type: string)
- sort order: +
- Map-reduce partition columns: _col1 (type: string)
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: string)
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: string)
Execution mode: llap
LLAP IO: no inputs
Map 3
@@ -1074,13 +1080,13 @@ POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@tx1
PREHOOK: query: insert into tx1 values (1, 1),
- (1, 2),
- (1, 3)
+ (1, 2),
+ (1, 3)
PREHOOK: type: QUERY
PREHOOK: Output: default@tx1
POSTHOOK: query: insert into tx1 values (1, 1),
- (1, 2),
- (1, 3)
+ (1, 2),
+ (1, 3)
POSTHOOK: type: QUERY
POSTHOOK: Output: default@tx1
POSTHOOK: Lineage: tx1.a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
@@ -1111,10 +1117,8 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
- Reducer 4 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
- Reducer 5 <- Map 1 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -1122,31 +1126,44 @@ STAGE PLANS:
TableScan
alias: u
Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: a (type: int), b (type: int)
- outputColumnNames: _col0, _col1
+ Filter Operator
+ predicate: a is not null (type: boolean)
Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int), _col1 (type: int)
- sort order: ++
- Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
+ Select Operator
+ expressions: a (type: int), b (type: int)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: int)
- Group By Operator
- keys: a (type: int), b (type: int)
- mode: hash
- outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: v
+ Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (a is not null and b is not null) (type: boolean)
Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int), _col1 (type: int)
- sort order: ++
- Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Select Operator
+ expressions: a (type: int), b (type: int)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: int), _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int)
Execution mode: llap
LLAP IO: no inputs
Reducer 2
@@ -1156,18 +1173,22 @@ STAGE PLANS:
condition map:
Left Semi Join 0 to 1
keys:
- 0 _col0 (type: int), _col1 (type: int)
- 1 _col0 (type: int), _col1 (type: int)
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col3
+ residual filter predicates: {(_col1 <> _col3)}
Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
+ Select Operator
+ Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
Reducer 3
Execution mode: llap
Reduce Operator Tree:
@@ -1187,46 +1208,6 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 4
- Execution mode: llap
- Reduce Operator Tree:
- Merge Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col1, _col2, _col3
- residual filter predicates: {(_col3 <> _col1)}
- Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col2 (type: int), _col3 (type: int)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: int), _col1 (type: int)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int), _col1 (type: int)
- sort order: ++
- Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE
- Reducer 5
- Execution mode: llap
- Reduce Operator Tree:
- Group By Operator
- keys: KEY._col0 (type: int), KEY._col1 (type: int)
- mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: int)
Stage: Stage-0
Fetch Operator
@@ -1274,7 +1255,6 @@ POSTHOOK: type: QUERY
POSTHOOK: Output: default@t2
POSTHOOK: Lineage: t2.i EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
POSTHOOK: Lineage: t2.j EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
-Warning: Shuffle Join MERGEJOIN[27][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product
PREHOOK: query: explain select * from t1 where t1.i in (select t2.i from t2 where t2.j <> t1.j)
PREHOOK: type: QUERY
POSTHOOK: query: explain select * from t1 where t1.i in (select t2.i from t2 where t2.j <> t1.j)
@@ -1288,9 +1268,7 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
- Reducer 4 <- Map 3 (XPROD_EDGE), Reducer 6 (XPROD_EDGE)
- Reducer 6 <- Map 5 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -1298,15 +1276,19 @@ STAGE PLANS:
TableScan
alias: t1
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: i (type: int), j (type: int)
- outputColumnNames: _col0, _col1
+ Filter Operator
+ predicate: i is not null (type: boolean)
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int), _col1 (type: int)
- sort order: ++
- Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Select Operator
+ expressions: i (type: int), j (type: int)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int)
Execution mode: llap
LLAP IO: no inputs
Map 3
@@ -1314,31 +1296,24 @@ STAGE PLANS:
TableScan
alias: t2
Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: i (type: int), j (type: int)
- outputColumnNames: _col0, _col1
+ Filter Operator
+ predicate: (i is not null and j is not null) (type: boolean)
Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
+ Select Operator
+ expressions: i (type: int), j (type: int)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int), _col1 (type: int)
- Execution mode: llap
- LLAP IO: no inputs
- Map 5
- Map Operator Tree:
- TableScan
- alias: t1
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: j (type: int)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: int), _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int)
Execution mode: llap
LLAP IO: no inputs
Reducer 2
@@ -1348,55 +1323,22 @@ STAGE PLANS:
condition map:
Left Semi Join 0 to 1
keys:
- 0 _col0 (type: int), _col1 (type: int)
- 1 _col0 (type: int), _col1 (type: int)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 3 Data size: 42 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 3 Data size: 42 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 4
- Execution mode: llap
- Reduce Operator Tree:
- Merge Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0
- 1
- outputColumnNames: _col0, _col1, _col2
- residual filter predicates: {(_col1 <> _col2)}
- Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col3
+ residual filter predicates: {(_col1 <> _col3)}
+ Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: int), _col2 (type: int)
+ expressions: _col0 (type: int), _col1 (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: int), _col1 (type: int)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int), _col1 (type: int)
- sort order: ++
- Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE
- Reducer 6
- Execution mode: llap
- Reduce Operator Tree:
- Group By Operator
- keys: KEY._col0 (type: int)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int)
+ Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
@@ -1404,7 +1346,6 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Shuffle Join MERGEJOIN[27][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product
PREHOOK: query: select * from t1 where t1.i in (select t2.i from t2 where t2.j <> t1.j)
PREHOOK: type: QUERY
PREHOOK: Input: default@t1