You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2016/03/07 09:45:47 UTC
[1/2] hive git commit: HIVE-13096: Cost to choose side table in
MapJoin conversion based on cumulative cardinality (Jesus Camacho Rodriguez,
reviewed by Ashutosh Chauhan)
Repository: hive
Updated Branches:
refs/heads/master f468748b6 -> 930b66b25
http://git-wip-us.apache.org/repos/asf/hive/blob/930b66b2/ql/src/test/results/clientpositive/tez/vector_inner_join.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_inner_join.q.out b/ql/src/test/results/clientpositive/tez/vector_inner_join.q.out
index 5ff2cd1..45554f7 100644
--- a/ql/src/test/results/clientpositive/tez/vector_inner_join.q.out
+++ b/ql/src/test/results/clientpositive/tez/vector_inner_join.q.out
@@ -47,7 +47,7 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 2 <- Map 1 (BROADCAST_EDGE)
+ Map 1 <- Map 2 (BROADCAST_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -62,24 +62,6 @@ STAGE PLANS:
expressions: c (type: int)
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
- Execution mode: vectorized
- Map 2
- Map Operator Tree:
- TableScan
- alias: t1
- Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (a > 2) (type: boolean)
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: a (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -88,7 +70,7 @@ STAGE PLANS:
1 _col0 (type: int)
outputColumnNames: _col1
input vertices:
- 0 Map 1
+ 1 Map 2
Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
HybridGraceHashJoin: true
Select Operator
@@ -103,6 +85,24 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: vectorized
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (a > 2) (type: boolean)
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: a (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
Stage: Stage-0
Fetch Operator
@@ -352,7 +352,7 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Map 2 (BROADCAST_EDGE)
+ Map 2 <- Map 1 (BROADCAST_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -367,6 +367,25 @@ STAGE PLANS:
expressions: v1 (type: string), a (type: int)
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ Execution mode: vectorized
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: t2
+ Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (c > 2) (type: boolean)
+ Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c (type: int), v2 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -375,7 +394,7 @@ STAGE PLANS:
1 _col0 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
input vertices:
- 1 Map 2
+ 0 Map 1
Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE
HybridGraceHashJoin: true
File Output Operator
@@ -386,25 +405,6 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: vectorized
- Map 2
- Map Operator Tree:
- TableScan
- alias: t2
- Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (c > 2) (type: boolean)
- Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: c (type: int), v2 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: string)
- Execution mode: vectorized
Stage: Stage-0
Fetch Operator
@@ -708,7 +708,7 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Map 2 (BROADCAST_EDGE)
+ Map 2 <- Map 1 (BROADCAST_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -723,6 +723,25 @@ STAGE PLANS:
expressions: v1 (type: string), a (type: int)
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ Execution mode: vectorized
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: t2
+ Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (c > 2) (type: boolean)
+ Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c (type: int), v2 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -731,7 +750,7 @@ STAGE PLANS:
1 _col0 (type: int)
outputColumnNames: _col0, _col2, _col3
input vertices:
- 1 Map 2
+ 0 Map 1
Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE
HybridGraceHashJoin: true
Select Operator
@@ -746,25 +765,6 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: vectorized
- Map 2
- Map Operator Tree:
- TableScan
- alias: t2
- Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (c > 2) (type: boolean)
- Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: c (type: int), v2 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: string)
- Execution mode: vectorized
Stage: Stage-0
Fetch Operator
@@ -798,7 +798,7 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Map 2 (BROADCAST_EDGE)
+ Map 2 <- Map 1 (BROADCAST_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -813,6 +813,25 @@ STAGE PLANS:
expressions: v1 (type: string), a (type: int)
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ Execution mode: vectorized
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: t2
+ Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (c > 2) (type: boolean)
+ Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c (type: int), v2 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -821,7 +840,7 @@ STAGE PLANS:
1 _col0 (type: int)
outputColumnNames: _col0, _col1, _col3
input vertices:
- 1 Map 2
+ 0 Map 1
Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE
HybridGraceHashJoin: true
Select Operator
@@ -836,25 +855,6 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: vectorized
- Map 2
- Map Operator Tree:
- TableScan
- alias: t2
- Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (c > 2) (type: boolean)
- Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: c (type: int), v2 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: string)
- Execution mode: vectorized
Stage: Stage-0
Fetch Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/930b66b2/ql/src/test/results/clientpositive/tez/vector_mapjoin_reduce.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_mapjoin_reduce.q.out b/ql/src/test/results/clientpositive/tez/vector_mapjoin_reduce.q.out
index 2a6f7ff..8f5090a 100644
--- a/ql/src/test/results/clientpositive/tez/vector_mapjoin_reduce.q.out
+++ b/ql/src/test/results/clientpositive/tez/vector_mapjoin_reduce.q.out
@@ -31,8 +31,8 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Map 2 (BROADCAST_EDGE), Reducer 4 (BROADCAST_EDGE)
- Reducer 4 <- Map 3 (SIMPLE_EDGE)
+ Map 1 <- Map 2 (BROADCAST_EDGE)
+ Reducer 4 <- Map 1 (BROADCAST_EDGE), Map 3 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -58,28 +58,12 @@ STAGE PLANS:
1 Map 2
Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE
HybridGraceHashJoin: true
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col1 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col2, _col4
- input vertices:
- 1 Reducer 4
- Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE
- HybridGraceHashJoin: true
- Select Operator
- expressions: _col4 (type: int), _col2 (type: int)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int)
Map 2
Map Operator Tree:
TableScan
@@ -128,11 +112,28 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col4
+ input vertices:
+ 0 Map 1
+ Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE
+ HybridGraceHashJoin: true
+ Select Operator
+ expressions: _col4 (type: int), _col2 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
@@ -187,8 +188,8 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Map 2 (BROADCAST_EDGE), Reducer 4 (BROADCAST_EDGE)
- Reducer 4 <- Map 3 (SIMPLE_EDGE)
+ Map 1 <- Map 2 (BROADCAST_EDGE)
+ Reducer 4 <- Map 1 (BROADCAST_EDGE), Map 3 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -214,28 +215,12 @@ STAGE PLANS:
1 Map 2
Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE
HybridGraceHashJoin: true
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col1 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col2, _col4
- input vertices:
- 1 Reducer 4
- Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE
- HybridGraceHashJoin: true
- Select Operator
- expressions: _col4 (type: int), _col2 (type: int)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int)
Map 2
Map Operator Tree:
TableScan
@@ -284,11 +269,28 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2, _col4
+ input vertices:
+ 0 Map 1
+ Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE
+ HybridGraceHashJoin: true
+ Select Operator
+ expressions: _col4 (type: int), _col2 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/930b66b2/ql/src/test/results/clientpositive/tez/vectorized_dynamic_partition_pruning.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vectorized_dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/tez/vectorized_dynamic_partition_pruning.q.out
index fc3ebb5..e1c76f5 100644
--- a/ql/src/test/results/clientpositive/tez/vectorized_dynamic_partition_pruning.q.out
+++ b/ql/src/test/results/clientpositive/tez/vectorized_dynamic_partition_pruning.q.out
@@ -4475,8 +4475,9 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 3 <- Map 1 (BROADCAST_EDGE), Map 2 (SIMPLE_EDGE)
- Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+ Map 1 <- Reducer 4 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 4 <- Map 3 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -4489,12 +4490,26 @@ STAGE PLANS:
expressions: ds (type: string)
outputColumnNames: _col0
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
- Map 2
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 '2008-04-08' (type: string)
+ input vertices:
+ 1 Reducer 4
+ Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
+ HybridGraceHashJoin: true
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Map 3
Map Operator Tree:
TableScan
alias: srcpart
@@ -4510,36 +4525,7 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: '2008-04-08' (type: string)
Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
- Reducer 3
- Execution mode: vectorized
- Reduce Operator Tree:
- Group By Operator
- keys: '2008-04-08' (type: string)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 '2008-04-08' (type: string)
- input vertices:
- 0 Map 1
- Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
- HybridGraceHashJoin: true
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
- Reducer 4
+ Reducer 2
Execution mode: vectorized
Reduce Operator Tree:
Group By Operator
@@ -4554,6 +4540,36 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 4
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ keys: '2008-04-08' (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: '2008-04-08' (type: string)
+ sort order: +
+ Map-reduce partition columns: '2008-04-08' (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: '2008-04-08' (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Dynamic Partitioning Event Operator
+ Target column: ds (string)
+ Target Input: srcpart
+ Partition key expr: ds
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Target Vertex: Map 1
Stage: Stage-0
Fetch Operator
[2/2] hive git commit: HIVE-13096: Cost to choose side table in
MapJoin conversion based on cumulative cardinality (Jesus Camacho Rodriguez,
reviewed by Ashutosh Chauhan)
Posted by jc...@apache.org.
HIVE-13096: Cost to choose side table in MapJoin conversion based on cumulative cardinality (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/930b66b2
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/930b66b2
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/930b66b2
Branch: refs/heads/master
Commit: 930b66b25c1b2d064bd3280d36845bf53308e3f9
Parents: f468748
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Thu Feb 25 15:01:22 2016 +0100
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Mon Mar 7 09:44:55 2016 +0100
----------------------------------------------------------------------
.../hive/ql/optimizer/ConvertJoinMapJoin.java | 85 ++++++---
.../llap/bucket_map_join_tez1.q.out | 183 ++++++++++---------
.../llap/dynamic_partition_pruning.q.out | 92 ++++++----
.../vectorized_dynamic_partition_pruning.q.out | 92 ++++++----
.../tez/auto_sortmerge_join_10.q.out | 42 ++---
.../tez/bucket_map_join_tez1.q.out | 177 +++++++++---------
.../tez/cross_product_check_2.q.out | 171 ++++++++---------
.../tez/dynamic_partition_pruning.q.out | 90 +++++----
.../clientpositive/tez/explainuser_2.q.out | 36 ++--
.../tez/vector_groupby_mapjoin.q.out | 53 +++---
.../clientpositive/tez/vector_inner_join.q.out | 166 ++++++++---------
.../tez/vector_mapjoin_reduce.q.out | 118 ++++++------
.../vectorized_dynamic_partition_pruning.q.out | 92 ++++++----
13 files changed, 751 insertions(+), 646 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/930b66b2/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
index 00bc193..b35f075 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
@@ -27,8 +27,6 @@ import java.util.Map;
import java.util.Set;
import java.util.Stack;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
import org.apache.hadoop.hive.common.JavaUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.AppMasterEventOperator;
@@ -38,16 +36,13 @@ import org.apache.hadoop.hive.ql.exec.DummyStoreOperator;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.exec.GroupByOperator;
import org.apache.hadoop.hive.ql.exec.JoinOperator;
-import org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator;
import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
import org.apache.hadoop.hive.ql.exec.MuxOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.OperatorFactory;
import org.apache.hadoop.hive.ql.exec.OperatorUtils;
-import org.apache.hadoop.hive.ql.exec.PTFOperator;
import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.exec.TezDummyStoreOperator;
-import org.apache.hadoop.hive.ql.exec.UDTFOperator;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
@@ -64,8 +59,8 @@ import org.apache.hadoop.hive.ql.plan.OpTraits;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.Statistics;
import org.apache.hadoop.util.ReflectionUtils;
-
-import com.google.common.collect.ImmutableSet;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
* ConvertJoinMapJoin is an optimization that replaces a common join
@@ -78,16 +73,6 @@ public class ConvertJoinMapJoin implements NodeProcessor {
private static final Logger LOG = LoggerFactory.getLogger(ConvertJoinMapJoin.class.getName());
- @SuppressWarnings({ "unchecked", "rawtypes" })
- private static final Set<Class<? extends Operator<?>>> COSTLY_OPERATORS =
- new ImmutableSet.Builder()
- .add(CommonJoinOperator.class)
- .add(GroupByOperator.class)
- .add(LateralViewJoinOperator.class)
- .add(PTFOperator.class)
- .add(ReduceSinkOperator.class)
- .add(UDTFOperator.class)
- .build();
@Override
/*
@@ -146,9 +131,11 @@ public class ConvertJoinMapJoin implements NodeProcessor {
}
}
- LOG.info("Convert to non-bucketed map join");
// check if we can convert to map join no bucket scaling.
- mapJoinConversionPos = getMapJoinConversionPos(joinOp, context, 1);
+ LOG.info("Convert to non-bucketed map join");
+ if (numBuckets != 1) {
+ mapJoinConversionPos = getMapJoinConversionPos(joinOp, context, 1);
+ }
if (mapJoinConversionPos < 0) {
// we are just converting to a common merge join operator. The shuffle
// join in map-reduce case.
@@ -557,8 +544,8 @@ public class ConvertJoinMapJoin implements NodeProcessor {
HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD);
int bigTablePosition = -1;
- // number of costly ops (Join, GB, PTF/Windowing, TF) below the big input
- int bigInputNumberCostlyOps = -1;
+ // big input cumulative row count
+ long bigInputCumulativeCardinality = -1L;
// stats of the big input
Statistics bigInputStat = null;
@@ -602,18 +589,27 @@ public class ConvertJoinMapJoin implements NodeProcessor {
}
}
- int currentInputNumberCostlyOps = foundInputNotFittingInMemory ?
- -1 : OperatorUtils.countOperatorsUpstream(parentOp, COSTLY_OPERATORS);
+ long currentInputCumulativeCardinality;
+ if (foundInputNotFittingInMemory) {
+ currentInputCumulativeCardinality = -1L;
+ } else {
+ Long cardinality = computeCumulativeCardinality(parentOp);
+ if (cardinality == null) {
+ // We could not get stats, we cannot convert
+ return -1;
+ }
+ currentInputCumulativeCardinality = cardinality;
+ }
// This input is the big table if it is contained in the big candidates set, and either:
// 1) we have not chosen a big table yet, or
// 2) it has been chosen as the big table above, or
- // 3) the number of costly operators for this input is higher, or
- // 4) the number of costly operators is equal, but the size is bigger,
+ // 3) the cumulative cardinality for this input is higher, or
+ // 4) the cumulative cardinality is equal, but the size is bigger,
boolean selectedBigTable = bigTableCandidateSet.contains(pos) &&
(bigInputStat == null || currentInputNotFittingInMemory ||
- (!foundInputNotFittingInMemory && (currentInputNumberCostlyOps > bigInputNumberCostlyOps ||
- (currentInputNumberCostlyOps == bigInputNumberCostlyOps && inputSize > bigInputStat.getDataSize()))));
+ (!foundInputNotFittingInMemory && (currentInputCumulativeCardinality > bigInputCumulativeCardinality ||
+ (currentInputCumulativeCardinality == bigInputCumulativeCardinality && inputSize > bigInputStat.getDataSize()))));
if (bigInputStat != null && selectedBigTable) {
// We are replacing the current big table with a new one, thus
@@ -633,7 +629,7 @@ public class ConvertJoinMapJoin implements NodeProcessor {
if (selectedBigTable) {
bigTablePosition = pos;
- bigInputNumberCostlyOps = currentInputNumberCostlyOps;
+ bigInputCumulativeCardinality = currentInputCumulativeCardinality;
bigInputStat = currInputStat;
}
@@ -642,6 +638,39 @@ public class ConvertJoinMapJoin implements NodeProcessor {
return bigTablePosition;
}
+ // This is akin to CBO cumulative cardinality model
+ private static Long computeCumulativeCardinality(Operator<? extends OperatorDesc> op) {
+ long cumulativeCardinality = 0L;
+ if (op instanceof CommonJoinOperator) {
+ // Choose max
+ for (Operator<? extends OperatorDesc> inputOp : op.getParentOperators()) {
+ Long inputCardinality = computeCumulativeCardinality(inputOp);
+ if (inputCardinality == null) {
+ return null;
+ }
+ if (inputCardinality > cumulativeCardinality) {
+ cumulativeCardinality = inputCardinality;
+ }
+ }
+ } else {
+ // Choose cumulative
+ for (Operator<? extends OperatorDesc> inputOp : op.getParentOperators()) {
+ Long inputCardinality = computeCumulativeCardinality(inputOp);
+ if (inputCardinality == null) {
+ return null;
+ }
+ cumulativeCardinality += inputCardinality;
+ }
+ }
+ Statistics currInputStat = op.getStatistics();
+ if (currInputStat == null) {
+ LOG.warn("Couldn't get statistics from: " + op);
+ return null;
+ }
+ cumulativeCardinality += currInputStat.getNumRows();
+ return cumulativeCardinality;
+ }
+
/*
* Once we have decided on the map join, the tree would transform from
*
http://git-wip-us.apache.org/repos/asf/hive/blob/930b66b2/ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out
index c743edc..8f054f2 100644
--- a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out
+++ b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out
@@ -344,7 +344,8 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE)
+ Map 3 <- Reducer 2 (CUSTOM_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -384,15 +385,31 @@ STAGE PLANS:
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: string)
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col3
+ input vertices:
+ 0 Reducer 2
+ Statistics: Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE
+ HybridGraceHashJoin: true
+ Select Operator
+ expressions: _col1 (type: int), _col0 (type: double), _col3 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: llap
Reducer 2
- Execution mode: uber
+ Execution mode: llap
Reduce Operator Tree:
Group By Operator
aggregations: sum(VALUE._col0)
@@ -404,28 +421,12 @@ STAGE PLANS:
expressions: _col1 (type: double), _col0 (type: int)
outputColumnNames: _col0, _col1
Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col1 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col0, _col1, _col3
- input vertices:
- 1 Map 3
- Statistics: Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE
- HybridGraceHashJoin: true
- Select Operator
- expressions: _col1 (type: int), _col0 (type: double), _col3 (type: string)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: double)
Stage: Stage-0
Fetch Operator
@@ -953,7 +954,8 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE)
+ Map 3 <- Reducer 2 (CUSTOM_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -993,15 +995,31 @@ STAGE PLANS:
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: string)
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col3
+ input vertices:
+ 0 Reducer 2
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ HybridGraceHashJoin: true
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: double), _col3 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: llap
Reducer 2
- Execution mode: uber
+ Execution mode: llap
Reduce Operator Tree:
Group By Operator
aggregations: sum(VALUE._col0)
@@ -1009,28 +1027,12 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col0, _col1, _col3
- input vertices:
- 1 Map 3
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
- HybridGraceHashJoin: true
- Select Operator
- expressions: _col0 (type: int), _col1 (type: double), _col3 (type: string)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: double)
Stage: Stage-0
Fetch Operator
@@ -1057,7 +1059,8 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE)
+ Map 3 <- Reducer 2 (CUSTOM_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -1091,15 +1094,31 @@ STAGE PLANS:
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: string)
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col3
+ input vertices:
+ 0 Reducer 2
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ HybridGraceHashJoin: true
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: double), _col3 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: llap
Reducer 2
- Execution mode: uber
+ Execution mode: llap
Reduce Operator Tree:
Group By Operator
aggregations: sum(VALUE._col0)
@@ -1107,28 +1126,12 @@ STAGE PLANS:
mode: complete
outputColumnNames: _col0, _col1
Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col0, _col1, _col3
- input vertices:
- 1 Map 3
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
- HybridGraceHashJoin: true
- Select Operator
- expressions: _col0 (type: int), _col1 (type: double), _col3 (type: string)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: double)
Stage: Stage-0
Fetch Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/930b66b2/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out
index 335a239..9f38717 100644
--- a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out
+++ b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out
@@ -4906,8 +4906,9 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 3 <- Map 1 (BROADCAST_EDGE), Map 2 (SIMPLE_EDGE)
- Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+ Map 1 <- Reducer 4 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 4 <- Map 3 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -4920,13 +4921,27 @@ STAGE PLANS:
expressions: ds (type: string)
outputColumnNames: _col0
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 '2008-04-08' (type: string)
+ input vertices:
+ 1 Reducer 4
+ Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
+ HybridGraceHashJoin: true
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
Execution mode: llap
- Map 2
+ Map 3
Map Operator Tree:
TableScan
alias: srcpart
@@ -4943,36 +4958,7 @@ STAGE PLANS:
Map-reduce partition columns: '2008-04-08' (type: string)
Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
Execution mode: llap
- Reducer 3
- Execution mode: llap
- Reduce Operator Tree:
- Group By Operator
- keys: '2008-04-08' (type: string)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 '2008-04-08' (type: string)
- input vertices:
- 0 Map 1
- Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
- HybridGraceHashJoin: true
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
- Reducer 4
+ Reducer 2
Execution mode: uber
Reduce Operator Tree:
Group By Operator
@@ -4987,6 +4973,36 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 4
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: '2008-04-08' (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: '2008-04-08' (type: string)
+ sort order: +
+ Map-reduce partition columns: '2008-04-08' (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: '2008-04-08' (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Dynamic Partitioning Event Operator
+ Target column: ds (string)
+ Target Input: srcpart
+ Partition key expr: ds
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Target Vertex: Map 1
Stage: Stage-0
Fetch Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/930b66b2/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out
index c482c4e..35b7544 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out
@@ -4541,8 +4541,9 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 3 <- Map 1 (BROADCAST_EDGE), Map 2 (SIMPLE_EDGE)
- Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+ Map 1 <- Reducer 4 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 4 <- Map 3 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -4555,13 +4556,27 @@ STAGE PLANS:
expressions: ds (type: string)
outputColumnNames: _col0
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 '2008-04-08' (type: string)
+ input vertices:
+ 1 Reducer 4
+ Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
+ HybridGraceHashJoin: true
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
Execution mode: llap
- Map 2
+ Map 3
Map Operator Tree:
TableScan
alias: srcpart
@@ -4578,36 +4593,7 @@ STAGE PLANS:
Map-reduce partition columns: '2008-04-08' (type: string)
Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
Execution mode: llap
- Reducer 3
- Execution mode: vectorized, llap
- Reduce Operator Tree:
- Group By Operator
- keys: '2008-04-08' (type: string)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 '2008-04-08' (type: string)
- input vertices:
- 0 Map 1
- Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
- HybridGraceHashJoin: true
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
- Reducer 4
+ Reducer 2
Execution mode: vectorized, uber
Reduce Operator Tree:
Group By Operator
@@ -4622,6 +4608,36 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 4
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: '2008-04-08' (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: '2008-04-08' (type: string)
+ sort order: +
+ Map-reduce partition columns: '2008-04-08' (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: '2008-04-08' (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Dynamic Partitioning Event Operator
+ Target column: ds (string)
+ Target Input: srcpart
+ Partition key expr: ds
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Target Vertex: Map 1
Stage: Stage-0
Fetch Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/930b66b2/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_10.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_10.q.out b/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_10.q.out
index 5df187d..8bd9dc8 100644
--- a/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_10.q.out
+++ b/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_10.q.out
@@ -239,8 +239,8 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE)
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Map 2 <- Map 1 (BROADCAST_EDGE)
+ Reducer 3 <- Map 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -256,6 +256,23 @@ STAGE PLANS:
mode: final
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key < 6) (type: boolean)
+ Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -263,7 +280,7 @@ STAGE PLANS:
0 _col0 (type: int)
1 _col0 (type: int)
input vertices:
- 1 Map 3
+ 0 Map 1
Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE
HybridGraceHashJoin: true
Group By Operator
@@ -275,24 +292,7 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: bigint)
- Map 3
- Map Operator Tree:
- TableScan
- alias: a
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (key < 6) (type: boolean)
- Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
- Reducer 2
+ Reducer 3
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
http://git-wip-us.apache.org/repos/asf/hive/blob/930b66b2/ql/src/test/results/clientpositive/tez/bucket_map_join_tez1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/bucket_map_join_tez1.q.out b/ql/src/test/results/clientpositive/tez/bucket_map_join_tez1.q.out
index 882eff3..2e10157 100644
--- a/ql/src/test/results/clientpositive/tez/bucket_map_join_tez1.q.out
+++ b/ql/src/test/results/clientpositive/tez/bucket_map_join_tez1.q.out
@@ -338,7 +338,8 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE)
+ Map 3 <- Reducer 2 (CUSTOM_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -377,12 +378,28 @@ STAGE PLANS:
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: string)
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col3
+ input vertices:
+ 0 Reducer 2
+ Statistics: Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE
+ HybridGraceHashJoin: true
+ Select Operator
+ expressions: _col1 (type: int), _col0 (type: double), _col3 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Reducer 2
Reduce Operator Tree:
Group By Operator
@@ -395,28 +412,12 @@ STAGE PLANS:
expressions: _col1 (type: double), _col0 (type: int)
outputColumnNames: _col0, _col1
Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col1 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col0, _col1, _col3
- input vertices:
- 1 Map 3
- Statistics: Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE
- HybridGraceHashJoin: true
- Select Operator
- expressions: _col1 (type: int), _col0 (type: double), _col3 (type: string)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: double)
Stage: Stage-0
Fetch Operator
@@ -930,7 +931,8 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE)
+ Map 3 <- Reducer 2 (CUSTOM_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -969,12 +971,28 @@ STAGE PLANS:
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: string)
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col3
+ input vertices:
+ 0 Reducer 2
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ HybridGraceHashJoin: true
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: double), _col3 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Reducer 2
Reduce Operator Tree:
Group By Operator
@@ -983,28 +1001,12 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col0, _col1, _col3
- input vertices:
- 1 Map 3
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
- HybridGraceHashJoin: true
- Select Operator
- expressions: _col0 (type: int), _col1 (type: double), _col3 (type: string)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: double)
Stage: Stage-0
Fetch Operator
@@ -1031,7 +1033,8 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE)
+ Map 3 <- Reducer 2 (CUSTOM_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -1064,12 +1067,28 @@ STAGE PLANS:
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: string)
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col3
+ input vertices:
+ 0 Reducer 2
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ HybridGraceHashJoin: true
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: double), _col3 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Reducer 2
Reduce Operator Tree:
Group By Operator
@@ -1078,28 +1097,12 @@ STAGE PLANS:
mode: complete
outputColumnNames: _col0, _col1
Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col0, _col1, _col3
- input vertices:
- 1 Map 3
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
- HybridGraceHashJoin: true
- Select Operator
- expressions: _col0 (type: int), _col1 (type: double), _col3 (type: string)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: double)
Stage: Stage-0
Fetch Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/930b66b2/ql/src/test/results/clientpositive/tez/cross_product_check_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/cross_product_check_2.q.out b/ql/src/test/results/clientpositive/tez/cross_product_check_2.q.out
index f00be43..09f2fc5 100644
--- a/ql/src/test/results/clientpositive/tez/cross_product_check_2.q.out
+++ b/ql/src/test/results/clientpositive/tez/cross_product_check_2.q.out
@@ -91,7 +91,7 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Map Join MAPJOIN[22][bigTable=?] in task 'Map 1' is a cross product
+Warning: Map Join MAPJOIN[22][bigTable=?] in task 'Map 3' is a cross product
PREHOOK: query: explain select * from B d1 join B d2 on d1.key = d2.key join A
PREHOOK: type: QUERY
POSTHOOK: query: explain select * from B d1 join B d2 on d1.key = d2.key join A
@@ -105,7 +105,8 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Map 2 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE)
+ Map 1 <- Map 2 (BROADCAST_EDGE)
+ Map 3 <- Map 1 (BROADCAST_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -131,23 +132,10 @@ STAGE PLANS:
1 Map 2
Statistics: Num rows: 11 Data size: 105 Basic stats: COMPLETE Column stats: NONE
HybridGraceHashJoin: true
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0
- 1
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- input vertices:
- 1 Map 3
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 11 Data size: 105 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
Map 2
Map Operator Tree:
TableScan
@@ -175,10 +163,23 @@ STAGE PLANS:
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string), _col1 (type: string)
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ input vertices:
+ 0 Map 1
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
@@ -186,7 +187,7 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Map Join MAPJOIN[27][bigTable=?] in task 'Reducer 3' is a cross product
+Warning: Map Join MAPJOIN[27][bigTable=?] in task 'Map 1' is a cross product
PREHOOK: query: explain select * from A join
(select d1.key
from B d1 join B d2 on d1.key = d2.key
@@ -206,8 +207,9 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
+ Map 1 <- Reducer 3 (BROADCAST_EDGE)
Map 2 <- Map 4 (BROADCAST_EDGE)
- Reducer 3 <- Map 1 (BROADCAST_EDGE), Map 2 (SIMPLE_EDGE)
+ Reducer 3 <- Map 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -219,10 +221,23 @@ STAGE PLANS:
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string), _col1 (type: string)
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1, _col2
+ input vertices:
+ 1 Reducer 3
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Map 2
Map Operator Tree:
TableScan
@@ -280,23 +295,10 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 5 Data size: 47 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0
- 1
- outputColumnNames: _col0, _col1, _col2
- input vertices:
- 0 Map 1
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 5 Data size: 47 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
Stage: Stage-0
Fetch Operator
@@ -305,7 +307,7 @@ STAGE PLANS:
ListSink
Warning: Map Join MAPJOIN[21][bigTable=?] in task 'Map 2' is a cross product
-Warning: Map Join MAPJOIN[22][bigTable=?] in task 'Reducer 3' is a cross product
+Warning: Map Join MAPJOIN[22][bigTable=?] in task 'Map 1' is a cross product
PREHOOK: query: explain select * from A join (select d1.key from B d1 join B d2 where 1 = 1 group by d1.key) od1
PREHOOK: type: QUERY
POSTHOOK: query: explain select * from A join (select d1.key from B d1 join B d2 where 1 = 1 group by d1.key) od1
@@ -319,8 +321,9 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
+ Map 1 <- Reducer 3 (BROADCAST_EDGE)
Map 2 <- Map 4 (BROADCAST_EDGE)
- Reducer 3 <- Map 1 (BROADCAST_EDGE), Map 2 (SIMPLE_EDGE)
+ Reducer 3 <- Map 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -332,10 +335,23 @@ STAGE PLANS:
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string), _col1 (type: string)
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1, _col2
+ input vertices:
+ 1 Reducer 3
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Map 2
Map Operator Tree:
TableScan
@@ -382,23 +398,10 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 5 Data size: 47 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0
- 1
- outputColumnNames: _col0, _col1, _col2
- input vertices:
- 0 Map 1
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 5 Data size: 47 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
Stage: Stage-0
Fetch Operator
@@ -406,7 +409,7 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Map Join MAPJOIN[31][bigTable=?] in task 'Reducer 4' is a cross product
+Warning: Map Join MAPJOIN[31][bigTable=?] in task 'Reducer 2' is a cross product
PREHOOK: query: explain select * from
(select A.key from A group by key) ss join
(select d1.key from B d1 join B d2 on d1.key = d2.key where 1 = 1 group by d1.key) od1
@@ -425,8 +428,8 @@ STAGE PLANS:
#### A masked pattern was here ####
Edges:
Map 3 <- Map 5 (BROADCAST_EDGE)
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
- Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 2 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (BROADCAST_EDGE)
+ Reducer 4 <- Map 3 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -505,17 +508,6 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string)
- Reducer 4
- Reduce Operator Tree:
- Group By Operator
- keys: KEY._col0 (type: string)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 5 Data size: 47 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -524,7 +516,7 @@ STAGE PLANS:
1
outputColumnNames: _col0, _col1
input vertices:
- 0 Reducer 2
+ 1 Reducer 4
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
@@ -533,6 +525,17 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 5 Data size: 47 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 5 Data size: 47 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
Stage: Stage-0
Fetch Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/930b66b2/ql/src/test/results/clientpositive/tez/dynamic_partition_pruning.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/tez/dynamic_partition_pruning.q.out
index ce6e8d5..b4b14c2 100644
--- a/ql/src/test/results/clientpositive/tez/dynamic_partition_pruning.q.out
+++ b/ql/src/test/results/clientpositive/tez/dynamic_partition_pruning.q.out
@@ -4758,8 +4758,9 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 3 <- Map 1 (BROADCAST_EDGE), Map 2 (SIMPLE_EDGE)
- Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+ Map 1 <- Reducer 4 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 4 <- Map 3 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -4772,12 +4773,26 @@ STAGE PLANS:
expressions: ds (type: string)
outputColumnNames: _col0
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
- Map 2
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 '2008-04-08' (type: string)
+ input vertices:
+ 1 Reducer 4
+ Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
+ HybridGraceHashJoin: true
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Map 3
Map Operator Tree:
TableScan
alias: srcpart
@@ -4793,35 +4808,7 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: '2008-04-08' (type: string)
Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
- Reducer 3
- Reduce Operator Tree:
- Group By Operator
- keys: '2008-04-08' (type: string)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 '2008-04-08' (type: string)
- input vertices:
- 0 Map 1
- Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
- HybridGraceHashJoin: true
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
- Reducer 4
+ Reducer 2
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
@@ -4835,6 +4822,35 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ keys: '2008-04-08' (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: '2008-04-08' (type: string)
+ sort order: +
+ Map-reduce partition columns: '2008-04-08' (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: '2008-04-08' (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Dynamic Partitioning Event Operator
+ Target column: ds (string)
+ Target Input: srcpart
+ Partition key expr: ds
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Target Vertex: Map 1
Stage: Stage-0
Fetch Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/930b66b2/ql/src/test/results/clientpositive/tez/explainuser_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/explainuser_2.q.out b/ql/src/test/results/clientpositive/tez/explainuser_2.q.out
index f2b225d..db1c5b5 100644
--- a/ql/src/test/results/clientpositive/tez/explainuser_2.q.out
+++ b/ql/src/test/results/clientpositive/tez/explainuser_2.q.out
@@ -1018,9 +1018,9 @@ POSTHOOK: type: QUERY
Plan optimized by CBO.
Vertex dependency in root stage
-Map 1 <- Map 2 (BROADCAST_EDGE)
Map 10 <- Map 9 (BROADCAST_EDGE)
-Map 3 <- Map 1 (BROADCAST_EDGE), Map 10 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE), Map 7 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE)
+Map 2 <- Map 1 (BROADCAST_EDGE)
+Map 3 <- Map 10 (BROADCAST_EDGE), Map 2 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE), Map 7 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE)
Reducer 4 <- Map 3 (SIMPLE_EDGE)
Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
@@ -1049,26 +1049,26 @@ Stage-0
Output:["_col2","_col12","_col20","_col13","_col21","_col3"]
Map Join Operator [MAPJOIN_97] (rows=1610 width=10)
Conds:RS_44._col1, _col3=SEL_40._col15, _col17(Inner),HybridGraceHashJoin:true,Output:["_col2","_col3","_col12","_col13","_col20","_col21"]
- <-Map 1 [BROADCAST_EDGE]
+ <-Map 2 [BROADCAST_EDGE]
BROADCAST [RS_44]
PartitionCols:_col1, _col3
Map Join Operator [MAPJOIN_91] (rows=275 width=10)
- Conds:SEL_2._col0=RS_42._col0(Inner),HybridGraceHashJoin:true,Output:["_col1","_col2","_col3"]
- <-Map 2 [BROADCAST_EDGE]
- BROADCAST [RS_42]
+ Conds:RS_41._col0=SEL_5._col0(Inner),HybridGraceHashJoin:true,Output:["_col1","_col2","_col3"]
+ <-Map 1 [BROADCAST_EDGE]
+ BROADCAST [RS_41]
PartitionCols:_col0
- Select Operator [SEL_5] (rows=250 width=10)
- Output:["_col0"]
- Filter Operator [FIL_84] (rows=250 width=10)
- predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null)
- TableScan [TS_3] (rows=500 width=10)
- default@src,d1,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
- <-Select Operator [SEL_2] (rows=170 width=34)
- Output:["_col0","_col1","_col2","_col3"]
- Filter Operator [FIL_83] (rows=170 width=34)
- predicate:((v2 is not null and v3 is not null) and k1 is not null)
- TableScan [TS_0] (rows=170 width=34)
- default@cs,cs,Tbl:COMPLETE,Col:NONE,Output:["k1","v2","k3","v3"]
+ Select Operator [SEL_2] (rows=170 width=34)
+ Output:["_col0","_col1","_col2","_col3"]
+ Filter Operator [FIL_83] (rows=170 width=34)
+ predicate:((v2 is not null and v3 is not null) and k1 is not null)
+ TableScan [TS_0] (rows=170 width=34)
+ default@cs,cs,Tbl:COMPLETE,Col:NONE,Output:["k1","v2","k3","v3"]
+ <-Select Operator [SEL_5] (rows=250 width=10)
+ Output:["_col0"]
+ Filter Operator [FIL_84] (rows=250 width=10)
+ predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null)
+ TableScan [TS_3] (rows=500 width=10)
+ default@src,d1,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
<-Select Operator [SEL_40] (rows=1464 width=10)
Output:["_col14","_col15","_col17","_col6","_col7"]
Map Join Operator [MAPJOIN_96] (rows=1464 width=10)
http://git-wip-us.apache.org/repos/asf/hive/blob/930b66b2/ql/src/test/results/clientpositive/tez/vector_groupby_mapjoin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_groupby_mapjoin.q.out b/ql/src/test/results/clientpositive/tez/vector_groupby_mapjoin.q.out
index 037c338..5c3e198 100644
--- a/ql/src/test/results/clientpositive/tez/vector_groupby_mapjoin.q.out
+++ b/ql/src/test/results/clientpositive/tez/vector_groupby_mapjoin.q.out
@@ -1,4 +1,4 @@
-Warning: Map Join MAPJOIN[28][bigTable=?] in task 'Reducer 3' is a cross product
+Warning: Map Join MAPJOIN[28][bigTable=?] in task 'Map 1' is a cross product
PREHOOK: query: -- HIVE-12738 -- We are checking if a MapJoin after a GroupBy will work properly.
explain
select *
@@ -18,18 +18,19 @@ POSTHOOK: type: QUERY
Plan optimized by CBO.
Vertex dependency in root stage
-Reducer 3 <- Map 1 (BROADCAST_EDGE), Map 2 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE)
-Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+Map 1 <- Map 5 (BROADCAST_EDGE), Reducer 4 (BROADCAST_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+Reducer 4 <- Map 3 (SIMPLE_EDGE)
Stage-0
Fetch Operator
limit:-1
Stage-1
- Reducer 4 vectorized
+ Reducer 2 vectorized
File Output Operator [FS_34]
Select Operator [OP_33] (rows=302 width=10)
Output:["_col0","_col1"]
- <-Reducer 3 [SIMPLE_EDGE] vectorized
+ <-Map 1 [SIMPLE_EDGE]
SHUFFLE [RS_22]
Select Operator [SEL_21] (rows=302 width=10)
Output:["_col0","_col1"]
@@ -46,28 +47,28 @@ Stage-0
default@src,src,Tbl:COMPLETE,Col:NONE,Output:["key"]
<-Map Join Operator [MAPJOIN_28] (rows=550 width=10)
Conds:(Inner),Output:["_col0","_col1"]
- <-Map 1 [BROADCAST_EDGE]
- BROADCAST [RS_14]
- Select Operator [SEL_1] (rows=500 width=10)
- Output:["_col0","_col1"]
- TableScan [TS_0] (rows=500 width=10)
- default@src,src,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
- <-Select Operator [SEL_10] (rows=1 width=8)
- Filter Operator [FIL_9] (rows=1 width=8)
- predicate:(_col0 = 0)
- Group By Operator [OP_32] (rows=1 width=8)
- Output:["_col0"],aggregations:["count(VALUE._col0)"]
- <-Map 2 [SIMPLE_EDGE]
- SHUFFLE [RS_6]
- Group By Operator [GBY_5] (rows=1 width=8)
- Output:["_col0"],aggregations:["count()"]
- Select Operator [SEL_4] (rows=250 width=10)
- Filter Operator [FIL_26] (rows=250 width=10)
- predicate:key is null
- TableScan [TS_2] (rows=500 width=10)
- default@src,src,Tbl:COMPLETE,Col:NONE,Output:["key"]
+ <-Reducer 4 [BROADCAST_EDGE] vectorized
+ BROADCAST [RS_15]
+ Select Operator [SEL_10] (rows=1 width=8)
+ Filter Operator [FIL_9] (rows=1 width=8)
+ predicate:(_col0 = 0)
+ Group By Operator [OP_32] (rows=1 width=8)
+ Output:["_col0"],aggregations:["count(VALUE._col0)"]
+ <-Map 3 [SIMPLE_EDGE]
+ SHUFFLE [RS_6]
+ Group By Operator [GBY_5] (rows=1 width=8)
+ Output:["_col0"],aggregations:["count()"]
+ Select Operator [SEL_4] (rows=250 width=10)
+ Filter Operator [FIL_26] (rows=250 width=10)
+ predicate:key is null
+ TableScan [TS_2] (rows=500 width=10)
+ default@src,src,Tbl:COMPLETE,Col:NONE,Output:["key"]
+ <-Select Operator [SEL_1] (rows=500 width=10)
+ Output:["_col0","_col1"]
+ TableScan [TS_0] (rows=500 width=10)
+ default@src,src,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
-Warning: Map Join MAPJOIN[28][bigTable=?] in task 'Reducer 3' is a cross product
+Warning: Map Join MAPJOIN[28][bigTable=?] in task 'Map 1' is a cross product
PREHOOK: query: select *
from src
where not key in