You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2016/07/25 23:16:35 UTC
hive git commit: HIVE-14326: Merging outer joins without conditions can lead to wrong results (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
Repository: hive
Updated Branches:
refs/heads/master 964b8a001 -> 30b40c49a
HIVE-14326: Merging outer joins without conditions can lead to wrong results (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/30b40c49
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/30b40c49
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/30b40c49
Branch: refs/heads/master
Commit: 30b40c49a8381dff5be79496247433079856e0ef
Parents: 964b8a0
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Mon Jul 25 14:05:04 2016 +0100
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Tue Jul 26 00:15:44 2016 +0100
----------------------------------------------------------------------
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 4 +
.../queries/clientpositive/cross_join_merge.q | 1 +
.../clientpositive/cross_join_merge.q.out | 44 +++++--
.../clientpositive/ppd_outer_join5.q.out | 127 +++++++++++++++----
.../clientpositive/spark/ppd_outer_join5.q.out | 97 ++++++++++----
5 files changed, 208 insertions(+), 65 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/30b40c49/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 698efdc..2671cb1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -8922,6 +8922,10 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
private ObjectPair<Integer, int[]> findMergePos(QBJoinTree node, QBJoinTree target) {
int res = -1;
String leftAlias = node.getLeftAlias();
+ if (leftAlias == null && (!node.getNoOuterJoin() || !target.getNoOuterJoin())) {
+ // Cross with outer join: currently we do not merge
+ return new ObjectPair(-1, null);
+ }
ArrayList<ASTNode> nodeCondn = node.getExpressions().get(0);
ArrayList<ASTNode> targetCondn = null;
http://git-wip-us.apache.org/repos/asf/hive/blob/30b40c49/ql/src/test/queries/clientpositive/cross_join_merge.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/cross_join_merge.q b/ql/src/test/queries/clientpositive/cross_join_merge.q
index 3ba4727..50f5813 100644
--- a/ql/src/test/queries/clientpositive/cross_join_merge.q
+++ b/ql/src/test/queries/clientpositive/cross_join_merge.q
@@ -10,6 +10,7 @@ select src1.key from src src1 join src src2 join src src3 where src1.key=src2.ke
explain
select src1.key from src src1 join src src2 on 5 = src2.key join src src3 on src1.key=src3.key;
+-- no merge
explain
select src1.key from src src1 left outer join src src2 join src src3;
http://git-wip-us.apache.org/repos/asf/hive/blob/30b40c49/ql/src/test/results/clientpositive/cross_join_merge.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/cross_join_merge.q.out b/ql/src/test/results/clientpositive/cross_join_merge.q.out
index ccf2ff6..f15161a 100644
--- a/ql/src/test/results/clientpositive/cross_join_merge.q.out
+++ b/ql/src/test/results/clientpositive/cross_join_merge.q.out
@@ -337,16 +337,20 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Shuffle Join JOIN[9][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-1:MAPRED' is a cross product
-PREHOOK: query: explain
+Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: -- no merge
+explain
select src1.key from src src1 left outer join src src2 join src src3
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: -- no merge
+explain
select src1.key from src src1 left outer join src src2 join src src3
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
STAGE PLANS:
Stage: Stage-1
@@ -371,6 +375,30 @@ STAGE PLANS:
Reduce Output Operator
sort order:
Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0
+ Statistics: Num rows: 250000 Data size: 3906000 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 250000 Data size: 3906000 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
TableScan
alias: src1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
@@ -382,17 +410,15 @@ STAGE PLANS:
Reduce Operator Tree:
Join Operator
condition map:
- Left Outer Join0 to 1
- Inner Join 0 to 2
+ Inner Join 0 to 1
keys:
0
1
- 2
outputColumnNames: _col0
- Statistics: Num rows: 125000000 Data size: 2453000000 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 125000000 Data size: 2578000000 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 125000000 Data size: 2453000000 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 125000000 Data size: 2578000000 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/30b40c49/ql/src/test/results/clientpositive/ppd_outer_join5.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/ppd_outer_join5.q.out b/ql/src/test/results/clientpositive/ppd_outer_join5.q.out
index 6658cfb..cbc0e89 100644
--- a/ql/src/test/results/clientpositive/ppd_outer_join5.q.out
+++ b/ql/src/test/results/clientpositive/ppd_outer_join5.q.out
@@ -30,14 +30,16 @@ POSTHOOK: query: create table t4 (id int, key string, value string)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@t4
-Warning: Shuffle Join JOIN[12][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-1:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
PREHOOK: query: explain select * from t1 full outer join t2 on t1.id=t2.id join t3 on t2.id=t3.id where t3.id=20
PREHOOK: type: QUERY
POSTHOOK: query: explain select * from t1 full outer join t2 on t1.id=t2.id join t3 on t2.id=t3.id where t3.id=20
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
STAGE PLANS:
Stage: Stage-1
@@ -71,6 +73,33 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
value expressions: _col1 (type: string), _col2 (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Right Outer Join0 to 1
+ filter predicates:
+ 0
+ 1 {true}
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1, _col2, _col4, _col5
+ Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string)
TableScan
alias: t3
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
@@ -88,25 +117,19 @@ STAGE PLANS:
Reduce Operator Tree:
Join Operator
condition map:
- Right Outer Join0 to 1
- Inner Join 0 to 2
- filter predicates:
- 0
- 1 {true}
- 2
+ Inner Join 0 to 1
keys:
0
1
- 2
outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col7, _col8
- Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), 20 (type: int), _col4 (type: string), _col5 (type: string), 20 (type: int), _col7 (type: string), _col8 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -118,14 +141,16 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Shuffle Join JOIN[12][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-1:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
PREHOOK: query: explain select * from t1 join t2 on (t1.id=t2.id) left outer join t3 on (t2.id=t3.id) where t2.id=20
PREHOOK: type: QUERY
POSTHOOK: query: explain select * from t1 join t2 on (t1.id=t2.id) left outer join t3 on (t2.id=t3.id) where t2.id=20
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
STAGE PLANS:
Stage: Stage-1
@@ -159,6 +184,30 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
value expressions: _col1 (type: string), _col2 (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col1, _col2, _col4, _col5
+ Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string)
TableScan
alias: t3
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
@@ -176,21 +225,19 @@ STAGE PLANS:
Reduce Operator Tree:
Join Operator
condition map:
- Inner Join 0 to 1
- Left Outer Join0 to 2
+ Left Outer Join0 to 1
keys:
0
1
- 2
outputColumnNames: _col1, _col2, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: 20 (type: int), _col1 (type: string), _col2 (type: string), 20 (type: int), _col4 (type: string), _col5 (type: string), _col6 (type: int), _col7 (type: string), _col8 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -202,14 +249,16 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Shuffle Join JOIN[12][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-1:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
PREHOOK: query: explain select * from t1 join t2 on (t1.id=t2.id) left outer join t3 on (t1.id=t3.id) where t2.id=20
PREHOOK: type: QUERY
POSTHOOK: query: explain select * from t1 join t2 on (t1.id=t2.id) left outer join t3 on (t1.id=t3.id) where t2.id=20
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
STAGE PLANS:
Stage: Stage-1
@@ -243,6 +292,30 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
value expressions: _col1 (type: string), _col2 (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col1, _col2, _col4, _col5
+ Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string)
TableScan
alias: t3
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
@@ -260,21 +333,19 @@ STAGE PLANS:
Reduce Operator Tree:
Join Operator
condition map:
- Inner Join 0 to 1
- Left Outer Join0 to 2
+ Left Outer Join0 to 1
keys:
0
1
- 2
outputColumnNames: _col1, _col2, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: 20 (type: int), _col1 (type: string), _col2 (type: string), 20 (type: int), _col4 (type: string), _col5 (type: string), _col6 (type: int), _col7 (type: string), _col8 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/30b40c49/ql/src/test/results/clientpositive/spark/ppd_outer_join5.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/ppd_outer_join5.q.out b/ql/src/test/results/clientpositive/spark/ppd_outer_join5.q.out
index f494f40..ae266e5 100644
--- a/ql/src/test/results/clientpositive/spark/ppd_outer_join5.q.out
+++ b/ql/src/test/results/clientpositive/spark/ppd_outer_join5.q.out
@@ -30,7 +30,8 @@ POSTHOOK: query: create table t4 (id int, key string, value string)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@t4
-Warning: Shuffle Join JOIN[12][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 2' is a cross product
+Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product
+Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 3' is a cross product
PREHOOK: query: explain select * from t1 full outer join t2 on t1.id=t2.id join t3 on t2.id=t3.id where t3.id=20
PREHOOK: type: QUERY
POSTHOOK: query: explain select * from t1 full outer join t2 on t1.id=t2.id join t3 on t2.id=t3.id where t3.id=20
@@ -43,7 +44,8 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1)
+ Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -62,7 +64,7 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
- Map 3
+ Map 4
Map Operator Tree:
TableScan
alias: t2
@@ -78,7 +80,7 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
value expressions: _col1 (type: string), _col2 (type: string)
- Map 4
+ Map 5
Map Operator Tree:
TableScan
alias: t3
@@ -99,24 +101,35 @@ STAGE PLANS:
Join Operator
condition map:
Right Outer Join0 to 1
- Inner Join 0 to 2
filter predicates:
0
1 {true}
- 2
keys:
0
1
- 2
- outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col7, _col8
+ outputColumnNames: _col0, _col1, _col2, _col4, _col5
Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string)
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col7, _col8
+ Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), 20 (type: int), _col4 (type: string), _col5 (type: string), 20 (type: int), _col7 (type: string), _col8 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -128,7 +141,8 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Shuffle Join JOIN[12][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 2' is a cross product
+Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product
+Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 3' is a cross product
PREHOOK: query: explain select * from t1 join t2 on (t1.id=t2.id) left outer join t3 on (t2.id=t3.id) where t2.id=20
PREHOOK: type: QUERY
POSTHOOK: query: explain select * from t1 join t2 on (t1.id=t2.id) left outer join t3 on (t2.id=t3.id) where t2.id=20
@@ -141,7 +155,8 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1)
+ Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -160,7 +175,7 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
value expressions: _col1 (type: string), _col2 (type: string)
- Map 3
+ Map 4
Map Operator Tree:
TableScan
alias: t2
@@ -176,7 +191,7 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
value expressions: _col1 (type: string), _col2 (type: string)
- Map 4
+ Map 5
Map Operator Tree:
TableScan
alias: t3
@@ -197,20 +212,32 @@ STAGE PLANS:
Join Operator
condition map:
Inner Join 0 to 1
- Left Outer Join0 to 2
keys:
0
1
- 2
- outputColumnNames: _col1, _col2, _col4, _col5, _col6, _col7, _col8
+ outputColumnNames: _col1, _col2, _col4, _col5
Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string)
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col1, _col2, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: 20 (type: int), _col1 (type: string), _col2 (type: string), 20 (type: int), _col4 (type: string), _col5 (type: string), _col6 (type: int), _col7 (type: string), _col8 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -222,7 +249,8 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Shuffle Join JOIN[12][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 2' is a cross product
+Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product
+Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 3' is a cross product
PREHOOK: query: explain select * from t1 join t2 on (t1.id=t2.id) left outer join t3 on (t1.id=t3.id) where t2.id=20
PREHOOK: type: QUERY
POSTHOOK: query: explain select * from t1 join t2 on (t1.id=t2.id) left outer join t3 on (t1.id=t3.id) where t2.id=20
@@ -235,7 +263,8 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1)
+ Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -254,7 +283,7 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
value expressions: _col1 (type: string), _col2 (type: string)
- Map 3
+ Map 4
Map Operator Tree:
TableScan
alias: t2
@@ -270,7 +299,7 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
value expressions: _col1 (type: string), _col2 (type: string)
- Map 4
+ Map 5
Map Operator Tree:
TableScan
alias: t3
@@ -291,20 +320,32 @@ STAGE PLANS:
Join Operator
condition map:
Inner Join 0 to 1
- Left Outer Join0 to 2
keys:
0
1
- 2
- outputColumnNames: _col1, _col2, _col4, _col5, _col6, _col7, _col8
+ outputColumnNames: _col1, _col2, _col4, _col5
Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string)
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col1, _col2, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: 20 (type: int), _col1 (type: string), _col2 (type: string), 20 (type: int), _col4 (type: string), _col5 (type: string), _col6 (type: int), _col7 (type: string), _col8 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat