You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jd...@apache.org on 2017/12/12 20:51:24 UTC
[01/12] hive git commit: HIVE-18208: SMB Join : Fix the unit tests to
run SMB Joins. (Deepak Jaiswal, reviewed by Jason Dere)
Repository: hive
Updated Branches:
refs/heads/master 1320d2b31 -> 38405c145
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_2.q.out b/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_2.q.out
index 814553d..3e8ea6f 100644
--- a/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_2.q.out
+++ b/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_2.q.out
@@ -89,14 +89,15 @@ FROM test_table1 a JOIN test_table2 b
ON a.key = b.key WHERE a.ds = '1' and b.ds = '1'
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-3 is a root stage
- Stage-1 depends on stages: Stage-3
+ Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
Stage-2 depends on stages: Stage-0
STAGE PLANS:
- Stage: Stage-3
+ Stage: Stage-1
Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -111,40 +112,13 @@ STAGE PLANS:
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- Local Work:
- Map Reduce Local Work
-
- Stage: Stage-1
- Spark
- Edges:
- Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1)
-#### A masked pattern was here ####
- Vertices:
- Map 2
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
+ Sorted Merge Bucket Map Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
outputColumnNames: _col0, _col1, _col4
- input vertices:
- 0 Map 1
Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: int), concat(_col1, _col4) (type: string)
@@ -156,9 +130,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
- Local Work:
- Map Reduce Local Work
- Reducer 3
+ Reducer 2
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
@@ -266,14 +238,15 @@ FROM test_table1 a JOIN test_table2 b
ON a.key = b.key WHERE a.ds is not null and b.ds = '1'
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-3 is a root stage
- Stage-1 depends on stages: Stage-3
+ Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
Stage-2 depends on stages: Stage-0
STAGE PLANS:
- Stage: Stage-3
+ Stage: Stage-1
Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -288,40 +261,13 @@ STAGE PLANS:
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- Local Work:
- Map Reduce Local Work
-
- Stage: Stage-1
- Spark
- Edges:
- Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1)
-#### A masked pattern was here ####
- Vertices:
- Map 2
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
+ Sorted Merge Bucket Map Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
outputColumnNames: _col0, _col1, _col4
- input vertices:
- 0 Map 1
Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: int), concat(_col1, _col4) (type: string)
@@ -333,9 +279,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
- Local Work:
- Map Reduce Local Work
- Reducer 3
+ Reducer 2
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
@@ -467,14 +411,15 @@ FROM test_table1 a JOIN test_table2 b
ON a.key = b.key WHERE a.ds = '1' and b.ds is not null
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-3 is a root stage
- Stage-1 depends on stages: Stage-3
+ Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
Stage-2 depends on stages: Stage-0
STAGE PLANS:
- Stage: Stage-3
+ Stage: Stage-1
Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -489,40 +434,13 @@ STAGE PLANS:
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- Local Work:
- Map Reduce Local Work
-
- Stage: Stage-1
- Spark
- Edges:
- Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1)
-#### A masked pattern was here ####
- Vertices:
- Map 2
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 168 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 168 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 168 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
+ Sorted Merge Bucket Map Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
outputColumnNames: _col0, _col1, _col4
- input vertices:
- 0 Map 1
Statistics: Num rows: 184 Data size: 1619 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: int), concat(_col1, _col4) (type: string)
@@ -534,9 +452,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 184 Data size: 1619 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
- Local Work:
- Map Reduce Local Work
- Reducer 3
+ Reducer 2
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
@@ -674,14 +590,15 @@ JOIN
ON a.key = b.key
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-3 is a root stage
- Stage-1 depends on stages: Stage-3
+ Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
Stage-2 depends on stages: Stage-0
STAGE PLANS:
- Stage: Stage-3
+ Stage: Stage-1
Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -696,40 +613,13 @@ STAGE PLANS:
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- Local Work:
- Map Reduce Local Work
-
- Stage: Stage-1
- Spark
- Edges:
- Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1)
-#### A masked pattern was here ####
- Vertices:
- Map 2
- Map Operator Tree:
- TableScan
- alias: test_table2
- Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
+ Sorted Merge Bucket Map Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
outputColumnNames: _col0, _col1, _col3
- input vertices:
- 0 Map 1
Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: int), concat(_col1, _col3) (type: string)
@@ -741,9 +631,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
- Local Work:
- Map Reduce Local Work
- Reducer 3
+ Reducer 2
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
@@ -863,14 +751,15 @@ JOIN
ON a.key = b.key
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-3 is a root stage
- Stage-1 depends on stages: Stage-3
+ Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
Stage-2 depends on stages: Stage-0
STAGE PLANS:
- Stage: Stage-3
+ Stage: Stage-1
Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -885,40 +774,13 @@ STAGE PLANS:
expressions: key (type: int), concat(value, value) (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- Local Work:
- Map Reduce Local Work
-
- Stage: Stage-1
- Spark
- Edges:
- Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1)
-#### A masked pattern was here ####
- Vertices:
- Map 2
- Map Operator Tree:
- TableScan
- alias: test_table2
- Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int), concat(value, value) (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
+ Sorted Merge Bucket Map Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
outputColumnNames: _col0, _col1, _col3
- input vertices:
- 0 Map 1
Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: int), concat(_col1, _col3) (type: string)
@@ -930,9 +792,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
- Local Work:
- Map Reduce Local Work
- Reducer 3
+ Reducer 2
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
@@ -1052,14 +912,15 @@ JOIN
ON a.key = b.key
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-3 is a root stage
- Stage-1 depends on stages: Stage-3
+ Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
Stage-2 depends on stages: Stage-0
STAGE PLANS:
- Stage: Stage-3
+ Stage: Stage-1
Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -1074,40 +935,13 @@ STAGE PLANS:
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- Local Work:
- Map Reduce Local Work
-
- Stage: Stage-1
- Spark
- Edges:
- Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1)
-#### A masked pattern was here ####
- Vertices:
- Map 2
- Map Operator Tree:
- TableScan
- alias: test_table2
- Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
+ Sorted Merge Bucket Map Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
outputColumnNames: _col0, _col1, _col3
- input vertices:
- 0 Map 1
Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: (_col0 + _col0) (type: int), concat(_col1, _col3) (type: string)
@@ -1119,9 +953,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
- Local Work:
- Map Reduce Local Work
- Reducer 3
+ Reducer 2
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/results/clientpositive/spark/quotedid_smb.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/quotedid_smb.q.out b/ql/src/test/results/clientpositive/spark/quotedid_smb.q.out
index 7b8777f..8b37d2a 100644
--- a/ql/src/test/results/clientpositive/spark/quotedid_smb.q.out
+++ b/ql/src/test/results/clientpositive/spark/quotedid_smb.q.out
@@ -42,6 +42,57 @@ POSTHOOK: Input: default@src
POSTHOOK: Output: default@src_b2
POSTHOOK: Lineage: src_b2.!@#$%^&*()_q SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: src_b2.x+1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: explain select a.`x+1`, a.`!@#$%^&*()_q`, b.`x+1`, b.`!@#$%^&*()_q`
+from src_b a join src_b2 b on a.`!@#$%^&*()_q` = b.`!@#$%^&*()_q`
+where a.`x+1` < '11'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select a.`x+1`, a.`!@#$%^&*()_q`, b.`x+1`, b.`!@#$%^&*()_q`
+from src_b a join src_b2 b on a.`!@#$%^&*()_q` = b.`!@#$%^&*()_q`
+where a.`x+1` < '11'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (!@#$%^&*()_q is not null and (x+1 < '11')) (type: boolean)
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: x+1 (type: string), !@#$%^&*()_q (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col1 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
PREHOOK: query: select a.`x+1`, a.`!@#$%^&*()_q`, b.`x+1`, b.`!@#$%^&*()_q`
from src_b a join src_b2 b on a.`!@#$%^&*()_q` = b.`!@#$%^&*()_q`
where a.`x+1` < '11'
[03/12] hive git commit: HIVE-18208: SMB Join : Fix the unit tests to
run SMB Joins. (Deepak Jaiswal, reviewed by Jason Dere)
Posted by jd...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_7.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_7.q.out b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_7.q.out
index 0af21e0..da28ae4 100644
--- a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_7.q.out
+++ b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_7.q.out
@@ -77,52 +77,40 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 2 <- Map 1 (BROADCAST_EDGE)
- Reducer 3 <- Map 2 (SIMPLE_EDGE)
- Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
- alias: a
- Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
+ alias: b
+ Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (((key = 0) or (key = 5)) and key is not null) (type: boolean)
- Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 4 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 4 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: string)
- Execution mode: llap
- LLAP IO: no inputs
- Map 2
+ Statistics: Num rows: 2 Data size: 358 Basic stats: COMPLETE Column stats: COMPLETE
Map Operator Tree:
TableScan
- alias: b
- Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE
+ alias: a
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (((key = 0) or (key = 5)) and key is not null) (type: boolean)
- Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 2 Data size: 358 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
+ Statistics: Num rows: 4 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE
+ Merge Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
outputColumnNames: _col0, _col1, _col4
- input vertices:
- 0 Map 1
Statistics: Num rows: 2 Data size: 366 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: int), concat(_col1, _col4) (type: string)
@@ -135,8 +123,7 @@ STAGE PLANS:
Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string)
Execution mode: llap
- LLAP IO: no inputs
- Reducer 3
+ Reducer 2
Execution mode: llap
Reduce Operator Tree:
Select Operator
@@ -167,7 +154,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
- Reducer 4
+ Reducer 3
Execution mode: llap
Reduce Operator Tree:
Group By Operator
@@ -302,52 +289,40 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 2 <- Map 1 (BROADCAST_EDGE)
- Reducer 3 <- Map 2 (SIMPLE_EDGE)
- Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
- alias: test_table1
- Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
+ alias: test_table2
+ Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (((key = 0) or (key = 5)) and key is not null) (type: boolean)
- Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: string)
- Execution mode: llap
- LLAP IO: no inputs
- Map 2
+ Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE
Map Operator Tree:
TableScan
- alias: test_table2
- Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE
+ alias: test_table1
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (((key = 0) or (key = 5)) and key is not null) (type: boolean)
- Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
+ Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
+ Merge Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
outputColumnNames: _col0, _col1, _col3
- input vertices:
- 0 Map 1
Statistics: Num rows: 2 Data size: 366 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: int), concat(_col1, _col3) (type: string)
@@ -360,8 +335,7 @@ STAGE PLANS:
Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string)
Execution mode: llap
- LLAP IO: no inputs
- Reducer 3
+ Reducer 2
Execution mode: llap
Reduce Operator Tree:
Select Operator
@@ -392,7 +366,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
- Reducer 4
+ Reducer 3
Execution mode: llap
Reduce Operator Tree:
Group By Operator
@@ -533,52 +507,40 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 2 <- Map 1 (BROADCAST_EDGE)
- Reducer 3 <- Map 2 (SIMPLE_EDGE)
- Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
- alias: test_table1
- Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
+ alias: test_table2
+ Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (((key = 0) or (key = 5)) and (key < 8)) (type: boolean)
- Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: string)
- Execution mode: llap
- LLAP IO: no inputs
- Map 2
+ Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE
Map Operator Tree:
TableScan
- alias: test_table2
- Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE
+ alias: test_table1
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (((key = 0) or (key = 5)) and (key < 8)) (type: boolean)
- Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
+ Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE
+ Merge Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
outputColumnNames: _col0, _col1, _col3
- input vertices:
- 0 Map 1
Statistics: Num rows: 1 Data size: 183 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: int), concat(_col1, _col3) (type: string)
@@ -591,8 +553,7 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string)
Execution mode: llap
- LLAP IO: no inputs
- Reducer 3
+ Reducer 2
Execution mode: llap
Reduce Operator Tree:
Select Operator
@@ -623,7 +584,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
- Reducer 4
+ Reducer 3
Execution mode: llap
Reduce Operator Tree:
Group By Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/results/clientpositive/llap/quotedid_smb.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/quotedid_smb.q.out b/ql/src/test/results/clientpositive/llap/quotedid_smb.q.out
index 8e850f5..97721d0 100644
--- a/ql/src/test/results/clientpositive/llap/quotedid_smb.q.out
+++ b/ql/src/test/results/clientpositive/llap/quotedid_smb.q.out
@@ -42,6 +42,69 @@ POSTHOOK: Input: default@src
POSTHOOK: Output: default@src_b2
POSTHOOK: Lineage: src_b2.!@#$%^&*()_q SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: src_b2.x+1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: explain select a.`x+1`, a.`!@#$%^&*()_q`, b.`x+1`, b.`!@#$%^&*()_q`
+from src_b a join src_b2 b on a.`!@#$%^&*()_q` = b.`!@#$%^&*()_q`
+where a.`x+1` < '11'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select a.`x+1`, a.`!@#$%^&*()_q`, b.`x+1`, b.`!@#$%^&*()_q`
+from src_b a join src_b2 b on a.`!@#$%^&*()_q` = b.`!@#$%^&*()_q`
+where a.`x+1` < '11'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: !@#$%^&*()_q is not null (type: boolean)
+ Statistics: Num rows: 475 Data size: 166409 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: x+1 (type: string), !@#$%^&*()_q (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 475 Data size: 166409 Basic stats: COMPLETE Column stats: NONE
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (!@#$%^&*()_q is not null and (x+1 < '11')) (type: boolean)
+ Statistics: Num rows: 141 Data size: 49397 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: x+1 (type: string), !@#$%^&*()_q (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 141 Data size: 49397 Basic stats: COMPLETE Column stats: NONE
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col1 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 522 Data size: 183049 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 522 Data size: 183049 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: llap
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
PREHOOK: query: select a.`x+1`, a.`!@#$%^&*()_q`, b.`x+1`, b.`!@#$%^&*()_q`
from src_b a join src_b2 b on a.`!@#$%^&*()_q` = b.`!@#$%^&*()_q`
where a.`x+1` < '11'
@@ -56,15 +119,6 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@src_b
POSTHOOK: Input: default@src_b2
#### A masked pattern was here ####
-10 val_10 10 val_10
-100 val_100 100 val_100
-100 val_100 100 val_100
-100 val_100 100 val_100
-100 val_100 100 val_100
-104 val_104 104 val_104
-104 val_104 104 val_104
-104 val_104 104 val_104
-104 val_104 104 val_104
0 val_0 0 val_0
0 val_0 0 val_0
0 val_0 0 val_0
@@ -79,3 +133,12 @@ POSTHOOK: Input: default@src_b2
103 val_103 103 val_103
103 val_103 103 val_103
105 val_105 105 val_105
+10 val_10 10 val_10
+100 val_100 100 val_100
+100 val_100 100 val_100
+100 val_100 100 val_100
+100 val_100 100 val_100
+104 val_104 104 val_104
+104 val_104 104 val_104
+104 val_104 104 val_104
+104 val_104 104 val_104
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/results/clientpositive/llap/smb_cache.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/smb_cache.q.out b/ql/src/test/results/clientpositive/llap/smb_cache.q.out
index defade5..7614cf5 100644
--- a/ql/src/test/results/clientpositive/llap/smb_cache.q.out
+++ b/ql/src/test/results/clientpositive/llap/smb_cache.q.out
@@ -200,43 +200,86 @@ t1.userid,
from bug_201_input_b as t1
join bug_201_input_a as fa on (t1.userid = fa.userid)
POSTHOOK: type: QUERY
-Plan optimized by CBO.
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
-Vertex dependency in root stage
-Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 130 Data size: 520 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: userid is not null (type: boolean)
+ Statistics: Num rows: 130 Data size: 520 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: userid (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 130 Data size: 520 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 130 Data size: 520 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: fa
+ Statistics: Num rows: 527 Data size: 2108 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: userid is not null (type: boolean)
+ Statistics: Num rows: 527 Data size: 2108 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: userid (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 527 Data size: 2108 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 527 Data size: 2108 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 130 Data size: 1040 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 130 Data size: 1040 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-Stage-0
- Fetch Operator
- limit:-1
- Stage-1
- Reducer 2 llap
- File Output Operator [FS_10]
- Merge Join Operator [MERGEJOIN_15] (rows=130 width=8)
- Conds:RS_6._col0=RS_7._col0(Inner),Output:["_col0","_col1"]
- <-Map 1 [SIMPLE_EDGE] llap
- SHUFFLE [RS_6]
- PartitionCols:_col0
- Select Operator [SEL_2] (rows=130 width=4)
- Output:["_col0"]
- Filter Operator [FIL_13] (rows=130 width=4)
- predicate:userid is not null
- TableScan [TS_0] (rows=130 width=4)
- default@bug_201_input_b,t1,Tbl:COMPLETE,Col:COMPLETE,Output:["userid"]
- <-Map 3 [SIMPLE_EDGE] llap
- SHUFFLE [RS_7]
- PartitionCols:_col0
- Select Operator [SEL_5] (rows=527 width=4)
- Output:["_col0"]
- Filter Operator [FIL_14] (rows=527 width=4)
- predicate:userid is not null
- TableScan [TS_3] (rows=527 width=4)
- default@bug_201_input_a,fa,Tbl:COMPLETE,Col:COMPLETE,Output:["userid"]
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
PREHOOK: query: select
t1.userid,
fa.userid as fa_userid
from bug_201_input_b as t1
- join bug_201_input_a as fa on (t1.userid = fa.userid)
+ join bug_201_input_a as fa on (t1.userid = fa.userid) order by t1.userid, fa.userid
PREHOOK: type: QUERY
PREHOOK: Input: default@bug_201_input_a
PREHOOK: Input: default@bug_201_input_b
@@ -245,7 +288,7 @@ POSTHOOK: query: select
t1.userid,
fa.userid as fa_userid
from bug_201_input_b as t1
- join bug_201_input_a as fa on (t1.userid = fa.userid)
+ join bug_201_input_a as fa on (t1.userid = fa.userid) order by t1.userid, fa.userid
POSTHOOK: type: QUERY
POSTHOOK: Input: default@bug_201_input_a
POSTHOOK: Input: default@bug_201_input_b
@@ -265,7 +308,6 @@ POSTHOOK: Input: default@bug_201_input_b
162031843 162031843
181554703 181554703
182530846 182530846
-Warning: Value had a \n character in it.
PREHOOK: query: explain
select
t1.userid,
@@ -280,43 +322,66 @@ t1.userid,
from bug_201_input_b as t1
join bug_201_input_a as fa on (t1.userid = fa.userid)
POSTHOOK: type: QUERY
-Plan optimized by CBO.
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
-Vertex dependency in root stage
-Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 130 Data size: 520 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: userid is not null (type: boolean)
+ Statistics: Num rows: 130 Data size: 520 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: userid (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 130 Data size: 520 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Operator Tree:
+ TableScan
+ alias: fa
+ Statistics: Num rows: 527 Data size: 2108 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: userid is not null (type: boolean)
+ Statistics: Num rows: 527 Data size: 2108 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: userid (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 527 Data size: 2108 Basic stats: COMPLETE Column stats: COMPLETE
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 130 Data size: 1040 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 130 Data size: 1040 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: llap
-Stage-0
- Fetch Operator
- limit:-1
- Stage-1
- Reducer 2 llap
- File Output Operator [FS_10]
- Merge Join Operator [MERGEJOIN_15] (rows=130 width=8)
- Conds:RS_6._col0=RS_7._col0(Inner),Output:["_col0","_col1"]
- <-Map 1 [SIMPLE_EDGE] llap
- SHUFFLE [RS_6]
- PartitionCols:_col0
- Select Operator [SEL_2] (rows=130 width=4)
- Output:["_col0"]
- Filter Operator [FIL_13] (rows=130 width=4)
- predicate:userid is not null
- TableScan [TS_0] (rows=130 width=4)
- default@bug_201_input_b,t1,Tbl:COMPLETE,Col:COMPLETE,Output:["userid"]
- <-Map 3 [SIMPLE_EDGE] llap
- SHUFFLE [RS_7]
- PartitionCols:_col0
- Select Operator [SEL_5] (rows=527 width=4)
- Output:["_col0"]
- Filter Operator [FIL_14] (rows=527 width=4)
- predicate:userid is not null
- TableScan [TS_3] (rows=527 width=4)
- default@bug_201_input_a,fa,Tbl:COMPLETE,Col:COMPLETE,Output:["userid"]
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
PREHOOK: query: select
t1.userid,
fa.userid as fa_userid
from bug_201_input_b as t1
- join bug_201_input_a as fa on (t1.userid = fa.userid)
+ join bug_201_input_a as fa on (t1.userid = fa.userid) order by t1.userid, fa.userid
PREHOOK: type: QUERY
PREHOOK: Input: default@bug_201_input_a
PREHOOK: Input: default@bug_201_input_b
@@ -325,7 +390,7 @@ POSTHOOK: query: select
t1.userid,
fa.userid as fa_userid
from bug_201_input_b as t1
- join bug_201_input_a as fa on (t1.userid = fa.userid)
+ join bug_201_input_a as fa on (t1.userid = fa.userid) order by t1.userid, fa.userid
POSTHOOK: type: QUERY
POSTHOOK: Input: default@bug_201_input_a
POSTHOOK: Input: default@bug_201_input_b
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_1.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_1.q.out
index e6038b8..86c10aa 100644
--- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_1.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_1.q.out
@@ -520,96 +520,10 @@ PREHOOK: type: QUERY
POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-2 is a root stage
- Stage-1 depends on stages: Stage-2
+ Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-2
- Spark
-#### A masked pattern was here ####
- Vertices:
- Map 3
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- Position of Big Table: 0
- Local Work:
- Map Reduce Local Work
- Bucket Mapjoin Context:
- Alias Bucket File Name Mapping:
-#### A masked pattern was here ####
- Alias Bucket Output File Name Mapping:
-#### A masked pattern was here ####
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: ds=2008-04-08
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- partition values:
- ds 2008-04-08
- properties:
- bucket_count 2
- bucket_field_name key
- column.name.delimiter ,
- columns key,value
- columns.comments
- columns.types string:string
-#### A masked pattern was here ####
- name default.bucket_small
- numFiles 2
- numRows 0
- partition_columns ds
- partition_columns.types string
- rawDataSize 0
- serialization.ddl struct bucket_small { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 114
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- SORTBUCKETCOLSPREFIX TRUE
- bucket_count 2
- bucket_field_name key
- column.name.delimiter ,
- columns key,value
- columns.comments
- columns.types string:string
-#### A masked pattern was here ####
- name default.bucket_small
- partition_columns ds
- partition_columns.types string
- serialization.ddl struct bucket_small { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_small
- name: default.bucket_small
- Truncated Path -> Alias:
- /bucket_small/ds=2008-04-08 [$hdt$_1:b]
-
Stage: Stage-1
Spark
Edges:
@@ -630,14 +544,12 @@ STAGE PLANS:
expressions: key (type: string)
outputColumnNames: _col0
Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
+ Sorted Merge Bucket Map Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: string)
1 _col0 (type: string)
- input vertices:
- 1 Map 3
Position of Big Table: 0
Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE
BucketMapJoin: true
@@ -653,13 +565,6 @@ STAGE PLANS:
tag: -1
value expressions: _col0 (type: bigint)
auto parallelism: false
- Local Work:
- Map Reduce Local Work
- Bucket Mapjoin Context:
- Alias Bucket File Name Mapping:
-#### A masked pattern was here ####
- Alias Bucket Output File Name Mapping:
-#### A masked pattern was here ####
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_12.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_12.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_12.q.out
index ff9a0f4..3db099a 100644
--- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_12.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_12.q.out
@@ -134,19 +134,22 @@ POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket3out
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@bucket_medium@ds=2008-04-08
-Warning: Map Join MAPJOIN[27][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[17][tables = [$hdt$_1, $hdt$_2, $hdt$_0, $hdt$_3]] in Work 'Reducer 3' is a cross product
PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key
PREHOOK: type: QUERY
POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-2 is a root stage
- Stage-1 depends on stages: Stage-2
+ Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-2
+ Stage: Stage-1
Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2)
+ Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1)
+ Reducer 4 <- Reducer 3 (GROUP, 1)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -163,14 +166,14 @@ STAGE PLANS:
expressions: key (type: string)
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- 2 _col0 (type: string)
- Position of Big Table: 2
- Local Work:
- Map Reduce Local Work
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: a
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+ tag: 0
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -225,7 +228,7 @@ STAGE PLANS:
name: default.bucket_small
Truncated Path -> Alias:
/bucket_small/ds=2008-04-08 [$hdt$_1:a]
- Map 2
+ Map 5
Map Operator Tree:
TableScan
alias: b
@@ -239,14 +242,14 @@ STAGE PLANS:
expressions: key (type: string)
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- 2 _col0 (type: string)
- Position of Big Table: 2
- Local Work:
- Map Reduce Local Work
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: a
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: NONE
+ tag: 1
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -301,21 +304,28 @@ STAGE PLANS:
name: default.bucket_medium
Truncated Path -> Alias:
/bucket_medium/ds=2008-04-08 [$hdt$_2:b]
- Map 5
+ Map 6
Map Operator Tree:
TableScan
- alias: d
- Statistics: Num rows: 1 Data size: 170 Basic stats: PARTIAL Column stats: NONE
+ alias: c
+ Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
- Select Operator
- Statistics: Num rows: 1 Data size: 170 Basic stats: PARTIAL Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0
- 1
- Position of Big Table: 0
- Local Work:
- Map Reduce Local Work
+ Filter Operator
+ isSamplingPred: false
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: a
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+ tag: 2
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -327,23 +337,23 @@ STAGE PLANS:
partition values:
ds 2008-04-08
properties:
- bucket_count 3
+ bucket_count 4
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_medium
- numFiles 3
+ name default.bucket_big
+ numFiles 4
numRows 0
partition_columns ds
partition_columns.types string
rawDataSize 0
- serialization.ddl struct bucket_medium { string key, string value}
+ serialization.ddl struct bucket_big { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 170
+ totalSize 5812
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -351,93 +361,30 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
SORTBUCKETCOLSPREFIX TRUE
- bucket_count 3
+ bucket_count 4
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_medium
+ name default.bucket_big
partition_columns ds
partition_columns.types string
- serialization.ddl struct bucket_medium { string key, string value}
+ serialization.ddl struct bucket_big { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_medium
- name: default.bucket_medium
- Truncated Path -> Alias:
- /bucket_medium/ds=2008-04-08 [$hdt$_3:d]
-
- Stage: Stage-1
- Spark
- Edges:
- Reducer 4 <- Map 3 (GROUP, 1)
-#### A masked pattern was here ####
- Vertices:
- Map 3
- Map Operator Tree:
- TableScan
- alias: c
- Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- Inner Join 1 to 2
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- 2 _col0 (type: string)
- input vertices:
- 0 Map 1
- 1 Map 2
- Position of Big Table: 2
- Statistics: Num rows: 255 Data size: 25572 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0
- 1
- input vertices:
- 1 Map 5
- Position of Big Table: 0
- Statistics: Num rows: 255 Data size: 69177 Basic stats: PARTIAL Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE
- tag: -1
- value expressions: _col0 (type: bigint)
- auto parallelism: false
- Local Work:
- Map Reduce Local Work
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
+ name: default.bucket_big
+ name: default.bucket_big
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
+ base file name: ds=2008-04-09
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
- ds 2008-04-08
+ ds 2008-04-09
properties:
bucket_count 4
bucket_field_name key
@@ -480,31 +427,51 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.bucket_big
name: default.bucket_big
+ Truncated Path -> Alias:
+ /bucket_big/ds=2008-04-08 [$hdt$_0:c]
+ /bucket_big/ds=2008-04-09 [$hdt$_0:c]
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: d
+ Statistics: Num rows: 1 Data size: 170 Basic stats: PARTIAL Column stats: NONE
+ GatherStats: false
+ Select Operator
+ Statistics: Num rows: 1 Data size: 170 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 170 Basic stats: PARTIAL Column stats: NONE
+ tag: 1
+ auto parallelism: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-09
+ base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
- ds 2008-04-09
+ ds 2008-04-08
properties:
- bucket_count 4
+ bucket_count 3
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_big
- numFiles 4
+ name default.bucket_medium
+ numFiles 3
numRows 0
partition_columns ds
partition_columns.types string
rawDataSize 0
- serialization.ddl struct bucket_big { string key, string value}
+ serialization.ddl struct bucket_medium { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 5812
+ totalSize 170
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -512,26 +479,65 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
SORTBUCKETCOLSPREFIX TRUE
- bucket_count 4
+ bucket_count 3
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_big
+ name default.bucket_medium
partition_columns ds
partition_columns.types string
- serialization.ddl struct bucket_big { string key, string value}
+ serialization.ddl struct bucket_medium { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_big
- name: default.bucket_big
+ name: default.bucket_medium
+ name: default.bucket_medium
Truncated Path -> Alias:
- /bucket_big/ds=2008-04-08 [$hdt$_0:c]
- /bucket_big/ds=2008-04-09 [$hdt$_0:c]
+ /bucket_medium/ds=2008-04-08 [$hdt$_3:d]
+ Reducer 2
+ Needs Tagging: true
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 1 to 2
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ 2 _col0 (type: string)
+ Statistics: Num rows: 255 Data size: 25572 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 255 Data size: 25572 Basic stats: COMPLETE Column stats: NONE
+ tag: 0
+ auto parallelism: false
+ Reducer 3
+ Needs Tagging: true
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ Statistics: Num rows: 255 Data size: 69177 Basic stats: PARTIAL Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: bigint)
+ auto parallelism: false
Reducer 4
Needs Tagging: false
Reduce Operator Tree:
@@ -569,7 +575,7 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Map Join MAPJOIN[27][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[17][tables = [$hdt$_1, $hdt$_2, $hdt$_0, $hdt$_3]] in Work 'Reducer 3' is a cross product
PREHOOK: query: select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key
PREHOOK: type: QUERY
PREHOOK: Input: default@bucket_big
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_14.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_14.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_14.q.out
index 8c0d506..5c73ddb 100644
--- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_14.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_14.q.out
@@ -41,31 +41,10 @@ POSTHOOK: query: explain
select count(*) FROM tbl1 a LEFT OUTER JOIN tbl2 b ON a.key = b.key
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-2 is a root stage
- Stage-1 depends on stages: Stage-2
+ Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-2
- Spark
-#### A masked pattern was here ####
- Vertices:
- Map 3
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- Local Work:
- Map Reduce Local Work
-
Stage: Stage-1
Spark
Edges:
@@ -81,14 +60,12 @@ STAGE PLANS:
expressions: key (type: int)
outputColumnNames: _col0
Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
+ Sorted Merge Bucket Map Join Operator
condition map:
Left Outer Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
- input vertices:
- 1 Map 3
Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
@@ -99,8 +76,6 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: bigint)
- Local Work:
- Map Reduce Local Work
Reducer 2
Reduce Operator Tree:
Group By Operator
@@ -150,38 +125,17 @@ POSTHOOK: query: explain
select count(*) FROM tbl1 a RIGHT OUTER JOIN tbl2 b ON a.key = b.key
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-2 is a root stage
- Stage-1 depends on stages: Stage-2
+ Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-2
- Spark
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: a
- Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- Local Work:
- Map Reduce Local Work
-
Stage: Stage-1
Spark
Edges:
- Reducer 3 <- Map 2 (GROUP, 1)
+ Reducer 2 <- Map 1 (GROUP, 1)
#### A masked pattern was here ####
Vertices:
- Map 2
+ Map 1
Map Operator Tree:
TableScan
alias: b
@@ -190,14 +144,12 @@ STAGE PLANS:
expressions: key (type: int)
outputColumnNames: _col0
Statistics: Num rows: 189 Data size: 1891 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
+ Sorted Merge Bucket Map Join Operator
condition map:
Right Outer Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
- input vertices:
- 0 Map 1
Statistics: Num rows: 207 Data size: 2080 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
@@ -208,9 +160,7 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: bigint)
- Local Work:
- Map Reduce Local Work
- Reducer 3
+ Reducer 2
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_15.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_15.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_15.q.out
index b005bda..fbbe01d 100644
--- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_15.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_15.q.out
@@ -41,31 +41,10 @@ POSTHOOK: query: explain
select count(*) FROM tbl1 a LEFT OUTER JOIN tbl2 b ON a.key = b.key
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-2 is a root stage
- Stage-1 depends on stages: Stage-2
+ Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-2
- Spark
-#### A masked pattern was here ####
- Vertices:
- Map 3
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- Local Work:
- Map Reduce Local Work
-
Stage: Stage-1
Spark
Edges:
@@ -81,14 +60,12 @@ STAGE PLANS:
expressions: key (type: int)
outputColumnNames: _col0
Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
+ Sorted Merge Bucket Map Join Operator
condition map:
Left Outer Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
- input vertices:
- 1 Map 3
Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
@@ -99,8 +76,6 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: bigint)
- Local Work:
- Map Reduce Local Work
Reducer 2
Reduce Operator Tree:
Group By Operator
@@ -129,38 +104,17 @@ POSTHOOK: query: explain
select count(*) FROM tbl1 a RIGHT OUTER JOIN tbl2 b ON a.key = b.key
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-2 is a root stage
- Stage-1 depends on stages: Stage-2
+ Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-2
- Spark
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: a
- Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- Local Work:
- Map Reduce Local Work
-
Stage: Stage-1
Spark
Edges:
- Reducer 3 <- Map 2 (GROUP, 1)
+ Reducer 2 <- Map 1 (GROUP, 1)
#### A masked pattern was here ####
Vertices:
- Map 2
+ Map 1
Map Operator Tree:
TableScan
alias: b
@@ -169,14 +123,12 @@ STAGE PLANS:
expressions: key (type: int)
outputColumnNames: _col0
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
+ Sorted Merge Bucket Map Join Operator
condition map:
Right Outer Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
- input vertices:
- 0 Map 1
Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
@@ -187,9 +139,7 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: bigint)
- Local Work:
- Map Reduce Local Work
- Reducer 3
+ Reducer 2
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_2.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_2.q.out
index 025d0d2..6c25da4 100644
--- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_2.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_2.q.out
@@ -294,96 +294,10 @@ PREHOOK: type: QUERY
POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-2 is a root stage
- Stage-1 depends on stages: Stage-2
+ Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-2
- Spark
-#### A masked pattern was here ####
- Vertices:
- Map 3
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- Position of Big Table: 0
- Local Work:
- Map Reduce Local Work
- Bucket Mapjoin Context:
- Alias Bucket File Name Mapping:
-#### A masked pattern was here ####
- Alias Bucket Output File Name Mapping:
-#### A masked pattern was here ####
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: ds=2008-04-08
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- partition values:
- ds 2008-04-08
- properties:
- bucket_count 4
- bucket_field_name key
- column.name.delimiter ,
- columns key,value
- columns.comments
- columns.types string:string
-#### A masked pattern was here ####
- name default.bucket_small
- numFiles 4
- numRows 0
- partition_columns ds
- partition_columns.types string
- rawDataSize 0
- serialization.ddl struct bucket_small { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 226
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- SORTBUCKETCOLSPREFIX TRUE
- bucket_count 4
- bucket_field_name key
- column.name.delimiter ,
- columns key,value
- columns.comments
- columns.types string:string
-#### A masked pattern was here ####
- name default.bucket_small
- partition_columns ds
- partition_columns.types string
- serialization.ddl struct bucket_small { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_small
- name: default.bucket_small
- Truncated Path -> Alias:
- /bucket_small/ds=2008-04-08 [$hdt$_1:b]
-
Stage: Stage-1
Spark
Edges:
@@ -404,14 +318,12 @@ STAGE PLANS:
expressions: key (type: string)
outputColumnNames: _col0
Statistics: Num rows: 54 Data size: 5500 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
+ Sorted Merge Bucket Map Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: string)
1 _col0 (type: string)
- input vertices:
- 1 Map 3
Position of Big Table: 0
Statistics: Num rows: 59 Data size: 6050 Basic stats: COMPLETE Column stats: NONE
BucketMapJoin: true
@@ -427,13 +339,6 @@ STAGE PLANS:
tag: -1
value expressions: _col0 (type: bigint)
auto parallelism: false
- Local Work:
- Map Reduce Local Work
- Bucket Mapjoin Context:
- Alias Bucket File Name Mapping:
-#### A masked pattern was here ####
- Alias Bucket Output File Name Mapping:
-#### A masked pattern was here ####
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_3.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_3.q.out
index 3ad950a..624a730 100644
--- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_3.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_3.q.out
@@ -402,146 +402,10 @@ PREHOOK: type: QUERY
POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-2 is a root stage
- Stage-1 depends on stages: Stage-2
+ Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-2
- Spark
-#### A masked pattern was here ####
- Vertices:
- Map 3
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- Position of Big Table: 0
- Local Work:
- Map Reduce Local Work
- Bucket Mapjoin Context:
- Alias Bucket File Name Mapping:
-#### A masked pattern was here ####
- Alias Bucket Output File Name Mapping:
-#### A masked pattern was here ####
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: ds=2008-04-08
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- partition values:
- ds 2008-04-08
- properties:
- bucket_count 2
- bucket_field_name key
- column.name.delimiter ,
- columns key,value
- columns.comments
- columns.types string:string
-#### A masked pattern was here ####
- name default.bucket_small
- numFiles 2
- numRows 0
- partition_columns ds
- partition_columns.types string
- rawDataSize 0
- serialization.ddl struct bucket_small { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 114
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- SORTBUCKETCOLSPREFIX TRUE
- bucket_count 2
- bucket_field_name key
- column.name.delimiter ,
- columns key,value
- columns.comments
- columns.types string:string
-#### A masked pattern was here ####
- name default.bucket_small
- partition_columns ds
- partition_columns.types string
- serialization.ddl struct bucket_small { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_small
- name: default.bucket_small
-#### A masked pattern was here ####
- Partition
- base file name: ds=2008-04-09
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- partition values:
- ds 2008-04-09
- properties:
- bucket_count 2
- bucket_field_name key
- column.name.delimiter ,
- columns key,value
- columns.comments
- columns.types string:string
-#### A masked pattern was here ####
- name default.bucket_small
- numFiles 2
- numRows 0
- partition_columns ds
- partition_columns.types string
- rawDataSize 0
- serialization.ddl struct bucket_small { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 114
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- SORTBUCKETCOLSPREFIX TRUE
- bucket_count 2
- bucket_field_name key
- column.name.delimiter ,
- columns key,value
- columns.comments
- columns.types string:string
-#### A masked pattern was here ####
- name default.bucket_small
- partition_columns ds
- partition_columns.types string
- serialization.ddl struct bucket_small { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_small
- name: default.bucket_small
- Truncated Path -> Alias:
- /bucket_small/ds=2008-04-08 [$hdt$_1:b]
- /bucket_small/ds=2008-04-09 [$hdt$_1:b]
-
Stage: Stage-1
Spark
Edges:
@@ -562,14 +426,12 @@ STAGE PLANS:
expressions: key (type: string)
outputColumnNames: _col0
Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
+ Sorted Merge Bucket Map Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: string)
1 _col0 (type: string)
- input vertices:
- 1 Map 3
Position of Big Table: 0
Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE
BucketMapJoin: true
@@ -585,13 +447,6 @@ STAGE PLANS:
tag: -1
value expressions: _col0 (type: bigint)
auto parallelism: false
- Local Work:
- Map Reduce Local Work
- Bucket Mapjoin Context:
- Alias Bucket File Name Mapping:
-#### A masked pattern was here ####
- Alias Bucket Output File Name Mapping:
-#### A masked pattern was here ####
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
[08/12] hive git commit: HIVE-18208: SMB Join : Fix the unit tests to
run SMB Joins. (Deepak Jaiswal, reviewed by Jason Dere)
Posted by jd...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_15.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_15.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_15.q.out
index 91b1d8e..d76bf84 100644
--- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_15.q.out
+++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_15.q.out
@@ -49,27 +49,32 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE)
Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
+ alias: b
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Operator Tree:
+ TableScan
alias: a
Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
+ Merge Join Operator
condition map:
Left Outer Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
- input vertices:
- 1 Map 3
Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
@@ -81,23 +86,6 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: bigint)
Execution mode: llap
- LLAP IO: no inputs
- Map 3
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Execution mode: llap
- LLAP IO: no inputs
Reducer 2
Execution mode: llap
Reduce Operator Tree:
@@ -135,11 +123,10 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 2 <- Map 1 (BROADCAST_EDGE)
Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
- Map 1
+ Map 2
Map Operator Tree:
TableScan
alias: a
@@ -148,14 +135,6 @@ STAGE PLANS:
expressions: key (type: int)
outputColumnNames: _col0
Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
- Execution mode: llap
- LLAP IO: no inputs
- Map 2
Map Operator Tree:
TableScan
alias: b
@@ -164,14 +143,12 @@ STAGE PLANS:
expressions: key (type: int)
outputColumnNames: _col0
Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
+ Merge Join Operator
condition map:
Right Outer Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
- input vertices:
- 0 Map 1
Statistics: Num rows: 15 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
@@ -183,7 +160,6 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: bigint)
Execution mode: llap
- LLAP IO: no inputs
Reducer 3
Execution mode: llap
Reduce Operator Tree:
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_2.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_2.q.out
index 90d362e..ee70e6e 100644
--- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_2.q.out
+++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_2.q.out
@@ -94,77 +94,50 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE)
Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
- alias: a
- Statistics: Num rows: 54 Data size: 15252 Basic stats: COMPLETE Column stats: NONE
+ alias: b
+ Statistics: Num rows: 2 Data size: 594 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
- Statistics: Num rows: 52 Data size: 14687 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 594 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 52 Data size: 14687 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- Estimated key counts: Map 3 => 2
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- input vertices:
- 1 Map 3
- Position of Big Table: 0
- Statistics: Num rows: 57 Data size: 16155 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- value expressions: _col0 (type: bigint)
- auto parallelism: false
- Execution mode: llap
- LLAP IO: no inputs
+ Statistics: Num rows: 2 Data size: 594 Basic stats: COMPLETE Column stats: NONE
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
ds 2008-04-08
properties:
- bucket_count 2
+ bucket_count 4
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_big
- numFiles 2
+ name default.bucket_small
+ numFiles 4
numRows 0
partition_columns ds
partition_columns.types string
rawDataSize 0
- serialization.ddl struct bucket_big { string key, string value}
+ serialization.ddl struct bucket_small { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 2750
+ totalSize 226
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -172,30 +145,68 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
SORTBUCKETCOLSPREFIX TRUE
- bucket_count 2
+ bucket_count 4
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_big
+ name default.bucket_small
partition_columns ds
partition_columns.types string
- serialization.ddl struct bucket_big { string key, string value}
+ serialization.ddl struct bucket_small { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_big
- name: default.bucket_big
+ name: default.bucket_small
+ name: default.bucket_small
+ Truncated Path -> Alias:
+ /bucket_small/ds=2008-04-08 [b]
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 54 Data size: 15252 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 52 Data size: 14687 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 52 Data size: 14687 Basic stats: COMPLETE Column stats: NONE
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ Position of Big Table: 0
+ Statistics: Num rows: 57 Data size: 16155 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: bigint)
+ auto parallelism: false
+ Execution mode: llap
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-09
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
- ds 2008-04-09
+ ds 2008-04-08
properties:
bucket_count 2
bucket_field_name key
@@ -238,61 +249,30 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.bucket_big
name: default.bucket_big
- Truncated Path -> Alias:
- /bucket_big/ds=2008-04-08 [a]
- /bucket_big/ds=2008-04-09 [a]
- Map 3
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 2 Data size: 594 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 2 Data size: 594 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 594 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: a
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 2 Data size: 594 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- auto parallelism: true
- Execution mode: llap
- LLAP IO: no inputs
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
- ds 2008-04-08
+ ds 2008-04-09
properties:
- bucket_count 4
+ bucket_count 2
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_small
- numFiles 4
+ name default.bucket_big
+ numFiles 2
numRows 0
partition_columns ds
partition_columns.types string
rawDataSize 0
- serialization.ddl struct bucket_small { string key, string value}
+ serialization.ddl struct bucket_big { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 226
+ totalSize 2750
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -300,25 +280,26 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
SORTBUCKETCOLSPREFIX TRUE
- bucket_count 4
+ bucket_count 2
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_small
+ name default.bucket_big
partition_columns ds
partition_columns.types string
- serialization.ddl struct bucket_small { string key, string value}
+ serialization.ddl struct bucket_big { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_small
- name: default.bucket_small
+ name: default.bucket_big
+ name: default.bucket_big
Truncated Path -> Alias:
- /bucket_small/ds=2008-04-08 [b]
+ /bucket_big/ds=2008-04-08 [a]
+ /bucket_big/ds=2008-04-09 [a]
Reducer 2
Execution mode: llap
Needs Tagging: false
@@ -387,77 +368,50 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE)
Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
- alias: a
- Statistics: Num rows: 54 Data size: 15252 Basic stats: COMPLETE Column stats: NONE
+ alias: b
+ Statistics: Num rows: 2 Data size: 594 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
- Statistics: Num rows: 52 Data size: 14687 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 594 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 52 Data size: 14687 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- Estimated key counts: Map 3 => 2
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- input vertices:
- 1 Map 3
- Position of Big Table: 0
- Statistics: Num rows: 57 Data size: 16155 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- value expressions: _col0 (type: bigint)
- auto parallelism: false
- Execution mode: llap
- LLAP IO: no inputs
+ Statistics: Num rows: 2 Data size: 594 Basic stats: COMPLETE Column stats: NONE
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
ds 2008-04-08
properties:
- bucket_count 2
+ bucket_count 4
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_big
- numFiles 2
+ name default.bucket_small
+ numFiles 4
numRows 0
partition_columns ds
partition_columns.types string
rawDataSize 0
- serialization.ddl struct bucket_big { string key, string value}
+ serialization.ddl struct bucket_small { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 2750
+ totalSize 226
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -465,30 +419,68 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
SORTBUCKETCOLSPREFIX TRUE
- bucket_count 2
+ bucket_count 4
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_big
+ name default.bucket_small
partition_columns ds
partition_columns.types string
- serialization.ddl struct bucket_big { string key, string value}
+ serialization.ddl struct bucket_small { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_big
- name: default.bucket_big
+ name: default.bucket_small
+ name: default.bucket_small
+ Truncated Path -> Alias:
+ /bucket_small/ds=2008-04-08 [b]
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 54 Data size: 15252 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 52 Data size: 14687 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 52 Data size: 14687 Basic stats: COMPLETE Column stats: NONE
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ Position of Big Table: 0
+ Statistics: Num rows: 57 Data size: 16155 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: bigint)
+ auto parallelism: false
+ Execution mode: llap
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-09
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
- ds 2008-04-09
+ ds 2008-04-08
properties:
bucket_count 2
bucket_field_name key
@@ -531,61 +523,30 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.bucket_big
name: default.bucket_big
- Truncated Path -> Alias:
- /bucket_big/ds=2008-04-08 [a]
- /bucket_big/ds=2008-04-09 [a]
- Map 3
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 2 Data size: 594 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 2 Data size: 594 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 594 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: a
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 2 Data size: 594 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- auto parallelism: true
- Execution mode: llap
- LLAP IO: no inputs
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
- ds 2008-04-08
+ ds 2008-04-09
properties:
- bucket_count 4
+ bucket_count 2
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_small
- numFiles 4
+ name default.bucket_big
+ numFiles 2
numRows 0
partition_columns ds
partition_columns.types string
rawDataSize 0
- serialization.ddl struct bucket_small { string key, string value}
+ serialization.ddl struct bucket_big { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 226
+ totalSize 2750
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -593,25 +554,26 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
SORTBUCKETCOLSPREFIX TRUE
- bucket_count 4
+ bucket_count 2
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_small
+ name default.bucket_big
partition_columns ds
partition_columns.types string
- serialization.ddl struct bucket_small { string key, string value}
+ serialization.ddl struct bucket_big { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_small
- name: default.bucket_small
+ name: default.bucket_big
+ name: default.bucket_big
Truncated Path -> Alias:
- /bucket_small/ds=2008-04-08 [b]
+ /bucket_big/ds=2008-04-08 [a]
+ /bucket_big/ds=2008-04-09 [a]
Reducer 2
Execution mode: llap
Needs Tagging: false
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_3.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_3.q.out
index 365f63c..26439b9 100644
--- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_3.q.out
+++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_3.q.out
@@ -94,11 +94,10 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 2 <- Map 1 (BROADCAST_EDGE)
Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
- Map 1
+ Map 2
Map Operator Tree:
TableScan
alias: a
@@ -112,22 +111,11 @@ STAGE PLANS:
expressions: key (type: string)
outputColumnNames: _col0
Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: a
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- auto parallelism: true
- Execution mode: llap
- LLAP IO: no inputs
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -176,7 +164,6 @@ STAGE PLANS:
name: default.bucket_small
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-09
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -226,7 +213,6 @@ STAGE PLANS:
Truncated Path -> Alias:
/bucket_small/ds=2008-04-08 [a]
/bucket_small/ds=2008-04-09 [a]
- Map 2
Map Operator Tree:
TableScan
alias: b
@@ -240,15 +226,12 @@ STAGE PLANS:
expressions: key (type: string)
outputColumnNames: _col0
Statistics: Num rows: 56 Data size: 15737 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
+ Merge Join Operator
condition map:
Inner Join 0 to 1
- Estimated key counts: Map 1 => 2
keys:
0 _col0 (type: string)
1 _col0 (type: string)
- input vertices:
- 0 Map 1
Position of Big Table: 1
Statistics: Num rows: 61 Data size: 17310 Basic stats: COMPLETE Column stats: NONE
Group By Operator
@@ -264,13 +247,11 @@ STAGE PLANS:
value expressions: _col0 (type: bigint)
auto parallelism: false
Execution mode: llap
- LLAP IO: no inputs
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -387,77 +368,50 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE)
Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
- alias: a
- Statistics: Num rows: 58 Data size: 16300 Basic stats: COMPLETE Column stats: NONE
+ alias: b
+ Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
- Statistics: Num rows: 56 Data size: 15737 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 56 Data size: 15737 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- Estimated key counts: Map 3 => 2
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- input vertices:
- 1 Map 3
- Position of Big Table: 0
- Statistics: Num rows: 61 Data size: 17310 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- value expressions: _col0 (type: bigint)
- auto parallelism: false
- Execution mode: llap
- LLAP IO: no inputs
+ Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
ds 2008-04-08
properties:
- bucket_count 4
+ bucket_count 2
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_big
- numFiles 4
+ name default.bucket_small
+ numFiles 2
numRows 0
partition_columns ds
partition_columns.types string
rawDataSize 0
- serialization.ddl struct bucket_big { string key, string value}
+ serialization.ddl struct bucket_small { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 5812
+ totalSize 114
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -465,59 +419,29 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
SORTBUCKETCOLSPREFIX TRUE
- bucket_count 4
+ bucket_count 2
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_big
+ name default.bucket_small
partition_columns ds
partition_columns.types string
- serialization.ddl struct bucket_big { string key, string value}
+ serialization.ddl struct bucket_small { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_big
- name: default.bucket_big
- Truncated Path -> Alias:
- /bucket_big/ds=2008-04-08 [a]
- Map 3
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: a
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- auto parallelism: true
- Execution mode: llap
- LLAP IO: no inputs
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
+ name: default.bucket_small
+ name: default.bucket_small
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
- ds 2008-04-08
+ ds 2008-04-09
properties:
bucket_count 2
bucket_field_name key
@@ -560,31 +484,70 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.bucket_small
name: default.bucket_small
+ Truncated Path -> Alias:
+ /bucket_small/ds=2008-04-08 [b]
+ /bucket_small/ds=2008-04-09 [b]
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 58 Data size: 16300 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 56 Data size: 15737 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 56 Data size: 15737 Basic stats: COMPLETE Column stats: NONE
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ Position of Big Table: 0
+ Statistics: Num rows: 61 Data size: 17310 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: bigint)
+ auto parallelism: false
+ Execution mode: llap
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-09
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
- ds 2008-04-09
+ ds 2008-04-08
properties:
- bucket_count 2
+ bucket_count 4
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_small
- numFiles 2
+ name default.bucket_big
+ numFiles 4
numRows 0
partition_columns ds
partition_columns.types string
rawDataSize 0
- serialization.ddl struct bucket_small { string key, string value}
+ serialization.ddl struct bucket_big { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 114
+ totalSize 5812
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -592,26 +555,25 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
SORTBUCKETCOLSPREFIX TRUE
- bucket_count 2
+ bucket_count 4
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_small
+ name default.bucket_big
partition_columns ds
partition_columns.types string
- serialization.ddl struct bucket_small { string key, string value}
+ serialization.ddl struct bucket_big { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_small
- name: default.bucket_small
+ name: default.bucket_big
+ name: default.bucket_big
Truncated Path -> Alias:
- /bucket_small/ds=2008-04-08 [b]
- /bucket_small/ds=2008-04-09 [b]
+ /bucket_big/ds=2008-04-08 [a]
Reducer 2
Execution mode: llap
Needs Tagging: false
@@ -680,77 +642,50 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE)
Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
- alias: a
- Statistics: Num rows: 58 Data size: 16300 Basic stats: COMPLETE Column stats: NONE
+ alias: b
+ Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
- Statistics: Num rows: 56 Data size: 15737 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 56 Data size: 15737 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- Estimated key counts: Map 3 => 2
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- input vertices:
- 1 Map 3
- Position of Big Table: 0
- Statistics: Num rows: 61 Data size: 17310 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- value expressions: _col0 (type: bigint)
- auto parallelism: false
- Execution mode: llap
- LLAP IO: no inputs
+ Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
ds 2008-04-08
properties:
- bucket_count 4
+ bucket_count 2
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_big
- numFiles 4
+ name default.bucket_small
+ numFiles 2
numRows 0
partition_columns ds
partition_columns.types string
rawDataSize 0
- serialization.ddl struct bucket_big { string key, string value}
+ serialization.ddl struct bucket_small { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 5812
+ totalSize 114
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -758,59 +693,29 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
SORTBUCKETCOLSPREFIX TRUE
- bucket_count 4
+ bucket_count 2
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_big
+ name default.bucket_small
partition_columns ds
partition_columns.types string
- serialization.ddl struct bucket_big { string key, string value}
+ serialization.ddl struct bucket_small { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_big
- name: default.bucket_big
- Truncated Path -> Alias:
- /bucket_big/ds=2008-04-08 [a]
- Map 3
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: a
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- auto parallelism: true
- Execution mode: llap
- LLAP IO: no inputs
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
+ name: default.bucket_small
+ name: default.bucket_small
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
- ds 2008-04-08
+ ds 2008-04-09
properties:
bucket_count 2
bucket_field_name key
@@ -853,31 +758,70 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.bucket_small
name: default.bucket_small
+ Truncated Path -> Alias:
+ /bucket_small/ds=2008-04-08 [b]
+ /bucket_small/ds=2008-04-09 [b]
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 58 Data size: 16300 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 56 Data size: 15737 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 56 Data size: 15737 Basic stats: COMPLETE Column stats: NONE
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ Position of Big Table: 0
+ Statistics: Num rows: 61 Data size: 17310 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: bigint)
+ auto parallelism: false
+ Execution mode: llap
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-09
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
- ds 2008-04-09
+ ds 2008-04-08
properties:
- bucket_count 2
+ bucket_count 4
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_small
- numFiles 2
+ name default.bucket_big
+ numFiles 4
numRows 0
partition_columns ds
partition_columns.types string
rawDataSize 0
- serialization.ddl struct bucket_small { string key, string value}
+ serialization.ddl struct bucket_big { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 114
+ totalSize 5812
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -885,26 +829,25 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
SORTBUCKETCOLSPREFIX TRUE
- bucket_count 2
+ bucket_count 4
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_small
+ name default.bucket_big
partition_columns ds
partition_columns.types string
- serialization.ddl struct bucket_small { string key, string value}
+ serialization.ddl struct bucket_big { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_small
- name: default.bucket_small
+ name: default.bucket_big
+ name: default.bucket_big
Truncated Path -> Alias:
- /bucket_small/ds=2008-04-08 [b]
- /bucket_small/ds=2008-04-09 [b]
+ /bucket_big/ds=2008-04-08 [a]
Reducer 2
Execution mode: llap
Needs Tagging: false
[04/12] hive git commit: HIVE-18208: SMB Join : Fix the unit tests to
run SMB Joins. (Deepak Jaiswal, reviewed by Jason Dere)
Posted by jd...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out
index ce24832..49827b4 100644
--- a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out
+++ b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out
@@ -99,75 +99,62 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 2 <- Map 1 (BROADCAST_EDGE)
- Reducer 3 <- Map 2 (SIMPLE_EDGE)
- Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
- alias: a
- Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE
+ alias: b
+ Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 10 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 10 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: string)
- Execution mode: llap
- LLAP IO: no inputs
- Map 2
+ Statistics: Num rows: 84 Data size: 15036 Basic stats: COMPLETE Column stats: COMPLETE
Map Operator Tree:
TableScan
- alias: b
- Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE
+ alias: a
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 84 Data size: 15036 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
+ Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE
+ Merge Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
outputColumnNames: _col0, _col1, _col4
- input vertices:
- 0 Map 1
- Statistics: Num rows: 84 Data size: 15540 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 14 Data size: 2562 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: int), concat(_col1, _col4) (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 14 Data size: 2632 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 14 Data size: 2632 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string)
Execution mode: llap
- LLAP IO: no inputs
- Reducer 3
+ Reducer 2
Execution mode: llap
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 14 Data size: 2632 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 14 Data size: 2632 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -176,7 +163,7 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: int), _col1 (type: string), '1' (type: string)
outputColumnNames: key, value, ds
- Statistics: Num rows: 84 Data size: 22932 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 14 Data size: 3822 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
keys: ds (type: string)
@@ -189,7 +176,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
- Reducer 4
+ Reducer 3
Execution mode: llap
Reduce Operator Tree:
Group By Operator
@@ -320,75 +307,62 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 2 <- Map 1 (BROADCAST_EDGE)
- Reducer 3 <- Map 2 (SIMPLE_EDGE)
- Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
- alias: a
- Statistics: Num rows: 20 Data size: 1900 Basic stats: COMPLETE Column stats: PARTIAL
+ alias: b
+ Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 20 Data size: 1900 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 20 Data size: 5580 Basic stats: COMPLETE Column stats: PARTIAL
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 20 Data size: 5580 Basic stats: COMPLETE Column stats: PARTIAL
- value expressions: _col1 (type: string)
- Execution mode: llap
- LLAP IO: no inputs
- Map 2
+ Statistics: Num rows: 84 Data size: 15036 Basic stats: COMPLETE Column stats: COMPLETE
Map Operator Tree:
TableScan
- alias: b
- Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE
+ alias: a
+ Statistics: Num rows: 20 Data size: 1860 Basic stats: COMPLETE Column stats: PARTIAL
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 20 Data size: 1860 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 84 Data size: 15036 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
+ Statistics: Num rows: 20 Data size: 5540 Basic stats: COMPLETE Column stats: PARTIAL
+ Merge Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
outputColumnNames: _col0, _col1, _col4
- input vertices:
- 0 Map 1
- Statistics: Num rows: 84 Data size: 15540 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 29 Data size: 5307 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: _col0 (type: int), concat(_col1, _col4) (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 29 Data size: 5452 Basic stats: COMPLETE Column stats: PARTIAL
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 29 Data size: 5452 Basic stats: COMPLETE Column stats: PARTIAL
value expressions: _col1 (type: string)
Execution mode: llap
- LLAP IO: no inputs
- Reducer 3
+ Reducer 2
Execution mode: llap
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 29 Data size: 5452 Basic stats: COMPLETE Column stats: PARTIAL
File Output Operator
compressed: false
- Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 29 Data size: 5452 Basic stats: COMPLETE Column stats: PARTIAL
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -397,7 +371,7 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: int), _col1 (type: string), '1' (type: string)
outputColumnNames: key, value, ds
- Statistics: Num rows: 84 Data size: 22932 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 29 Data size: 7917 Basic stats: COMPLETE Column stats: PARTIAL
Group By Operator
aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
keys: ds (type: string)
@@ -410,7 +384,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: PARTIAL
value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
- Reducer 4
+ Reducer 3
Execution mode: llap
Reduce Operator Tree:
Group By Operator
@@ -565,75 +539,62 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 2 <- Map 1 (BROADCAST_EDGE)
- Reducer 3 <- Map 2 (SIMPLE_EDGE)
- Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
- alias: a
- Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE
+ alias: b
+ Statistics: Num rows: 168 Data size: 15792 Basic stats: COMPLETE Column stats: PARTIAL
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 168 Data size: 15792 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 10 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 10 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: string)
- Execution mode: llap
- LLAP IO: no inputs
- Map 2
+ Statistics: Num rows: 168 Data size: 46704 Basic stats: COMPLETE Column stats: PARTIAL
Map Operator Tree:
TableScan
- alias: b
- Statistics: Num rows: 168 Data size: 15792 Basic stats: COMPLETE Column stats: PARTIAL
+ alias: a
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 168 Data size: 15792 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 168 Data size: 46704 Basic stats: COMPLETE Column stats: PARTIAL
- Map Join Operator
+ Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE
+ Merge Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
outputColumnNames: _col0, _col1, _col4
- input vertices:
- 0 Map 1
- Statistics: Num rows: 168 Data size: 31080 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 29 Data size: 5307 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: _col0 (type: int), concat(_col1, _col4) (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 168 Data size: 31584 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 29 Data size: 5452 Basic stats: COMPLETE Column stats: PARTIAL
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 168 Data size: 31584 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 29 Data size: 5452 Basic stats: COMPLETE Column stats: PARTIAL
value expressions: _col1 (type: string)
Execution mode: llap
- LLAP IO: no inputs
- Reducer 3
+ Reducer 2
Execution mode: llap
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 168 Data size: 31584 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 29 Data size: 5452 Basic stats: COMPLETE Column stats: PARTIAL
File Output Operator
compressed: false
- Statistics: Num rows: 168 Data size: 31584 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 29 Data size: 5452 Basic stats: COMPLETE Column stats: PARTIAL
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -642,7 +603,7 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: int), _col1 (type: string), '1' (type: string)
outputColumnNames: key, value, ds
- Statistics: Num rows: 168 Data size: 45864 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 29 Data size: 7917 Basic stats: COMPLETE Column stats: PARTIAL
Group By Operator
aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
keys: ds (type: string)
@@ -655,7 +616,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: PARTIAL
value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
- Reducer 4
+ Reducer 3
Execution mode: llap
Reduce Operator Tree:
Group By Operator
@@ -816,75 +777,62 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 2 <- Map 1 (BROADCAST_EDGE)
- Reducer 3 <- Map 2 (SIMPLE_EDGE)
- Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
- alias: test_table1
- Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE
+ alias: test_table2
+ Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: string)
- Execution mode: llap
- LLAP IO: no inputs
- Map 2
+ Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE
Map Operator Tree:
TableScan
- alias: test_table2
- Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE
+ alias: test_table1
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
+ Merge Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
outputColumnNames: _col0, _col1, _col3
- input vertices:
- 0 Map 1
- Statistics: Num rows: 84 Data size: 15540 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 14 Data size: 2562 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: int), concat(_col1, _col3) (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 14 Data size: 2632 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 14 Data size: 2632 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string)
Execution mode: llap
- LLAP IO: no inputs
- Reducer 3
+ Reducer 2
Execution mode: llap
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 14 Data size: 2632 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 14 Data size: 2632 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -893,7 +841,7 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: int), _col1 (type: string), '1' (type: string)
outputColumnNames: key, value, ds
- Statistics: Num rows: 84 Data size: 22932 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 14 Data size: 3822 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
keys: ds (type: string)
@@ -906,7 +854,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
- Reducer 4
+ Reducer 3
Execution mode: llap
Reduce Operator Tree:
Group By Operator
@@ -1049,75 +997,62 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 2 <- Map 1 (BROADCAST_EDGE)
- Reducer 3 <- Map 2 (SIMPLE_EDGE)
- Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
- alias: test_table1
- Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE
+ alias: test_table2
+ Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), concat(value, value) (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 10 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 10 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: string)
- Execution mode: llap
- LLAP IO: no inputs
- Map 2
+ Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE
Map Operator Tree:
TableScan
- alias: test_table2
- Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE
+ alias: test_table1
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), concat(value, value) (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
+ Statistics: Num rows: 10 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE
+ Merge Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
outputColumnNames: _col0, _col1, _col3
- input vertices:
- 0 Map 1
- Statistics: Num rows: 84 Data size: 31248 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 14 Data size: 5208 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: int), concat(_col1, _col3) (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 14 Data size: 2632 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 14 Data size: 2632 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string)
Execution mode: llap
- LLAP IO: no inputs
- Reducer 3
+ Reducer 2
Execution mode: llap
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 14 Data size: 2632 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 14 Data size: 2632 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -1126,7 +1061,7 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: int), _col1 (type: string), '1' (type: string)
outputColumnNames: key, value, ds
- Statistics: Num rows: 84 Data size: 22932 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 14 Data size: 3822 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
keys: ds (type: string)
@@ -1139,7 +1074,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
- Reducer 4
+ Reducer 3
Execution mode: llap
Reduce Operator Tree:
Group By Operator
@@ -1282,75 +1217,62 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 2 <- Map 1 (BROADCAST_EDGE)
- Reducer 3 <- Map 2 (SIMPLE_EDGE)
- Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
- alias: test_table1
- Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE
+ alias: test_table2
+ Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: string)
- Execution mode: llap
- LLAP IO: no inputs
- Map 2
+ Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE
Map Operator Tree:
TableScan
- alias: test_table2
- Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE
+ alias: test_table1
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
+ Merge Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
outputColumnNames: _col0, _col1, _col3
- input vertices:
- 0 Map 1
- Statistics: Num rows: 84 Data size: 15540 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 14 Data size: 2562 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: (_col0 + _col0) (type: int), concat(_col1, _col3) (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 14 Data size: 2632 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 14 Data size: 2632 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string)
Execution mode: llap
- LLAP IO: no inputs
- Reducer 3
+ Reducer 2
Execution mode: llap
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 14 Data size: 2632 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 14 Data size: 2632 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -1359,7 +1281,7 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: int), _col1 (type: string), '1' (type: string)
outputColumnNames: key, value, ds
- Statistics: Num rows: 84 Data size: 22932 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 14 Data size: 3822 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
keys: ds (type: string)
@@ -1372,7 +1294,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
- Reducer 4
+ Reducer 3
Execution mode: llap
Reduce Operator Tree:
Group By Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_6.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_6.q.out b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_6.q.out
index 3ba8a65..56e93ac 100644
--- a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_6.q.out
+++ b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_6.q.out
@@ -77,52 +77,40 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 2 <- Map 1 (BROADCAST_EDGE)
- Reducer 3 <- Map 2 (SIMPLE_EDGE)
- Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
- alias: a
- Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE
+ alias: b
+ Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (key is not null and key2 is not null) (type: boolean)
- Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), key2 (type: int), value (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int), _col1 (type: int)
- sort order: ++
- Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col2 (type: string)
- Execution mode: llap
- LLAP IO: no inputs
- Map 2
+ Statistics: Num rows: 84 Data size: 15372 Basic stats: COMPLETE Column stats: COMPLETE
Map Operator Tree:
TableScan
- alias: b
- Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE
+ alias: a
+ Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (key is not null and key2 is not null) (type: boolean)
- Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), key2 (type: int), value (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 84 Data size: 15372 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
+ Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE
+ Merge Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int), _col1 (type: int)
1 _col0 (type: int), _col1 (type: int)
outputColumnNames: _col0, _col1, _col2, _col6
- input vertices:
- 0 Map 1
Statistics: Num rows: 14 Data size: 2618 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: int), _col1 (type: int), concat(_col2, _col6) (type: string)
@@ -135,8 +123,7 @@ STAGE PLANS:
Statistics: Num rows: 14 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: string)
Execution mode: llap
- LLAP IO: no inputs
- Reducer 3
+ Reducer 2
Execution mode: llap
Reduce Operator Tree:
Select Operator
@@ -167,7 +154,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
- Reducer 4
+ Reducer 3
Execution mode: llap
Reduce Operator Tree:
Group By Operator
@@ -305,52 +292,40 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 2 <- Map 1 (BROADCAST_EDGE)
- Reducer 3 <- Map 2 (SIMPLE_EDGE)
- Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
- alias: a
- Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE
+ alias: b
+ Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (key is not null and key2 is not null) (type: boolean)
- Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), key2 (type: int), value (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int), _col1 (type: int)
- sort order: ++
- Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col2 (type: string)
- Execution mode: llap
- LLAP IO: no inputs
- Map 2
+ Statistics: Num rows: 84 Data size: 15372 Basic stats: COMPLETE Column stats: COMPLETE
Map Operator Tree:
TableScan
- alias: b
- Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE
+ alias: a
+ Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (key is not null and key2 is not null) (type: boolean)
- Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), key2 (type: int), value (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 84 Data size: 15372 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
+ Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE
+ Merge Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int), _col1 (type: int)
1 _col0 (type: int), _col1 (type: int)
outputColumnNames: _col0, _col1, _col2, _col6
- input vertices:
- 0 Map 1
Statistics: Num rows: 14 Data size: 2618 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: int), _col1 (type: int), concat(_col2, _col6) (type: string)
@@ -363,8 +338,7 @@ STAGE PLANS:
Statistics: Num rows: 14 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: string)
Execution mode: llap
- LLAP IO: no inputs
- Reducer 3
+ Reducer 2
Execution mode: llap
Reduce Operator Tree:
Select Operator
@@ -395,7 +369,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
- Reducer 4
+ Reducer 3
Execution mode: llap
Reduce Operator Tree:
Group By Operator
@@ -533,52 +507,40 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 2 <- Map 1 (BROADCAST_EDGE)
- Reducer 3 <- Map 2 (SIMPLE_EDGE)
- Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
- alias: a
- Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE
+ alias: b
+ Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (key is not null and key2 is not null) (type: boolean)
- Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), key2 (type: int), value (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int), _col1 (type: int)
- sort order: ++
- Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col2 (type: string)
- Execution mode: llap
- LLAP IO: no inputs
- Map 2
+ Statistics: Num rows: 84 Data size: 15372 Basic stats: COMPLETE Column stats: COMPLETE
Map Operator Tree:
TableScan
- alias: b
- Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE
+ alias: a
+ Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (key is not null and key2 is not null) (type: boolean)
- Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), key2 (type: int), value (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 84 Data size: 15372 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
+ Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE
+ Merge Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int), _col1 (type: int)
1 _col0 (type: int), _col1 (type: int)
outputColumnNames: _col0, _col1, _col2, _col6
- input vertices:
- 0 Map 1
Statistics: Num rows: 14 Data size: 2618 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col1 (type: int), _col0 (type: int), concat(_col2, _col6) (type: string)
@@ -591,8 +553,7 @@ STAGE PLANS:
Statistics: Num rows: 14 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: string)
Execution mode: llap
- LLAP IO: no inputs
- Reducer 3
+ Reducer 2
Execution mode: llap
Reduce Operator Tree:
Select Operator
@@ -623,7 +584,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
- Reducer 4
+ Reducer 3
Execution mode: llap
Reduce Operator Tree:
Group By Operator
@@ -696,52 +657,40 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 2 <- Map 1 (BROADCAST_EDGE)
- Reducer 3 <- Map 2 (SIMPLE_EDGE)
- Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
- alias: a
- Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE
+ alias: b
+ Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (key is not null and key2 is not null) (type: boolean)
- Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), key2 (type: int), value (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int), _col1 (type: int)
- sort order: ++
- Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col2 (type: string)
- Execution mode: llap
- LLAP IO: no inputs
- Map 2
+ Statistics: Num rows: 84 Data size: 15372 Basic stats: COMPLETE Column stats: COMPLETE
Map Operator Tree:
TableScan
- alias: b
- Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE
+ alias: a
+ Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (key is not null and key2 is not null) (type: boolean)
- Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), key2 (type: int), value (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 84 Data size: 15372 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
+ Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE
+ Merge Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int), _col1 (type: int)
1 _col0 (type: int), _col1 (type: int)
outputColumnNames: _col0, _col1, _col2, _col6
- input vertices:
- 0 Map 1
Statistics: Num rows: 14 Data size: 2618 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col1 (type: int), _col0 (type: int), concat(_col2, _col6) (type: string)
@@ -754,8 +703,7 @@ STAGE PLANS:
Statistics: Num rows: 14 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: string)
Execution mode: llap
- LLAP IO: no inputs
- Reducer 3
+ Reducer 2
Execution mode: llap
Reduce Operator Tree:
Select Operator
@@ -786,7 +734,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
- Reducer 4
+ Reducer 3
Execution mode: llap
Reduce Operator Tree:
Group By Operator
@@ -865,52 +813,40 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 2 <- Map 1 (BROADCAST_EDGE)
- Reducer 3 <- Map 2 (SIMPLE_EDGE)
- Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
- alias: a
- Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE
+ alias: b
+ Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (key is not null and key2 is not null) (type: boolean)
- Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), key2 (type: int), value (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int), _col1 (type: int)
- sort order: ++
- Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col2 (type: string)
- Execution mode: llap
- LLAP IO: no inputs
- Map 2
+ Statistics: Num rows: 84 Data size: 15372 Basic stats: COMPLETE Column stats: COMPLETE
Map Operator Tree:
TableScan
- alias: b
- Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE
+ alias: a
+ Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (key is not null and key2 is not null) (type: boolean)
- Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), key2 (type: int), value (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 84 Data size: 15372 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
+ Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE
+ Merge Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int), _col1 (type: int)
1 _col0 (type: int), _col1 (type: int)
outputColumnNames: _col0, _col1, _col2, _col6
- input vertices:
- 0 Map 1
Statistics: Num rows: 14 Data size: 2618 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: int), _col1 (type: int), concat(_col2, _col6) (type: string)
@@ -923,8 +859,7 @@ STAGE PLANS:
Statistics: Num rows: 14 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: string)
Execution mode: llap
- LLAP IO: no inputs
- Reducer 3
+ Reducer 2
Execution mode: llap
Reduce Operator Tree:
Select Operator
@@ -955,7 +890,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
- Reducer 4
+ Reducer 3
Execution mode: llap
Reduce Operator Tree:
Group By Operator
@@ -1111,52 +1046,40 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 2 <- Map 1 (BROADCAST_EDGE)
- Reducer 3 <- Map 2 (SIMPLE_EDGE)
- Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
- alias: a
- Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE
+ alias: b
+ Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (key is not null and key2 is not null) (type: boolean)
- Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), key2 (type: int), value (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int), _col1 (type: int)
- sort order: ++
- Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col2 (type: string)
- Execution mode: llap
- LLAP IO: no inputs
- Map 2
+ Statistics: Num rows: 84 Data size: 15372 Basic stats: COMPLETE Column stats: COMPLETE
Map Operator Tree:
TableScan
- alias: b
- Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE
+ alias: a
+ Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (key is not null and key2 is not null) (type: boolean)
- Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), key2 (type: int), value (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 84 Data size: 15372 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
+ Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE
+ Merge Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int), _col1 (type: int)
1 _col0 (type: int), _col1 (type: int)
outputColumnNames: _col0, _col1, _col2, _col6
- input vertices:
- 0 Map 1
Statistics: Num rows: 14 Data size: 2618 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: int), _col1 (type: int), concat(_col2, _col6) (type: string)
@@ -1169,8 +1092,7 @@ STAGE PLANS:
Statistics: Num rows: 14 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: string)
Execution mode: llap
- LLAP IO: no inputs
- Reducer 3
+ Reducer 2
Execution mode: llap
Reduce Operator Tree:
Select Operator
@@ -1201,7 +1123,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
- Reducer 4
+ Reducer 3
Execution mode: llap
Reduce Operator Tree:
Group By Operator
@@ -1367,52 +1289,40 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 2 <- Map 1 (BROADCAST_EDGE)
- Reducer 3 <- Map 2 (SIMPLE_EDGE)
- Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
- alias: a
- Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE
+ alias: b
+ Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (key is not null and key2 is not null) (type: boolean)
- Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), key2 (type: int), value (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int), _col1 (type: int)
- sort order: ++
- Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col2 (type: string)
- Execution mode: llap
- LLAP IO: no inputs
- Map 2
+ Statistics: Num rows: 84 Data size: 15372 Basic stats: COMPLETE Column stats: COMPLETE
Map Operator Tree:
TableScan
- alias: b
- Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE
+ alias: a
+ Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (key is not null and key2 is not null) (type: boolean)
- Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), key2 (type: int), value (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 84 Data size: 15372 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
+ Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE
+ Merge Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int), _col1 (type: int)
1 _col0 (type: int), _col1 (type: int)
outputColumnNames: _col0, _col1, _col2, _col6
- input vertices:
- 0 Map 1
Statistics: Num rows: 14 Data size: 2618 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: int), _col1 (type: int), concat(_col2, _col6) (type: string)
@@ -1425,8 +1335,7 @@ STAGE PLANS:
Statistics: Num rows: 14 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: string)
Execution mode: llap
- LLAP IO: no inputs
- Reducer 3
+ Reducer 2
Execution mode: llap
Reduce Operator Tree:
Select Operator
@@ -1457,7 +1366,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
- Reducer 4
+ Reducer 3
Execution mode: llap
Reduce Operator Tree:
Group By Operator
[06/12] hive git commit: HIVE-18208: SMB Join : Fix the unit tests to
run SMB Joins. (Deepak Jaiswal, reviewed by Jason Dere)
Posted by jd...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_8.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_8.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_8.q.out
index 0e04284..5ae1699 100644
--- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_8.q.out
+++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_8.q.out
@@ -127,11 +127,10 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 2 <- Map 1 (BROADCAST_EDGE)
Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
- Map 1
+ Map 2
Map Operator Tree:
TableScan
alias: a
@@ -145,22 +144,11 @@ STAGE PLANS:
expressions: key (type: string)
outputColumnNames: _col0
Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: a
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- auto parallelism: true
- Execution mode: llap
- LLAP IO: no inputs
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -209,7 +197,6 @@ STAGE PLANS:
name: default.bucket_small
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-09
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -259,7 +246,6 @@ STAGE PLANS:
Truncated Path -> Alias:
/bucket_small/ds=2008-04-08 [a]
/bucket_small/ds=2008-04-09 [a]
- Map 2
Map Operator Tree:
TableScan
alias: b
@@ -273,15 +259,12 @@ STAGE PLANS:
expressions: key (type: string)
outputColumnNames: _col0
Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
+ Merge Join Operator
condition map:
Inner Join 0 to 1
- Estimated key counts: Map 1 => 2
keys:
0 _col0 (type: string)
1 _col0 (type: string)
- input vertices:
- 0 Map 1
Position of Big Table: 1
Statistics: Num rows: 122 Data size: 33926 Basic stats: COMPLETE Column stats: NONE
Group By Operator
@@ -297,13 +280,11 @@ STAGE PLANS:
value expressions: _col0 (type: bigint)
auto parallelism: false
Execution mode: llap
- LLAP IO: no inputs
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -352,7 +333,6 @@ STAGE PLANS:
name: default.bucket_big
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-09
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -472,77 +452,50 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE)
Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
- alias: a
- Statistics: Num rows: 116 Data size: 32232 Basic stats: COMPLETE Column stats: NONE
+ alias: b
+ Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
- Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- Estimated key counts: Map 3 => 2
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- input vertices:
- 1 Map 3
- Position of Big Table: 0
- Statistics: Num rows: 122 Data size: 33926 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- value expressions: _col0 (type: bigint)
- auto parallelism: false
- Execution mode: llap
- LLAP IO: no inputs
+ Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
ds 2008-04-08
properties:
- bucket_count 4
+ bucket_count 2
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_big
- numFiles 4
+ name default.bucket_small
+ numFiles 2
numRows 0
partition_columns ds
partition_columns.types string
rawDataSize 0
- serialization.ddl struct bucket_big { string key, string value}
+ serialization.ddl struct bucket_small { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 5812
+ totalSize 114
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -550,48 +503,47 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
SORTBUCKETCOLSPREFIX TRUE
- bucket_count 4
+ bucket_count 2
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_big
+ name default.bucket_small
partition_columns ds
partition_columns.types string
- serialization.ddl struct bucket_big { string key, string value}
+ serialization.ddl struct bucket_small { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_big
- name: default.bucket_big
+ name: default.bucket_small
+ name: default.bucket_small
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-09
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
ds 2008-04-09
properties:
- bucket_count 4
+ bucket_count 2
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_big
- numFiles 4
+ name default.bucket_small
+ numFiles 2
numRows 0
partition_columns ds
partition_columns.types string
rawDataSize 0
- serialization.ddl struct bucket_big { string key, string value}
+ serialization.ddl struct bucket_small { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 5812
+ totalSize 114
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -599,78 +551,87 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
SORTBUCKETCOLSPREFIX TRUE
- bucket_count 4
+ bucket_count 2
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_big
+ name default.bucket_small
partition_columns ds
partition_columns.types string
- serialization.ddl struct bucket_big { string key, string value}
+ serialization.ddl struct bucket_small { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_big
- name: default.bucket_big
+ name: default.bucket_small
+ name: default.bucket_small
Truncated Path -> Alias:
- /bucket_big/ds=2008-04-08 [a]
- /bucket_big/ds=2008-04-09 [a]
- Map 3
+ /bucket_small/ds=2008-04-08 [b]
+ /bucket_small/ds=2008-04-09 [b]
Map Operator Tree:
TableScan
- alias: b
- Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE
+ alias: a
+ Statistics: Num rows: 116 Data size: 32232 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
- Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: a
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- auto parallelism: true
+ Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ Position of Big Table: 0
+ Statistics: Num rows: 122 Data size: 33926 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: bigint)
+ auto parallelism: false
Execution mode: llap
- LLAP IO: no inputs
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
ds 2008-04-08
properties:
- bucket_count 2
+ bucket_count 4
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_small
- numFiles 2
+ name default.bucket_big
+ numFiles 4
numRows 0
partition_columns ds
partition_columns.types string
rawDataSize 0
- serialization.ddl struct bucket_small { string key, string value}
+ serialization.ddl struct bucket_big { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 114
+ totalSize 5812
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -678,48 +639,47 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
SORTBUCKETCOLSPREFIX TRUE
- bucket_count 2
+ bucket_count 4
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_small
+ name default.bucket_big
partition_columns ds
partition_columns.types string
- serialization.ddl struct bucket_small { string key, string value}
+ serialization.ddl struct bucket_big { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_small
- name: default.bucket_small
+ name: default.bucket_big
+ name: default.bucket_big
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-09
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
ds 2008-04-09
properties:
- bucket_count 2
+ bucket_count 4
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_small
- numFiles 2
+ name default.bucket_big
+ numFiles 4
numRows 0
partition_columns ds
partition_columns.types string
rawDataSize 0
- serialization.ddl struct bucket_small { string key, string value}
+ serialization.ddl struct bucket_big { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 114
+ totalSize 5812
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -727,26 +687,26 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
SORTBUCKETCOLSPREFIX TRUE
- bucket_count 2
+ bucket_count 4
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_small
+ name default.bucket_big
partition_columns ds
partition_columns.types string
- serialization.ddl struct bucket_small { string key, string value}
+ serialization.ddl struct bucket_big { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_small
- name: default.bucket_small
+ name: default.bucket_big
+ name: default.bucket_big
Truncated Path -> Alias:
- /bucket_small/ds=2008-04-08 [b]
- /bucket_small/ds=2008-04-09 [b]
+ /bucket_big/ds=2008-04-08 [a]
+ /bucket_big/ds=2008-04-09 [a]
Reducer 2
Execution mode: llap
Needs Tagging: false
@@ -817,77 +777,50 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE)
Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
- alias: a
- Statistics: Num rows: 116 Data size: 32232 Basic stats: COMPLETE Column stats: NONE
+ alias: b
+ Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
- Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- Estimated key counts: Map 3 => 2
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- input vertices:
- 1 Map 3
- Position of Big Table: 0
- Statistics: Num rows: 122 Data size: 33926 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- value expressions: _col0 (type: bigint)
- auto parallelism: false
- Execution mode: llap
- LLAP IO: no inputs
+ Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
ds 2008-04-08
properties:
- bucket_count 4
+ bucket_count 2
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_big
- numFiles 4
+ name default.bucket_small
+ numFiles 2
numRows 0
partition_columns ds
partition_columns.types string
rawDataSize 0
- serialization.ddl struct bucket_big { string key, string value}
+ serialization.ddl struct bucket_small { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 5812
+ totalSize 114
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -895,48 +828,47 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
SORTBUCKETCOLSPREFIX TRUE
- bucket_count 4
+ bucket_count 2
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_big
+ name default.bucket_small
partition_columns ds
partition_columns.types string
- serialization.ddl struct bucket_big { string key, string value}
+ serialization.ddl struct bucket_small { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_big
- name: default.bucket_big
+ name: default.bucket_small
+ name: default.bucket_small
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-09
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
ds 2008-04-09
properties:
- bucket_count 4
+ bucket_count 2
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_big
- numFiles 4
+ name default.bucket_small
+ numFiles 2
numRows 0
partition_columns ds
partition_columns.types string
rawDataSize 0
- serialization.ddl struct bucket_big { string key, string value}
+ serialization.ddl struct bucket_small { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 5812
+ totalSize 114
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -944,78 +876,87 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
SORTBUCKETCOLSPREFIX TRUE
- bucket_count 4
+ bucket_count 2
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_big
+ name default.bucket_small
partition_columns ds
partition_columns.types string
- serialization.ddl struct bucket_big { string key, string value}
+ serialization.ddl struct bucket_small { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_big
- name: default.bucket_big
+ name: default.bucket_small
+ name: default.bucket_small
Truncated Path -> Alias:
- /bucket_big/ds=2008-04-08 [a]
- /bucket_big/ds=2008-04-09 [a]
- Map 3
+ /bucket_small/ds=2008-04-08 [b]
+ /bucket_small/ds=2008-04-09 [b]
Map Operator Tree:
TableScan
- alias: b
- Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE
+ alias: a
+ Statistics: Num rows: 116 Data size: 32232 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
- Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: a
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- auto parallelism: true
+ Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ Position of Big Table: 0
+ Statistics: Num rows: 122 Data size: 33926 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: bigint)
+ auto parallelism: false
Execution mode: llap
- LLAP IO: no inputs
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
ds 2008-04-08
properties:
- bucket_count 2
+ bucket_count 4
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_small
- numFiles 2
+ name default.bucket_big
+ numFiles 4
numRows 0
partition_columns ds
partition_columns.types string
rawDataSize 0
- serialization.ddl struct bucket_small { string key, string value}
+ serialization.ddl struct bucket_big { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 114
+ totalSize 5812
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -1023,48 +964,47 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
SORTBUCKETCOLSPREFIX TRUE
- bucket_count 2
+ bucket_count 4
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_small
+ name default.bucket_big
partition_columns ds
partition_columns.types string
- serialization.ddl struct bucket_small { string key, string value}
+ serialization.ddl struct bucket_big { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_small
- name: default.bucket_small
+ name: default.bucket_big
+ name: default.bucket_big
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-09
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
ds 2008-04-09
properties:
- bucket_count 2
+ bucket_count 4
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_small
- numFiles 2
+ name default.bucket_big
+ numFiles 4
numRows 0
partition_columns ds
partition_columns.types string
rawDataSize 0
- serialization.ddl struct bucket_small { string key, string value}
+ serialization.ddl struct bucket_big { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 114
+ totalSize 5812
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -1072,26 +1012,26 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
SORTBUCKETCOLSPREFIX TRUE
- bucket_count 2
+ bucket_count 4
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_small
+ name default.bucket_big
partition_columns ds
partition_columns.types string
- serialization.ddl struct bucket_small { string key, string value}
+ serialization.ddl struct bucket_big { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_small
- name: default.bucket_small
+ name: default.bucket_big
+ name: default.bucket_big
Truncated Path -> Alias:
- /bucket_small/ds=2008-04-08 [b]
- /bucket_small/ds=2008-04-09 [b]
+ /bucket_big/ds=2008-04-08 [a]
+ /bucket_big/ds=2008-04-09 [a]
Reducer 2
Execution mode: llap
Needs Tagging: false
[02/12] hive git commit: HIVE-18208: SMB Join : Fix the unit tests to
run SMB Joins. (Deepak Jaiswal, reviewed by Jason Dere)
Posted by jd...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_4.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_4.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_4.q.out
index 60437ec..44f792a 100644
--- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_4.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_4.q.out
@@ -418,146 +418,10 @@ PREHOOK: type: QUERY
POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-2 is a root stage
- Stage-1 depends on stages: Stage-2
+ Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-2
- Spark
-#### A masked pattern was here ####
- Vertices:
- Map 3
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 4 Data size: 452 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 4 Data size: 452 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 4 Data size: 452 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- Position of Big Table: 0
- Local Work:
- Map Reduce Local Work
- Bucket Mapjoin Context:
- Alias Bucket File Name Mapping:
-#### A masked pattern was here ####
- Alias Bucket Output File Name Mapping:
-#### A masked pattern was here ####
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: ds=2008-04-08
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- partition values:
- ds 2008-04-08
- properties:
- bucket_count 4
- bucket_field_name key
- column.name.delimiter ,
- columns key,value
- columns.comments
- columns.types string:string
-#### A masked pattern was here ####
- name default.bucket_small
- numFiles 4
- numRows 0
- partition_columns ds
- partition_columns.types string
- rawDataSize 0
- serialization.ddl struct bucket_small { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 226
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- SORTBUCKETCOLSPREFIX TRUE
- bucket_count 4
- bucket_field_name key
- column.name.delimiter ,
- columns key,value
- columns.comments
- columns.types string:string
-#### A masked pattern was here ####
- name default.bucket_small
- partition_columns ds
- partition_columns.types string
- serialization.ddl struct bucket_small { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_small
- name: default.bucket_small
-#### A masked pattern was here ####
- Partition
- base file name: ds=2008-04-09
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- partition values:
- ds 2008-04-09
- properties:
- bucket_count 4
- bucket_field_name key
- column.name.delimiter ,
- columns key,value
- columns.comments
- columns.types string:string
-#### A masked pattern was here ####
- name default.bucket_small
- numFiles 4
- numRows 0
- partition_columns ds
- partition_columns.types string
- rawDataSize 0
- serialization.ddl struct bucket_small { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 226
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- SORTBUCKETCOLSPREFIX TRUE
- bucket_count 4
- bucket_field_name key
- column.name.delimiter ,
- columns key,value
- columns.comments
- columns.types string:string
-#### A masked pattern was here ####
- name default.bucket_small
- partition_columns ds
- partition_columns.types string
- serialization.ddl struct bucket_small { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_small
- name: default.bucket_small
- Truncated Path -> Alias:
- /bucket_small/ds=2008-04-08 [$hdt$_1:b]
- /bucket_small/ds=2008-04-09 [$hdt$_1:b]
-
Stage: Stage-1
Spark
Edges:
@@ -578,14 +442,12 @@ STAGE PLANS:
expressions: key (type: string)
outputColumnNames: _col0
Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
+ Sorted Merge Bucket Map Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: string)
1 _col0 (type: string)
- input vertices:
- 1 Map 3
Position of Big Table: 0
Statistics: Num rows: 29 Data size: 3025 Basic stats: COMPLETE Column stats: NONE
BucketMapJoin: true
@@ -601,13 +463,6 @@ STAGE PLANS:
tag: -1
value expressions: _col0 (type: bigint)
auto parallelism: false
- Local Work:
- Map Reduce Local Work
- Bucket Mapjoin Context:
- Alias Bucket File Name Mapping:
-#### A masked pattern was here ####
- Alias Bucket Output File Name Mapping:
-#### A masked pattern was here ####
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_7.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_7.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_7.q.out
index 16ecabe..73525bc 100644
--- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_7.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_7.q.out
@@ -539,146 +539,10 @@ PREHOOK: type: QUERY
POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-2 is a root stage
- Stage-1 depends on stages: Stage-2
+ Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-2
- Spark
-#### A masked pattern was here ####
- Vertices:
- Map 3
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 4 Data size: 452 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 4 Data size: 452 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 4 Data size: 452 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- Position of Big Table: 0
- Local Work:
- Map Reduce Local Work
- Bucket Mapjoin Context:
- Alias Bucket File Name Mapping:
-#### A masked pattern was here ####
- Alias Bucket Output File Name Mapping:
-#### A masked pattern was here ####
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: ds=2008-04-08
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- partition values:
- ds 2008-04-08
- properties:
- bucket_count 4
- bucket_field_name key
- column.name.delimiter ,
- columns key,value
- columns.comments
- columns.types string:string
-#### A masked pattern was here ####
- name default.bucket_small
- numFiles 4
- numRows 0
- partition_columns ds
- partition_columns.types string
- rawDataSize 0
- serialization.ddl struct bucket_small { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 226
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- SORTBUCKETCOLSPREFIX TRUE
- bucket_count 4
- bucket_field_name key
- column.name.delimiter ,
- columns key,value
- columns.comments
- columns.types string:string
-#### A masked pattern was here ####
- name default.bucket_small
- partition_columns ds
- partition_columns.types string
- serialization.ddl struct bucket_small { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_small
- name: default.bucket_small
-#### A masked pattern was here ####
- Partition
- base file name: ds=2008-04-09
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- partition values:
- ds 2008-04-09
- properties:
- bucket_count 4
- bucket_field_name key
- column.name.delimiter ,
- columns key,value
- columns.comments
- columns.types string:string
-#### A masked pattern was here ####
- name default.bucket_small
- numFiles 4
- numRows 0
- partition_columns ds
- partition_columns.types string
- rawDataSize 0
- serialization.ddl struct bucket_small { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 226
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- SORTBUCKETCOLSPREFIX TRUE
- bucket_count 4
- bucket_field_name key
- column.name.delimiter ,
- columns key,value
- columns.comments
- columns.types string:string
-#### A masked pattern was here ####
- name default.bucket_small
- partition_columns ds
- partition_columns.types string
- serialization.ddl struct bucket_small { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_small
- name: default.bucket_small
- Truncated Path -> Alias:
- /bucket_small/ds=2008-04-08 [$hdt$_1:b]
- /bucket_small/ds=2008-04-09 [$hdt$_1:b]
-
Stage: Stage-1
Spark
Edges:
@@ -699,14 +563,12 @@ STAGE PLANS:
expressions: key (type: string)
outputColumnNames: _col0
Statistics: Num rows: 54 Data size: 5500 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
+ Sorted Merge Bucket Map Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: string)
1 _col0 (type: string)
- input vertices:
- 1 Map 3
Position of Big Table: 0
Statistics: Num rows: 59 Data size: 6050 Basic stats: COMPLETE Column stats: NONE
BucketMapJoin: true
@@ -722,13 +584,6 @@ STAGE PLANS:
tag: -1
value expressions: _col0 (type: bigint)
auto parallelism: false
- Local Work:
- Map Reduce Local Work
- Bucket Mapjoin Context:
- Alias Bucket File Name Mapping:
-#### A masked pattern was here ####
- Alias Bucket Output File Name Mapping:
-#### A masked pattern was here ####
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_8.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_8.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_8.q.out
index e180471..abd6e65 100644
--- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_8.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_8.q.out
@@ -539,146 +539,10 @@ PREHOOK: type: QUERY
POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-2 is a root stage
- Stage-1 depends on stages: Stage-2
+ Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-2
- Spark
-#### A masked pattern was here ####
- Vertices:
- Map 3
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- Position of Big Table: 0
- Local Work:
- Map Reduce Local Work
- Bucket Mapjoin Context:
- Alias Bucket File Name Mapping:
-#### A masked pattern was here ####
- Alias Bucket Output File Name Mapping:
-#### A masked pattern was here ####
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: ds=2008-04-08
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- partition values:
- ds 2008-04-08
- properties:
- bucket_count 2
- bucket_field_name key
- column.name.delimiter ,
- columns key,value
- columns.comments
- columns.types string:string
-#### A masked pattern was here ####
- name default.bucket_small
- numFiles 2
- numRows 0
- partition_columns ds
- partition_columns.types string
- rawDataSize 0
- serialization.ddl struct bucket_small { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 114
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- SORTBUCKETCOLSPREFIX TRUE
- bucket_count 2
- bucket_field_name key
- column.name.delimiter ,
- columns key,value
- columns.comments
- columns.types string:string
-#### A masked pattern was here ####
- name default.bucket_small
- partition_columns ds
- partition_columns.types string
- serialization.ddl struct bucket_small { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_small
- name: default.bucket_small
-#### A masked pattern was here ####
- Partition
- base file name: ds=2008-04-09
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- partition values:
- ds 2008-04-09
- properties:
- bucket_count 2
- bucket_field_name key
- column.name.delimiter ,
- columns key,value
- columns.comments
- columns.types string:string
-#### A masked pattern was here ####
- name default.bucket_small
- numFiles 2
- numRows 0
- partition_columns ds
- partition_columns.types string
- rawDataSize 0
- serialization.ddl struct bucket_small { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 114
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- SORTBUCKETCOLSPREFIX TRUE
- bucket_count 2
- bucket_field_name key
- column.name.delimiter ,
- columns key,value
- columns.comments
- columns.types string:string
-#### A masked pattern was here ####
- name default.bucket_small
- partition_columns ds
- partition_columns.types string
- serialization.ddl struct bucket_small { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_small
- name: default.bucket_small
- Truncated Path -> Alias:
- /bucket_small/ds=2008-04-08 [$hdt$_1:b]
- /bucket_small/ds=2008-04-09 [$hdt$_1:b]
-
Stage: Stage-1
Spark
Edges:
@@ -699,14 +563,12 @@ STAGE PLANS:
expressions: key (type: string)
outputColumnNames: _col0
Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
+ Sorted Merge Bucket Map Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: string)
1 _col0 (type: string)
- input vertices:
- 1 Map 3
Position of Big Table: 0
Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE
BucketMapJoin: true
@@ -722,13 +584,6 @@ STAGE PLANS:
tag: -1
value expressions: _col0 (type: bigint)
auto parallelism: false
- Local Work:
- Map Reduce Local Work
- Bucket Mapjoin Context:
- Alias Bucket File Name Mapping:
-#### A masked pattern was here ####
- Alias Bucket Output File Name Mapping:
-#### A masked pattern was here ####
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_9.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_9.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_9.q.out
index 4d0476f..033ee04 100644
--- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_9.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_9.q.out
@@ -387,18 +387,19 @@ join
on src1.key = src2.key
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-2 is a root stage
- Stage-1 depends on stages: Stage-2
+ Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-2
+ Stage: Stage-1
Spark
Edges:
- Reducer 4 <- Map 3 (GROUP, 1)
+ Reducer 2 <- Map 1 (GROUP, 1)
+ Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1)
+ Reducer 5 <- Map 1 (GROUP, 1)
#### A masked pattern was here ####
Vertices:
- Map 3
+ Map 1
Map Operator Tree:
TableScan
alias: a
@@ -430,9 +431,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint)
- Reducer 4
- Local Work:
- Map Reduce Local Work
+ Reducer 2
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
@@ -440,52 +439,34 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
-
- Stage: Stage-1
- Spark
- Edges:
- Reducer 2 <- Map 1 (GROUP, 1)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: a
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Sorted Merge Bucket Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- keys: _col0 (type: int)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
- Reducer 2
- Local Work:
- Map Reduce Local Work
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col3
+ Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col3 (type: bigint), _col1 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 5
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
@@ -493,27 +474,12 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col0, _col1, _col3
- input vertices:
- 1 Reducer 4
- Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: int), _col3 (type: bigint), _col1 (type: bigint)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
Stage: Stage-0
Fetch Operator
@@ -1018,16 +984,18 @@ select count(*) from
on subq1.key = subq2.key
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-2 is a root stage
- Stage-1 depends on stages: Stage-2
+ Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-2
+ Stage: Stage-1
Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1)
+ Reducer 3 <- Reducer 2 (GROUP, 1)
#### A masked pattern was here ####
Vertices:
- Map 3
+ Map 1
Map Operator Tree:
TableScan
alias: a
@@ -1039,20 +1007,12 @@ STAGE PLANS:
expressions: (key + 1) (type: int)
outputColumnNames: _col0
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- Local Work:
- Map Reduce Local Work
-
- Stage: Stage-1
- Spark
- Edges:
- Reducer 2 <- Map 1 (GROUP, 1)
-#### A masked pattern was here ####
- Vertices:
- Map 1
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Map 4
Map Operator Tree:
TableScan
alias: a
@@ -1064,28 +1024,31 @@ STAGE PLANS:
expressions: (key + 1) (type: int)
outputColumnNames: _col0
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- input vertices:
- 1 Map 3
- Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
- Local Work:
- Map Reduce Local Work
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
Reducer 2
Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reducer 3
+ Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
mode: mergepartial
@@ -1533,34 +1496,10 @@ select count(*) from (
) subq1
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-2 is a root stage
- Stage-1 depends on stages: Stage-2
+ Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-2
- Spark
-#### A masked pattern was here ####
- Vertices:
- Map 3
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- Local Work:
- Map Reduce Local Work
-
Stage: Stage-1
Spark
Edges:
@@ -1579,14 +1518,12 @@ STAGE PLANS:
expressions: key (type: int)
outputColumnNames: _col0
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
+ Sorted Merge Bucket Map Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
- input vertices:
- 1 Map 3
Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
@@ -1597,8 +1534,6 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: bigint)
- Local Work:
- Map Reduce Local Work
Reducer 2
Reduce Operator Tree:
Group By Operator
@@ -1650,34 +1585,10 @@ select key, count(*) from
group by key
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-2 is a root stage
- Stage-1 depends on stages: Stage-2
+ Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-2
- Spark
-#### A masked pattern was here ####
- Vertices:
- Map 3
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- Local Work:
- Map Reduce Local Work
-
Stage: Stage-1
Spark
Edges:
@@ -1696,15 +1607,13 @@ STAGE PLANS:
expressions: key (type: int)
outputColumnNames: _col0
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
+ Sorted Merge Bucket Map Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
outputColumnNames: _col0
- input vertices:
- 1 Map 3
Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
@@ -1718,8 +1627,6 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint)
- Local Work:
- Map Reduce Local Work
Reducer 2
Reduce Operator Tree:
Group By Operator
@@ -1787,34 +1694,10 @@ select count(*) from
) subq2
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-2 is a root stage
- Stage-1 depends on stages: Stage-2
+ Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-2
- Spark
-#### A masked pattern was here ####
- Vertices:
- Map 4
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- Local Work:
- Map Reduce Local Work
-
Stage: Stage-1
Spark
Edges:
@@ -1834,15 +1717,13 @@ STAGE PLANS:
expressions: key (type: int)
outputColumnNames: _col0
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
+ Sorted Merge Bucket Map Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
outputColumnNames: _col0
- input vertices:
- 1 Map 4
Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
Group By Operator
keys: _col0 (type: int)
@@ -1854,8 +1735,6 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
- Local Work:
- Map Reduce Local Work
Reducer 2
Reduce Operator Tree:
Group By Operator
@@ -1955,43 +1834,19 @@ join
on src1.key = src2.key
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-3 is a root stage
- Stage-2 depends on stages: Stage-3
- Stage-4 depends on stages: Stage-2
- Stage-1 depends on stages: Stage-4
+ Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-3
- Spark
-#### A masked pattern was here ####
- Vertices:
- Map 6
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- Local Work:
- Map Reduce Local Work
-
- Stage: Stage-2
+ Stage: Stage-1
Spark
Edges:
- Reducer 5 <- Map 4 (GROUP, 1)
+ Reducer 2 <- Map 1 (GROUP, 1)
+ Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1)
+ Reducer 5 <- Map 1 (GROUP, 1)
#### A masked pattern was here ####
Vertices:
- Map 4
+ Map 1
Map Operator Tree:
TableScan
alias: a
@@ -2003,15 +1858,13 @@ STAGE PLANS:
expressions: key (type: int)
outputColumnNames: _col0
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
+ Sorted Merge Bucket Map Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
outputColumnNames: _col0
- input vertices:
- 1 Map 6
Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
@@ -2025,11 +1878,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint)
- Local Work:
- Map Reduce Local Work
- Reducer 5
- Local Work:
- Map Reduce Local Work
+ Reducer 2
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
@@ -2037,79 +1886,34 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
-
- Stage: Stage-4
- Spark
-#### A masked pattern was here ####
- Vertices:
- Map 3
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- Local Work:
- Map Reduce Local Work
-
- Stage: Stage-1
- Spark
- Edges:
- Reducer 2 <- Map 1 (GROUP, 1)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: a
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col0
- input vertices:
- 1 Map 3
- Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- keys: _col0 (type: int)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
- Local Work:
- Map Reduce Local Work
- Reducer 2
- Local Work:
- Map Reduce Local Work
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col3
+ Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col3 (type: bigint), _col1 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 5
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
@@ -2117,27 +1921,12 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col0, _col1, _col3
- input vertices:
- 1 Reducer 5
- Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: int), _col3 (type: bigint), _col1 (type: bigint)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
Stage: Stage-0
Fetch Operator
@@ -2204,34 +1993,10 @@ select count(*) from
on subq1.key = subq2.key
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-2 is a root stage
- Stage-1 depends on stages: Stage-2
+ Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-2
- Spark
-#### A masked pattern was here ####
- Vertices:
- Map 3
- Map Operator Tree:
- TableScan
- alias: a
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (key < 6) (type: boolean)
- Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- Local Work:
- Map Reduce Local Work
-
Stage: Stage-1
Spark
Edges:
@@ -2250,14 +2015,12 @@ STAGE PLANS:
expressions: key (type: int)
outputColumnNames: _col0
Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
+ Sorted Merge Bucket Map Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
- input vertices:
- 1 Map 3
Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
@@ -2268,8 +2031,6 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: bigint)
- Local Work:
- Map Reduce Local Work
Reducer 2
Reduce Operator Tree:
Group By Operator
@@ -2335,34 +2096,10 @@ select count(*) from
on subq2.key = b.key
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-2 is a root stage
- Stage-1 depends on stages: Stage-2
+ Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-2
- Spark
-#### A masked pattern was here ####
- Vertices:
- Map 3
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (key < 6) (type: boolean)
- Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- Local Work:
- Map Reduce Local Work
-
Stage: Stage-1
Spark
Edges:
@@ -2381,14 +2118,12 @@ STAGE PLANS:
expressions: key (type: int)
outputColumnNames: _col0
Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
+ Sorted Merge Bucket Map Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
- input vertices:
- 1 Map 3
Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
@@ -2399,8 +2134,6 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: bigint)
- Local Work:
- Map Reduce Local Work
Reducer 2
Reduce Operator Tree:
Group By Operator
@@ -2490,34 +2223,10 @@ select count(*) from
on subq2.key = subq4.key
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-2 is a root stage
- Stage-1 depends on stages: Stage-2
+ Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-2
- Spark
-#### A masked pattern was here ####
- Vertices:
- Map 3
- Map Operator Tree:
- TableScan
- alias: a
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (key < 6) (type: boolean)
- Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- Local Work:
- Map Reduce Local Work
-
Stage: Stage-1
Spark
Edges:
@@ -2536,14 +2245,12 @@ STAGE PLANS:
expressions: key (type: int)
outputColumnNames: _col0
Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
+ Sorted Merge Bucket Map Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
- input vertices:
- 1 Map 3
Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
@@ -2554,8 +2261,6 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: bigint)
- Local Work:
- Map Reduce Local Work
Reducer 2
Reduce Operator Tree:
Group By Operator
@@ -2633,34 +2338,10 @@ select count(*) from
on subq1.key = subq2.key
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-2 is a root stage
- Stage-1 depends on stages: Stage-2
+ Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-2
- Spark
-#### A masked pattern was here ####
- Vertices:
- Map 3
- Map Operator Tree:
- TableScan
- alias: a
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (key < 8) (type: boolean)
- Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- Local Work:
- Map Reduce Local Work
-
Stage: Stage-1
Spark
Edges:
@@ -2679,14 +2360,12 @@ STAGE PLANS:
expressions: key (type: int)
outputColumnNames: _col0
Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
+ Sorted Merge Bucket Map Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
- input vertices:
- 1 Map 3
Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
@@ -2697,8 +2376,6 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: bigint)
- Local Work:
- Map Reduce Local Work
Reducer 2
Reduce Operator Tree:
Group By Operator
@@ -2750,34 +2427,10 @@ select count(*) from
join tbl2 a on subq1.key = a.key
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-2 is a root stage
- Stage-1 depends on stages: Stage-2
+ Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-2
- Spark
-#### A masked pattern was here ####
- Vertices:
- Map 3
- Map Operator Tree:
- TableScan
- alias: a
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (key < 6) (type: boolean)
- Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- Local Work:
- Map Reduce Local Work
-
Stage: Stage-1
Spark
Edges:
@@ -2796,14 +2449,12 @@ STAGE PLANS:
expressions: key (type: int)
outputColumnNames: _col0
Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
+ Sorted Merge Bucket Map Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
- input vertices:
- 1 Map 3
Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
@@ -2814,8 +2465,6 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: bigint)
- Local Work:
- Map Reduce Local Work
Reducer 2
Reduce Operator Tree:
Group By Operator
@@ -2865,34 +2514,10 @@ select count(*) from tbl1 a
on a.key = subq1.key
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-2 is a root stage
- Stage-1 depends on stages: Stage-2
+ Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-2
- Spark
-#### A masked pattern was here ####
- Vertices:
- Map 3
- Map Operator Tree:
- TableScan
- alias: a
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (key < 6) (type: boolean)
- Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- Local Work:
- Map Reduce Local Work
-
Stage: Stage-1
Spark
Edges:
@@ -2911,14 +2536,12 @@ STAGE PLANS:
expressions: key (type: int)
outputColumnNames: _col0
Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
+ Sorted Merge Bucket Map Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
- input vertices:
- 1 Map 3
Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
@@ -2929,8 +2552,6 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: bigint)
- Local Work:
- Map Reduce Local Work
Reducer 2
Reduce Operator Tree:
Group By Operator
@@ -2990,54 +2611,10 @@ select count(*) from
on (subq1.key = subq3.key)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-2 is a root stage
- Stage-1 depends on stages: Stage-2
+ Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-2
- Spark
-#### A masked pattern was here ####
- Vertices:
- Map 3
- Map Operator Tree:
- TableScan
- alias: a
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (key < 6) (type: boolean)
- Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- 2 _col0 (type: int)
- Local Work:
- Map Reduce Local Work
- Map 4
- Map Operator Tree:
- TableScan
- alias: a
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (key < 6) (type: boolean)
- Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- 2 _col0 (type: int)
- Local Work:
- Map Reduce Local Work
-
Stage: Stage-1
Spark
Edges:
@@ -3056,7 +2633,7 @@ STAGE PLANS:
expressions: key (type: int)
outputColumnNames: _col0
Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
+ Sorted Merge Bucket Map Join Operator
condition map:
Inner Join 0 to 1
Inner Join 0 to 2
@@ -3064,9 +2641,6 @@ STAGE PLANS:
0 _col0 (type: int)
1 _col0 (type: int)
2 _col0 (type: int)
- input vertices:
- 1 Map 3
- 2 Map 4
Statistics: Num rows: 6 Data size: 46 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
@@ -3077,8 +2651,6 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: bigint)
- Local Work:
- Map Reduce Local Work
Reducer 2
Reduce Operator Tree:
Group By Operator
@@ -3152,34 +2724,10 @@ join tbl2 b
on subq2.key = b.key) a
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-2 is a root stage
- Stage-1 depends on stages: Stage-2
+ Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-2
- Spark
-#### A masked pattern was here ####
- Vertices:
- Map 3
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (key < 6) (type: boolean)
- Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- Local Work:
- Map Reduce Local Work
-
Stage: Stage-1
Spark
Edges:
@@ -3198,14 +2746,12 @@ STAGE PLANS:
expressions: key (type: int)
outputColumnNames: _col0
Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
+ Sorted Merge Bucket Map Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
- input vertices:
- 1 Map 3
Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
@@ -3216,8 +2762,6 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: bigint)
- Local Work:
- Map Reduce Local Work
Reducer 2
Reduce Operator Tree:
Group By Operator
[11/12] hive git commit: HIVE-18208: SMB Join : Fix the unit tests to
run SMB Joins. (Deepak Jaiswal, reviewed by Jason Dere)
Posted by jd...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/results/clientpositive/auto_sortmerge_join_11.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/auto_sortmerge_join_11.q.out b/ql/src/test/results/clientpositive/auto_sortmerge_join_11.q.out
index 243a49b..25bac39 100644
--- a/ql/src/test/results/clientpositive/auto_sortmerge_join_11.q.out
+++ b/ql/src/test/results/clientpositive/auto_sortmerge_join_11.q.out
@@ -1,8 +1,8 @@
-PREHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+PREHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (KEY) INTO 2 BUCKETS STORED AS TEXTFILE
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@bucket_small
-POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (KEY) INTO 2 BUCKETS STORED AS TEXTFILE
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@bucket_small
@@ -23,11 +23,11 @@ POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket2out
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@bucket_small@ds=2008-04-08
-PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE
+PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY(KEY) INTO 4 BUCKETS STORED AS TEXTFILE
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@bucket_big
-POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE
+POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY(KEY) INTO 4 BUCKETS STORED AS TEXTFILE
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@bucket_big
@@ -102,15 +102,23 @@ PREHOOK: type: QUERY
POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-5 is a root stage
- Stage-2 depends on stages: Stage-5
+ Stage-6 is a root stage , consists of Stage-7, Stage-8, Stage-1
+ Stage-7 has a backup stage: Stage-1
+ Stage-4 depends on stages: Stage-7
+ Stage-2 depends on stages: Stage-1, Stage-4, Stage-5
+ Stage-8 has a backup stage: Stage-1
+ Stage-5 depends on stages: Stage-8
+ Stage-1
Stage-0 depends on stages: Stage-2
STAGE PLANS:
- Stage: Stage-5
+ Stage: Stage-6
+ Conditional Operator
+
+ Stage: Stage-7
Map Reduce Local Work
Alias -> Map Local Tables:
- $hdt$_0:a
+ $hdt$_1:b
Fetch Operator
limit: -1
Partition Description:
@@ -121,101 +129,161 @@ STAGE PLANS:
partition values:
ds 2008-04-08
properties:
- bucket_count 2
+ bucket_count 4
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_small
- numFiles 2
+ name default.bucket_big
+ numFiles 4
numRows 0
partition_columns ds
partition_columns.types string
rawDataSize 0
- serialization.ddl struct bucket_small { string key, string value}
+ serialization.ddl struct bucket_big { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 114
+ totalSize 5812
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- bucket_count 2
+ SORTBUCKETCOLSPREFIX TRUE
+ bucket_count 4
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_small
+ name default.bucket_big
partition_columns ds
partition_columns.types string
- serialization.ddl struct bucket_small { string key, string value}
+ serialization.ddl struct bucket_big { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_small
- name: default.bucket_small
+ name: default.bucket_big
+ name: default.bucket_big
+ Partition
+ base file name: ds=2008-04-09
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-09
+ properties:
+ bucket_count 4
+ bucket_field_name key
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.bucket_big
+ numFiles 4
+ numRows 0
+ partition_columns ds
+ partition_columns.types string
+ rawDataSize 0
+ serialization.ddl struct bucket_big { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ SORTBUCKETCOLSPREFIX TRUE
+ bucket_count 4
+ bucket_field_name key
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.bucket_big
+ partition_columns ds
+ partition_columns.types string
+ serialization.ddl struct bucket_big { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.bucket_big
+ name: default.bucket_big
Alias -> Map Local Operator Tree:
- $hdt$_0:a
+ $hdt$_1:b
TableScan
- alias: a
- Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+ alias: b
+ Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
- Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
HashTable Sink Operator
keys:
0 _col0 (type: string)
1 _col0 (type: string)
- Position of Big Table: 1
+ Position of Big Table: 0
- Stage: Stage-2
+ Stage: Stage-4
Map Reduce
Map Operator Tree:
TableScan
- alias: b
- Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+ alias: a
+ Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
- Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: string)
1 _col0 (type: string)
- Position of Big Table: 1
+ Position of Big Table: 0
Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- value expressions: _col0 (type: bigint)
- auto parallelism: false
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ column.name.delimiter ,
+ columns _col0
+ columns.types bigint
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Local Work:
Map Reduce Local Work
Path -> Alias:
@@ -252,6 +320,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
+ SORTBUCKETCOLSPREFIX TRUE
bucket_count 4
bucket_field_name key
column.name.delimiter ,
@@ -300,6 +369,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
+ SORTBUCKETCOLSPREFIX TRUE
bucket_count 4
bucket_field_name key
column.name.delimiter ,
@@ -348,6 +418,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
+ SORTBUCKETCOLSPREFIX TRUE
bucket_count 2
bucket_field_name key
column.name.delimiter ,
@@ -366,8 +437,47 @@ STAGE PLANS:
name: default.bucket_small
name: default.bucket_small
Truncated Path -> Alias:
- /bucket_big/ds=2008-04-08 [$hdt$_1:b]
- /bucket_big/ds=2008-04-09 [$hdt$_1:b]
+ /bucket_small/ds=2008-04-08 [$hdt$_0:a]
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ GatherStats: false
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: bigint)
+ auto parallelism: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: -mr-10004
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ column.name.delimiter ,
+ columns _col0
+ columns.types bigint
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ column.name.delimiter ,
+ columns _col0
+ columns.types bigint
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Truncated Path -> Alias:
+#### A masked pattern was here ####
Needs Tagging: false
Reduce Operator Tree:
Group By Operator
@@ -398,40 +508,7 @@ STAGE PLANS:
GatherStats: false
MultiFileSpray: false
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key
-PREHOOK: type: QUERY
-PREHOOK: Input: default@bucket_big
-PREHOOK: Input: default@bucket_big@ds=2008-04-08
-PREHOOK: Input: default@bucket_big@ds=2008-04-09
-PREHOOK: Input: default@bucket_small
-PREHOOK: Input: default@bucket_small@ds=2008-04-08
-#### A masked pattern was here ####
-POSTHOOK: query: select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@bucket_big
-POSTHOOK: Input: default@bucket_big@ds=2008-04-08
-POSTHOOK: Input: default@bucket_big@ds=2008-04-09
-POSTHOOK: Input: default@bucket_small
-POSTHOOK: Input: default@bucket_small@ds=2008-04-08
-#### A masked pattern was here ####
-38
-PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key
-PREHOOK: type: QUERY
-POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
- Stage-5 is a root stage
- Stage-2 depends on stages: Stage-5
- Stage-0 depends on stages: Stage-2
-
-STAGE PLANS:
- Stage: Stage-5
+ Stage: Stage-8
Map Reduce Local Work
Alias -> Map Local Tables:
$hdt$_0:a
@@ -468,6 +545,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
+ SORTBUCKETCOLSPREFIX TRUE
bucket_count 2
bucket_field_name key
column.name.delimiter ,
@@ -505,7 +583,7 @@ STAGE PLANS:
1 _col0 (type: string)
Position of Big Table: 1
- Stage: Stage-2
+ Stage: Stage-5
Map Reduce
Map Operator Tree:
TableScan
@@ -533,13 +611,24 @@ STAGE PLANS:
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- value expressions: _col0 (type: bigint)
- auto parallelism: false
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ column.name.delimiter ,
+ columns _col0
+ columns.types bigint
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Local Work:
Map Reduce Local Work
Path -> Alias:
@@ -576,6 +665,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
+ SORTBUCKETCOLSPREFIX TRUE
bucket_count 4
bucket_field_name key
column.name.delimiter ,
@@ -624,6 +714,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
+ SORTBUCKETCOLSPREFIX TRUE
bucket_count 4
bucket_field_name key
column.name.delimiter ,
@@ -672,6 +763,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
+ SORTBUCKETCOLSPREFIX TRUE
bucket_count 2
bucket_field_name key
column.name.delimiter ,
@@ -692,145 +784,466 @@ STAGE PLANS:
Truncated Path -> Alias:
/bucket_big/ds=2008-04-08 [$hdt$_1:b]
/bucket_big/ds=2008-04-09 [$hdt$_1:b]
- Needs Tagging: false
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns _col0
- columns.types bigint
- escape.delim \
- hive.serialization.extend.additional.nesting.levels true
- serialization.escape.crlf true
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-PREHOOK: query: select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key
-PREHOOK: type: QUERY
-PREHOOK: Input: default@bucket_big
-PREHOOK: Input: default@bucket_big@ds=2008-04-08
-PREHOOK: Input: default@bucket_big@ds=2008-04-09
-PREHOOK: Input: default@bucket_small
-PREHOOK: Input: default@bucket_small@ds=2008-04-08
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: a
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+ tag: 0
+ auto parallelism: false
+ TableScan
+ alias: b
+ Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: a
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+ tag: 1
+ auto parallelism: false
+ Path -> Alias:
#### A masked pattern was here ####
-POSTHOOK: query: select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@bucket_big
-POSTHOOK: Input: default@bucket_big@ds=2008-04-08
-POSTHOOK: Input: default@bucket_big@ds=2008-04-09
-POSTHOOK: Input: default@bucket_small
-POSTHOOK: Input: default@bucket_small@ds=2008-04-08
+ Path -> Partition:
#### A masked pattern was here ####
-38
-PREHOOK: query: explain extended select /*+ mapjoin(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key
-PREHOOK: type: QUERY
-POSTHOOK: query: explain extended select /*+ mapjoin(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
- Stage-3 is a root stage
- Stage-1 depends on stages: Stage-3
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-3
- Map Reduce Local Work
- Alias -> Map Local Tables:
- a
- Fetch Operator
- limit: -1
- Partition Description:
- Partition
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- partition values:
- ds 2008-04-08
- properties:
- bucket_count 2
- bucket_field_name key
+ Partition
+ base file name: ds=2008-04-08
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-08
+ properties:
+ bucket_count 4
+ bucket_field_name key
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.bucket_big
+ numFiles 4
+ numRows 0
+ partition_columns ds
+ partition_columns.types string
+ rawDataSize 0
+ serialization.ddl struct bucket_big { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ SORTBUCKETCOLSPREFIX TRUE
+ bucket_count 4
+ bucket_field_name key
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.bucket_big
+ partition_columns ds
+ partition_columns.types string
+ serialization.ddl struct bucket_big { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.bucket_big
+ name: default.bucket_big
+#### A masked pattern was here ####
+ Partition
+ base file name: ds=2008-04-09
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-09
+ properties:
+ bucket_count 4
+ bucket_field_name key
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.bucket_big
+ numFiles 4
+ numRows 0
+ partition_columns ds
+ partition_columns.types string
+ rawDataSize 0
+ serialization.ddl struct bucket_big { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ SORTBUCKETCOLSPREFIX TRUE
+ bucket_count 4
+ bucket_field_name key
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.bucket_big
+ partition_columns ds
+ partition_columns.types string
+ serialization.ddl struct bucket_big { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.bucket_big
+ name: default.bucket_big
+#### A masked pattern was here ####
+ Partition
+ base file name: ds=2008-04-08
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-08
+ properties:
+ bucket_count 2
+ bucket_field_name key
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.bucket_small
+ numFiles 2
+ numRows 0
+ partition_columns ds
+ partition_columns.types string
+ rawDataSize 0
+ serialization.ddl struct bucket_small { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 114
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ SORTBUCKETCOLSPREFIX TRUE
+ bucket_count 2
+ bucket_field_name key
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.bucket_small
+ partition_columns ds
+ partition_columns.types string
+ serialization.ddl struct bucket_small { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.bucket_small
+ name: default.bucket_small
+ Truncated Path -> Alias:
+ /bucket_big/ds=2008-04-08 [$hdt$_1:b]
+ /bucket_big/ds=2008-04-09 [$hdt$_1:b]
+ /bucket_small/ds=2008-04-08 [$hdt$_0:a]
+ Needs Tagging: true
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
column.name.delimiter ,
- columns key,value
- columns.comments
- columns.types string:string
+ columns _col0
+ columns.types bigint
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@bucket_big
+PREHOOK: Input: default@bucket_big@ds=2008-04-08
+PREHOOK: Input: default@bucket_big@ds=2008-04-09
+PREHOOK: Input: default@bucket_small
+PREHOOK: Input: default@bucket_small@ds=2008-04-08
#### A masked pattern was here ####
- name default.bucket_small
- numFiles 2
- numRows 0
- partition_columns ds
- partition_columns.types string
- rawDataSize 0
- serialization.ddl struct bucket_small { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 114
+POSTHOOK: query: select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@bucket_big
+POSTHOOK: Input: default@bucket_big@ds=2008-04-08
+POSTHOOK: Input: default@bucket_big@ds=2008-04-09
+POSTHOOK: Input: default@bucket_small
+POSTHOOK: Input: default@bucket_small@ds=2008-04-08
#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count 2
- bucket_field_name key
- column.name.delimiter ,
- columns key,value
- columns.comments
- columns.types string:string
+38
+PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ Position of Big Table: 1
+ BucketMapJoin: true
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ tag: -1
+ value expressions: _col0 (type: bigint)
+ auto parallelism: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: ds=2008-04-08
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-08
+ properties:
+ bucket_count 4
+ bucket_field_name key
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.bucket_big
+ numFiles 4
+ numRows 0
+ partition_columns ds
+ partition_columns.types string
+ rawDataSize 0
+ serialization.ddl struct bucket_big { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ SORTBUCKETCOLSPREFIX TRUE
+ bucket_count 4
+ bucket_field_name key
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.bucket_big
+ partition_columns ds
+ partition_columns.types string
+ serialization.ddl struct bucket_big { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.bucket_big
+ name: default.bucket_big
+#### A masked pattern was here ####
+ Partition
+ base file name: ds=2008-04-09
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-09
+ properties:
+ bucket_count 4
+ bucket_field_name key
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.bucket_big
+ numFiles 4
+ numRows 0
+ partition_columns ds
+ partition_columns.types string
+ rawDataSize 0
+ serialization.ddl struct bucket_big { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ SORTBUCKETCOLSPREFIX TRUE
+ bucket_count 4
+ bucket_field_name key
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.bucket_big
+ partition_columns ds
+ partition_columns.types string
+ serialization.ddl struct bucket_big { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.bucket_big
+ name: default.bucket_big
+ Truncated Path -> Alias:
+ /bucket_big/ds=2008-04-08 [$hdt$_1:b]
+ /bucket_big/ds=2008-04-09 [$hdt$_1:b]
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
#### A masked pattern was here ####
- name default.bucket_small
- partition_columns ds
- partition_columns.types string
- serialization.ddl struct bucket_small { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ NumFilesPerFileSink: 1
#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_small
- name: default.bucket_small
- Alias -> Map Local Operator Tree:
- a
- TableScan
- alias: a
- Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0
+ columns.types bigint
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
- HashTable Sink Operator
- keys:
- 0 key (type: string)
- 1 key (type: string)
- Position of Big Table: 1
- Bucket Mapjoin Context:
- Alias Bucket Base File Name Mapping:
- a {ds=2008-04-08/srcsortbucket1outof4.txt=[ds=2008-04-08/smallsrcsortbucket1outof4.txt], ds=2008-04-08/srcsortbucket2outof4.txt=[ds=2008-04-08/smallsrcsortbucket2outof4.txt], ds=2008-04-08/srcsortbucket3outof4.txt=[ds=2008-04-08/smallsrcsortbucket1outof4.txt], ds=2008-04-08/srcsortbucket4outof4.txt=[ds=2008-04-08/smallsrcsortbucket2outof4.txt], ds=2008-04-09/srcsortbucket1outof4.txt=[ds=2008-04-08/smallsrcsortbucket1outof4.txt], ds=2008-04-09/srcsortbucket2outof4.txt=[ds=2008-04-08/smallsrcsortbucket2outof4.txt], ds=2008-04-09/srcsortbucket3outof4.txt=[ds=2008-04-08/smallsrcsortbucket1outof4.txt], ds=2008-04-09/srcsortbucket4outof4.txt=[ds=2008-04-08/smallsrcsortbucket2outof4.txt]}
- Alias Bucket File Name Mapping:
+ MultiFileSpray: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@bucket_big
+PREHOOK: Input: default@bucket_big@ds=2008-04-08
+PREHOOK: Input: default@bucket_big@ds=2008-04-09
+PREHOOK: Input: default@bucket_small
+PREHOOK: Input: default@bucket_small@ds=2008-04-08
#### A masked pattern was here ####
- Alias Bucket Output File Name Mapping:
+POSTHOOK: query: select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@bucket_big
+POSTHOOK: Input: default@bucket_big@ds=2008-04-08
+POSTHOOK: Input: default@bucket_big@ds=2008-04-09
+POSTHOOK: Input: default@bucket_small
+POSTHOOK: Input: default@bucket_small@ds=2008-04-08
#### A masked pattern was here ####
+38
+PREHOOK: query: explain extended select /*+ mapjoin(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended select /*+ mapjoin(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+STAGE PLANS:
Stage: Stage-1
Map Reduce
Map Operator Tree:
@@ -842,29 +1255,24 @@ STAGE PLANS:
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
+ Sorted Merge Bucket Map Join Operator
condition map:
Inner Join 0 to 1
keys:
0 key (type: string)
1 key (type: string)
Position of Big Table: 1
- Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE
BucketMapJoin: true
Group By Operator
aggregations: count()
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
null sort order:
sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
tag: -1
value expressions: _col0 (type: bigint)
auto parallelism: false
- Local Work:
- Map Reduce Local Work
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -899,6 +1307,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
+ SORTBUCKETCOLSPREFIX TRUE
bucket_count 4
bucket_field_name key
column.name.delimiter ,
@@ -947,6 +1356,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
+ SORTBUCKETCOLSPREFIX TRUE
bucket_count 4
bucket_field_name key
column.name.delimiter ,
@@ -973,13 +1383,11 @@ STAGE PLANS:
aggregations: count(VALUE._col0)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -1025,200 +1433,10 @@ PREHOOK: type: QUERY
POSTHOOK: query: explain extended select /*+ MAPJOIN(a,b) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key JOIN bucket_big c ON a.key = c.key
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-4 is a root stage
- Stage-1 depends on stages: Stage-4
+ Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-4
- Map Reduce Local Work
- Alias -> Map Local Tables:
- a
- Fetch Operator
- limit: -1
- Partition Description:
- Partition
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- partition values:
- ds 2008-04-08
- properties:
- bucket_count 2
- bucket_field_name key
- column.name.delimiter ,
- columns key,value
- columns.comments
- columns.types string:string
-#### A masked pattern was here ####
- name default.bucket_small
- numFiles 2
- numRows 0
- partition_columns ds
- partition_columns.types string
- rawDataSize 0
- serialization.ddl struct bucket_small { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 114
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count 2
- bucket_field_name key
- column.name.delimiter ,
- columns key,value
- columns.comments
- columns.types string:string
-#### A masked pattern was here ####
- name default.bucket_small
- partition_columns ds
- partition_columns.types string
- serialization.ddl struct bucket_small { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_small
- name: default.bucket_small
- b
- Fetch Operator
- limit: -1
- Partition Description:
- Partition
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- partition values:
- ds 2008-04-08
- properties:
- bucket_count 4
- bucket_field_name key
- column.name.delimiter ,
- columns key,value
- columns.comments
- columns.types string:string
-#### A masked pattern was here ####
- name default.bucket_big
- numFiles 4
- numRows 0
- partition_columns ds
- partition_columns.types string
- rawDataSize 0
- serialization.ddl struct bucket_big { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 5812
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count 4
- bucket_field_name key
- column.name.delimiter ,
- columns key,value
- columns.comments
- columns.types string:string
-#### A masked pattern was here ####
- name default.bucket_big
- partition_columns ds
- partition_columns.types string
- serialization.ddl struct bucket_big { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_big
- name: default.bucket_big
- Partition
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- partition values:
- ds 2008-04-09
- properties:
- bucket_count 4
- bucket_field_name key
- column.name.delimiter ,
- columns key,value
- columns.comments
- columns.types string:string
-#### A masked pattern was here ####
- name default.bucket_big
- numFiles 4
- numRows 0
- partition_columns ds
- partition_columns.types string
- rawDataSize 0
- serialization.ddl struct bucket_big { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 5812
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count 4
- bucket_field_name key
- column.name.delimiter ,
- columns key,value
- columns.comments
- columns.types string:string
-#### A masked pattern was here ####
- name default.bucket_big
- partition_columns ds
- partition_columns.types string
- serialization.ddl struct bucket_big { string key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_big
- name: default.bucket_big
- Alias -> Map Local Operator Tree:
- a
- TableScan
- alias: a
- Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
- HashTable Sink Operator
- keys:
- 0 key (type: string)
- 1 key (type: string)
- 2 key (type: string)
- Position of Big Table: 2
- b
- TableScan
- alias: b
- Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
- HashTable Sink Operator
- keys:
- 0 key (type: string)
- 1 key (type: string)
- 2 key (type: string)
- Position of Big Table: 2
- Bucket Mapjoin Context:
- Alias Bucket Base File Name Mapping:
- a {ds=2008-04-08/srcsortbucket1outof4.txt=[ds=2008-04-08/smallsrcsortbucket1outof4.txt], ds=2008-04-08/srcsortbucket2outof4.txt=[ds=2008-04-08/smallsrcsortbucket2outof4.txt], ds=2008-04-08/srcsortbucket3outof4.txt=[ds=2008-04-08/smallsrcsortbucket1outof4.txt], ds=2008-04-08/srcsortbucket4outof4.txt=[ds=2008-04-08/smallsrcsortbucket2outof4.txt], ds=2008-04-09/srcsortbucket1outof4.txt=[ds=2008-04-08/smallsrcsortbucket1outof4.txt], ds=2008-04-09/srcsortbucket2outof4.txt=[ds=2008-04-08/smallsrcsortbucket2outof4.txt], ds=2008-04-09/srcsortbucket3outof4.txt=[ds=2008-04-08/smallsrcsortbucket1outof4.txt], ds=2008-04-09/srcsortbucket4outof4.txt=[ds=2008-04-08/smallsrcsortbucket2outof4.txt]}
- b {ds=2008-04-08/srcsortbucket1outof4.txt=[ds=2008-04-08/srcsortbucket1outof4.txt, ds=2008-04-09/srcsortbucket1outof4.txt], ds=2008-04-08/srcsortbucket2outof4.txt=[ds=2008-04-08/srcsortbucket2outof4.txt, ds=2008-04-09/srcsortbucket2outof4.txt], ds=2008-04-08/srcsortbucket3outof4.txt=[ds=2008-04-08/srcsortbucket3outof4.txt, ds=2008-04-09/srcsortbucket3outof4.txt], ds=2008-04-08/srcsortbucket4outof4.txt=[ds=2008-04-08/srcsortbucket4outof4.txt, ds=2008-04-09/srcsortbucket4outof4.txt], ds=2008-04-09/srcsortbucket1outof4.txt=[ds=2008-04-08/srcsortbucket1outof4.txt, ds=2008-04-09/srcsortbucket1outof4.txt], ds=2008-04-09/srcsortbucket2outof4.txt=[ds=2008-04-08/srcsortbucket2outof4.txt, ds=2008-04-09/srcsortbucket2outof4.txt], ds=2008-04-09/srcsortbucket3outof4.txt=[ds=2008-04-08/srcsortbucket3outof4.txt, ds=2008-04-09/srcsortbucket3outof4.txt], ds=2008-04-09/srcsortbucket4outof4.txt=[ds=2008-04-08/srcsortbucket4outof4.txt, ds=2008-04-09/srcsortbucket4outof4.txt]}
- Alias Bucket File Name Mapping:
-#### A masked pattern was here ####
- Alias Bucket Output File Name Mapping:
-#### A masked pattern was here ####
-
Stage: Stage-1
Map Reduce
Map Operator Tree:
@@ -1230,7 +1448,7 @@ STAGE PLANS:
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
+ Sorted Merge Bucket Map Join Operator
condition map:
Inner Join 0 to 1
Inner Join 0 to 2
@@ -1239,22 +1457,17 @@ STAGE PLANS:
1 key (type: string)
2 key (type: string)
Position of Big Table: 2
- Statistics: Num rows: 255 Data size: 25572 Basic stats: COMPLETE Column stats: NONE
BucketMapJoin: true
Group By Operator
aggregations: count()
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
null sort order:
sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
tag: -1
value expressions: _col0 (type: bigint)
auto parallelism: false
- Local Work:
- Map Reduce Local Work
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -1289,6 +1502,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
+ SORTBUCKETCOLSPREFIX TRUE
bucket_count 4
bucket_field_name key
column.name.delimiter ,
@@ -1337,6 +1551,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
+ SORTBUCKETCOLSPREFIX TRUE
bucket_count 4
bucket_field_name key
column.name.delimiter ,
@@ -1363,13 +1578,11 @@ STAGE PLANS:
aggregations: count(VALUE._col0)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
[05/12] hive git commit: HIVE-18208: SMB Join : Fix the unit tests to
run SMB Joins. (Deepak Jaiswal, reviewed by Jason Dere)
Posted by jd...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_9.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_9.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_9.q.out
index 37c07af..1acb915 100644
--- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_9.q.out
+++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_9.q.out
@@ -57,13 +57,23 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE)
Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
+ alias: b
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Operator Tree:
+ TableScan
alias: a
Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
@@ -73,14 +83,12 @@ STAGE PLANS:
expressions: key (type: int)
outputColumnNames: _col0
Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
+ Merge Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
- input vertices:
- 1 Map 3
Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
@@ -92,26 +100,6 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: bigint)
Execution mode: llap
- LLAP IO: no inputs
- Map 3
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Execution mode: llap
- LLAP IO: no inputs
Reducer 2
Execution mode: llap
Reduce Operator Tree:
@@ -172,13 +160,23 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE)
Reducer 2 <- Map 1 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
+ alias: b
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Operator Tree:
+ TableScan
alias: a
Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
@@ -188,15 +186,13 @@ STAGE PLANS:
expressions: key (type: int)
outputColumnNames: _col0
Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
+ Merge Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
outputColumnNames: _col0
- input vertices:
- 1 Map 3
Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
@@ -211,26 +207,6 @@ STAGE PLANS:
Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: bigint)
Execution mode: llap
- LLAP IO: no inputs
- Map 3
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Execution mode: llap
- LLAP IO: no inputs
Reducer 2
Execution mode: llap
Reduce Operator Tree:
@@ -307,7 +283,6 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Map 4 (BROADCAST_EDGE)
Reducer 2 <- Map 1 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
@@ -315,6 +290,17 @@ STAGE PLANS:
Map 1
Map Operator Tree:
TableScan
+ alias: b
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Operator Tree:
+ TableScan
alias: a
Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
@@ -324,15 +310,13 @@ STAGE PLANS:
expressions: key (type: int)
outputColumnNames: _col0
Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
+ Merge Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
outputColumnNames: _col0
- input vertices:
- 1 Map 4
Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
keys: _col0 (type: int)
@@ -345,26 +329,6 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
- LLAP IO: no inputs
- Map 4
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Execution mode: llap
- LLAP IO: no inputs
Reducer 2
Execution mode: llap
Reduce Operator Tree:
@@ -474,14 +438,25 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Map 4 (BROADCAST_EDGE)
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 3 (BROADCAST_EDGE)
- Reducer 3 <- Map 1 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE)
+ Reducer 6 <- Map 5 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
+ alias: b
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Operator Tree:
+ TableScan
alias: a
Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
@@ -491,15 +466,13 @@ STAGE PLANS:
expressions: key (type: int)
outputColumnNames: _col0
Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
+ Merge Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
outputColumnNames: _col0
- input vertices:
- 1 Map 4
Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
@@ -513,15 +486,8 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: bigint)
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: bigint)
Execution mode: llap
- LLAP IO: no inputs
- Map 4
+ Map 5
Map Operator Tree:
TableScan
alias: b
@@ -533,13 +499,38 @@ STAGE PLANS:
expressions: key (type: int)
outputColumnNames: _col0
Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ keys: _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
Execution mode: llap
- LLAP IO: no inputs
Reducer 2
Execution mode: llap
Reduce Operator Tree:
@@ -549,28 +540,35 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col0, _col1, _col3
- input vertices:
- 1 Reducer 3
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col3
+ Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: int), _col3 (type: bigint), _col1 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: int), _col3 (type: bigint), _col1 (type: bigint)
- outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 3
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 6
Execution mode: llap
Reduce Operator Tree:
Group By Operator
@@ -659,7 +657,6 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE)
Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
@@ -675,14 +672,23 @@ STAGE PLANS:
expressions: key (type: int)
outputColumnNames: _col0
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (key < 6) (type: boolean)
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ Merge Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
- input vertices:
- 1 Map 3
Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
@@ -694,26 +700,6 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: bigint)
Execution mode: llap
- LLAP IO: no inputs
- Map 3
- Map Operator Tree:
- TableScan
- alias: a
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: (key < 6) (type: boolean)
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Execution mode: llap
- LLAP IO: no inputs
Reducer 2
Execution mode: llap
Reduce Operator Tree:
@@ -788,13 +774,23 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE)
Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
+ alias: b
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (key < 6) (type: boolean)
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Operator Tree:
+ TableScan
alias: a
Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
@@ -804,14 +800,12 @@ STAGE PLANS:
expressions: key (type: int)
outputColumnNames: _col0
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
+ Merge Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
- input vertices:
- 1 Map 3
Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
@@ -823,26 +817,6 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: bigint)
Execution mode: llap
- LLAP IO: no inputs
- Map 3
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: (key < 6) (type: boolean)
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Execution mode: llap
- LLAP IO: no inputs
Reducer 2
Execution mode: llap
Reduce Operator Tree:
@@ -941,7 +915,6 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE)
Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
@@ -957,14 +930,23 @@ STAGE PLANS:
expressions: key (type: int)
outputColumnNames: _col0
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (key < 6) (type: boolean)
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ Merge Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
- input vertices:
- 1 Map 3
Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
@@ -976,26 +958,6 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: bigint)
Execution mode: llap
- LLAP IO: no inputs
- Map 3
- Map Operator Tree:
- TableScan
- alias: a
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: (key < 6) (type: boolean)
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Execution mode: llap
- LLAP IO: no inputs
Reducer 2
Execution mode: llap
Reduce Operator Tree:
@@ -1082,7 +1044,6 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE)
Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
@@ -1098,14 +1059,23 @@ STAGE PLANS:
expressions: key (type: int)
outputColumnNames: _col0
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (key < 8) (type: boolean)
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ Merge Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
- input vertices:
- 1 Map 3
Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
@@ -1117,26 +1087,6 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: bigint)
Execution mode: llap
- LLAP IO: no inputs
- Map 3
- Map Operator Tree:
- TableScan
- alias: a
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: (key < 8) (type: boolean)
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Execution mode: llap
- LLAP IO: no inputs
Reducer 2
Execution mode: llap
Reduce Operator Tree:
@@ -1201,8 +1151,8 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE)
- Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -1217,27 +1167,14 @@ STAGE PLANS:
expressions: (key + 1) (type: int)
outputColumnNames: _col0
Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- input vertices:
- 1 Map 3
- Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: bigint)
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
- Map 3
+ Map 4
Map Operator Tree:
TableScan
alias: a
@@ -1259,6 +1196,25 @@ STAGE PLANS:
Reducer 2
Execution mode: llap
Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
mode: mergepartial
@@ -1316,7 +1272,6 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE)
Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
@@ -1332,14 +1287,23 @@ STAGE PLANS:
expressions: key (type: int)
outputColumnNames: _col0
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (key < 6) (type: boolean)
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ Merge Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
- input vertices:
- 1 Map 3
Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
@@ -1351,26 +1315,6 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: bigint)
Execution mode: llap
- LLAP IO: no inputs
- Map 3
- Map Operator Tree:
- TableScan
- alias: a
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: (key < 6) (type: boolean)
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Execution mode: llap
- LLAP IO: no inputs
Reducer 2
Execution mode: llap
Reduce Operator Tree:
@@ -1429,7 +1373,6 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE)
Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
@@ -1445,14 +1388,23 @@ STAGE PLANS:
expressions: key (type: int)
outputColumnNames: _col0
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (key < 6) (type: boolean)
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ Merge Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
- input vertices:
- 1 Map 3
Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
@@ -1464,26 +1416,6 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: bigint)
Execution mode: llap
- LLAP IO: no inputs
- Map 3
- Map Operator Tree:
- TableScan
- alias: a
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: (key < 6) (type: boolean)
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Execution mode: llap
- LLAP IO: no inputs
Reducer 2
Execution mode: llap
Reduce Operator Tree:
@@ -1552,7 +1484,6 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE)
Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
@@ -1568,30 +1499,6 @@ STAGE PLANS:
expressions: key (type: int)
outputColumnNames: _col0
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- Inner Join 0 to 2
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- 2 _col0 (type: int)
- input vertices:
- 1 Map 3
- 2 Map 4
- Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: bigint)
- Execution mode: llap
- LLAP IO: no inputs
- Map 3
Map Operator Tree:
TableScan
alias: a
@@ -1603,14 +1510,6 @@ STAGE PLANS:
expressions: key (type: int)
outputColumnNames: _col0
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Execution mode: llap
- LLAP IO: no inputs
- Map 4
Map Operator Tree:
TableScan
alias: a
@@ -1622,13 +1521,25 @@ STAGE PLANS:
expressions: key (type: int)
outputColumnNames: _col0
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 0 to 2
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ 2 _col0 (type: int)
+ Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint)
Execution mode: llap
- LLAP IO: no inputs
Reducer 2
Execution mode: llap
Reduce Operator Tree:
@@ -1711,13 +1622,23 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE)
Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
+ alias: b
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (key < 6) (type: boolean)
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Operator Tree:
+ TableScan
alias: a
Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
@@ -1727,14 +1648,12 @@ STAGE PLANS:
expressions: key (type: int)
outputColumnNames: _col0
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
+ Merge Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
- input vertices:
- 1 Map 3
Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
@@ -1746,26 +1665,6 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: bigint)
Execution mode: llap
- LLAP IO: no inputs
- Map 3
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: (key < 6) (type: boolean)
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Execution mode: llap
- LLAP IO: no inputs
Reducer 2
Execution mode: llap
Reduce Operator Tree:
@@ -1838,13 +1737,23 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE)
Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
+ alias: b
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Operator Tree:
+ TableScan
alias: a
Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
@@ -1854,14 +1763,12 @@ STAGE PLANS:
expressions: key (type: int)
outputColumnNames: _col0
Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
+ Merge Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
- input vertices:
- 1 Map 3
Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
@@ -1873,26 +1780,6 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: bigint)
Execution mode: llap
- LLAP IO: no inputs
- Map 3
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Execution mode: llap
- LLAP IO: no inputs
Reducer 2
Execution mode: llap
Reduce Operator Tree:
@@ -1953,13 +1840,23 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE)
Reducer 2 <- Map 1 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
+ alias: b
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Operator Tree:
+ TableScan
alias: a
Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
@@ -1969,15 +1866,13 @@ STAGE PLANS:
expressions: key (type: int)
outputColumnNames: _col0
Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
+ Merge Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
outputColumnNames: _col0
- input vertices:
- 1 Map 3
Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
@@ -1992,26 +1887,6 @@ STAGE PLANS:
Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: bigint)
Execution mode: llap
- LLAP IO: no inputs
- Map 3
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Execution mode: llap
- LLAP IO: no inputs
Reducer 2
Execution mode: llap
Reduce Operator Tree:
@@ -2088,7 +1963,6 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Map 4 (BROADCAST_EDGE)
Reducer 2 <- Map 1 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
@@ -2096,6 +1970,17 @@ STAGE PLANS:
Map 1
Map Operator Tree:
TableScan
+ alias: b
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Operator Tree:
+ TableScan
alias: a
Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
@@ -2105,15 +1990,13 @@ STAGE PLANS:
expressions: key (type: int)
outputColumnNames: _col0
Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
+ Merge Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
outputColumnNames: _col0
- input vertices:
- 1 Map 4
Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
keys: _col0 (type: int)
@@ -2126,26 +2009,6 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
- LLAP IO: no inputs
- Map 4
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Execution mode: llap
- LLAP IO: no inputs
Reducer 2
Execution mode: llap
Reduce Operator Tree:
@@ -2255,14 +2118,25 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Map 4 (BROADCAST_EDGE)
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 3 (BROADCAST_EDGE)
- Reducer 3 <- Map 1 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE)
+ Reducer 6 <- Map 5 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
+ alias: b
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Operator Tree:
+ TableScan
alias: a
Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
@@ -2272,15 +2146,13 @@ STAGE PLANS:
expressions: key (type: int)
outputColumnNames: _col0
Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
+ Merge Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
outputColumnNames: _col0
- input vertices:
- 1 Map 4
Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
@@ -2294,15 +2166,8 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: bigint)
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: bigint)
Execution mode: llap
- LLAP IO: no inputs
- Map 4
+ Map 5
Map Operator Tree:
TableScan
alias: b
@@ -2314,13 +2179,38 @@ STAGE PLANS:
expressions: key (type: int)
outputColumnNames: _col0
Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ keys: _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
Execution mode: llap
- LLAP IO: no inputs
Reducer 2
Execution mode: llap
Reduce Operator Tree:
@@ -2330,28 +2220,35 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col0, _col1, _col3
- input vertices:
- 1 Reducer 3
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col3
+ Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: int), _col3 (type: bigint), _col1 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: int), _col3 (type: bigint), _col1 (type: bigint)
- outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 3
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 6
Execution mode: llap
Reduce Operator Tree:
Group By Operator
@@ -2440,7 +2337,6 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE)
Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
@@ -2456,14 +2352,23 @@ STAGE PLANS:
expressions: key (type: int)
outputColumnNames: _col0
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (key < 6) (type: boolean)
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ Merge Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
- input vertices:
- 1 Map 3
Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
@@ -2475,26 +2380,6 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: bigint)
Execution mode: llap
- LLAP IO: no inputs
- Map 3
- Map Operator Tree:
- TableScan
- alias: a
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: (key < 6) (type: boolean)
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Execution mode: llap
- LLAP IO: no inputs
Reducer 2
Execution mode: llap
Reduce Operator Tree:
@@ -2569,13 +2454,23 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE)
Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
+ alias: b
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (key < 6) (type: boolean)
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Operator Tree:
+ TableScan
alias: a
Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
@@ -2585,14 +2480,12 @@ STAGE PLANS:
expressions: key (type: int)
outputColumnNames: _col0
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
+ Merge Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
- input vertices:
- 1 Map 3
Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
@@ -2604,26 +2497,6 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: bigint)
Execution mode: llap
- LLAP IO: no inputs
- Map 3
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: (key < 6) (type: boolean)
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Execution mode: llap
- LLAP IO: no inputs
Reducer 2
Execution mode: llap
Reduce Operator Tree:
@@ -2722,7 +2595,6 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE)
Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
@@ -2738,14 +2610,23 @@ STAGE PLANS:
expressions: key (type: int)
outputColumnNames: _col0
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (key < 6) (type: boolean)
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ Merge Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
- input vertices:
- 1 Map 3
Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
@@ -2757,26 +2638,6 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: bigint)
Execution mode: llap
- LLAP IO: no inputs
- Map 3
- Map Operator Tree:
- TableScan
- alias: a
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: (key < 6) (type: boolean)
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Execution mode: llap
- LLAP IO: no inputs
Reducer 2
Execution mode: llap
Reduce Operator Tree:
@@ -2863,7 +2724,6 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE)
Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
@@ -2879,14 +2739,23 @@ STAGE PLANS:
expressions: key (type: int)
outputColumnNames: _col0
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (key < 8) (type: boolean)
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ Merge Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
- input vertices:
- 1 Map 3
Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
@@ -2898,26 +2767,6 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: bigint)
Execution mode: llap
- LLAP IO: no inputs
- Map 3
- Map Operator Tree:
- TableScan
- alias: a
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: (key < 8) (type: boolean)
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Execution mode: llap
- LLAP IO: no inputs
Reducer 2
Execution mode: llap
Reduce Operator Tree:
@@ -2978,7 +2827,6 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE)
Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
@@ -2994,14 +2842,23 @@ STAGE PLANS:
expressions: key (type: int)
outputColumnNames: _col0
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (key < 6) (type: boolean)
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ Merge Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
- input vertices:
- 1 Map 3
Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
@@ -3013,26 +2870,6 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: bigint)
Execution mode: llap
- LLAP IO: no inputs
- Map 3
- Map Operator Tree:
- TableScan
- alias: a
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: (key < 6) (type: boolean)
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Execution mode: llap
- LLAP IO: no inputs
Reducer 2
Execution mode: llap
Reduce Operator Tree:
@@ -3091,7 +2928,6 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE)
Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
@@ -3107,14 +2943,23 @@ STAGE PLANS:
expressions: key (type: int)
outputColumnNames: _col0
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (key < 6) (type: boolean)
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ Merge Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
- input vertices:
- 1 Map 3
Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
@@ -3126,26 +2971,6 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: bigint)
Execution mode: llap
- LLAP IO: no inputs
- Map 3
- Map Operator Tree:
- TableScan
- alias: a
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: (key < 6) (type: boolean)
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Execution mode: llap
- LLAP IO: no inputs
Reducer 2
Execution mode: llap
Reduce Operator Tree:
@@ -3214,7 +3039,6 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE)
Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
@@ -3230,30 +3054,6 @@ STAGE PLANS:
expressions: key (type: int)
outputColumnNames: _col0
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- Inner Join 0 to 2
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- 2 _col0 (type: int)
- input vertices:
- 1 Map 3
- 2 Map 4
- Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: bigint)
- Execution mode: llap
- LLAP IO: no inputs
- Map 3
Map Operator Tree:
TableScan
alias: a
@@ -3265,14 +3065,6 @@ STAGE PLANS:
expressions: key (type: int)
outputColumnNames: _col0
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Execution mode: llap
- LLAP IO: no inputs
- Map 4
Map Operator Tree:
TableScan
alias: a
@@ -3284,13 +3076,25 @@ STAGE PLANS:
expressions: key (type: int)
outputColumnNames: _col0
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
<TRUNCATED>
[07/12] hive git commit: HIVE-18208: SMB Join : Fix the unit tests to
run SMB Joins. (Deepak Jaiswal, reviewed by Jason Dere)
Posted by jd...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_4.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_4.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_4.q.out
index 8ee44b3..43500fb 100644
--- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_4.q.out
+++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_4.q.out
@@ -110,11 +110,10 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 2 <- Map 1 (BROADCAST_EDGE)
Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
- Map 1
+ Map 2
Map Operator Tree:
TableScan
alias: a
@@ -128,22 +127,11 @@ STAGE PLANS:
expressions: key (type: string)
outputColumnNames: _col0
Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: a
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- auto parallelism: true
- Execution mode: llap
- LLAP IO: no inputs
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -192,7 +180,6 @@ STAGE PLANS:
name: default.bucket_small
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-09
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -242,7 +229,6 @@ STAGE PLANS:
Truncated Path -> Alias:
/bucket_small/ds=2008-04-08 [a]
/bucket_small/ds=2008-04-09 [a]
- Map 2
Map Operator Tree:
TableScan
alias: b
@@ -256,15 +242,12 @@ STAGE PLANS:
expressions: key (type: string)
outputColumnNames: _col0
Statistics: Num rows: 26 Data size: 7432 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
+ Merge Join Operator
condition map:
Inner Join 0 to 1
- Estimated key counts: Map 1 => 4
keys:
0 _col0 (type: string)
1 _col0 (type: string)
- input vertices:
- 0 Map 1
Position of Big Table: 1
Statistics: Num rows: 28 Data size: 8175 Basic stats: COMPLETE Column stats: NONE
Group By Operator
@@ -280,13 +263,11 @@ STAGE PLANS:
value expressions: _col0 (type: bigint)
auto parallelism: false
Execution mode: llap
- LLAP IO: no inputs
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -403,77 +384,50 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE)
Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
- alias: a
- Statistics: Num rows: 27 Data size: 7718 Basic stats: COMPLETE Column stats: NONE
+ alias: b
+ Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
- Statistics: Num rows: 26 Data size: 7432 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 26 Data size: 7432 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- Estimated key counts: Map 3 => 4
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- input vertices:
- 1 Map 3
- Position of Big Table: 0
- Statistics: Num rows: 28 Data size: 8175 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- value expressions: _col0 (type: bigint)
- auto parallelism: false
- Execution mode: llap
- LLAP IO: no inputs
+ Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
ds 2008-04-08
properties:
- bucket_count 2
+ bucket_count 4
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_big
- numFiles 2
+ name default.bucket_small
+ numFiles 4
numRows 0
partition_columns ds
partition_columns.types string
rawDataSize 0
- serialization.ddl struct bucket_big { string key, string value}
+ serialization.ddl struct bucket_small { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 2750
+ totalSize 226
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -481,59 +435,29 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
SORTBUCKETCOLSPREFIX TRUE
- bucket_count 2
+ bucket_count 4
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_big
+ name default.bucket_small
partition_columns ds
partition_columns.types string
- serialization.ddl struct bucket_big { string key, string value}
+ serialization.ddl struct bucket_small { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_big
- name: default.bucket_big
- Truncated Path -> Alias:
- /bucket_big/ds=2008-04-08 [a]
- Map 3
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: a
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- auto parallelism: true
- Execution mode: llap
- LLAP IO: no inputs
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
+ name: default.bucket_small
+ name: default.bucket_small
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
- ds 2008-04-08
+ ds 2008-04-09
properties:
bucket_count 4
bucket_field_name key
@@ -576,31 +500,70 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.bucket_small
name: default.bucket_small
+ Truncated Path -> Alias:
+ /bucket_small/ds=2008-04-08 [b]
+ /bucket_small/ds=2008-04-09 [b]
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 27 Data size: 7718 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 26 Data size: 7432 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 26 Data size: 7432 Basic stats: COMPLETE Column stats: NONE
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ Position of Big Table: 0
+ Statistics: Num rows: 28 Data size: 8175 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: bigint)
+ auto parallelism: false
+ Execution mode: llap
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-09
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
- ds 2008-04-09
+ ds 2008-04-08
properties:
- bucket_count 4
+ bucket_count 2
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_small
- numFiles 4
+ name default.bucket_big
+ numFiles 2
numRows 0
partition_columns ds
partition_columns.types string
rawDataSize 0
- serialization.ddl struct bucket_small { string key, string value}
+ serialization.ddl struct bucket_big { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 226
+ totalSize 2750
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -608,26 +571,25 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
SORTBUCKETCOLSPREFIX TRUE
- bucket_count 4
+ bucket_count 2
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_small
+ name default.bucket_big
partition_columns ds
partition_columns.types string
- serialization.ddl struct bucket_small { string key, string value}
+ serialization.ddl struct bucket_big { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_small
- name: default.bucket_small
+ name: default.bucket_big
+ name: default.bucket_big
Truncated Path -> Alias:
- /bucket_small/ds=2008-04-08 [b]
- /bucket_small/ds=2008-04-09 [b]
+ /bucket_big/ds=2008-04-08 [a]
Reducer 2
Execution mode: llap
Needs Tagging: false
@@ -696,77 +658,50 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE)
Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
- alias: a
- Statistics: Num rows: 27 Data size: 7718 Basic stats: COMPLETE Column stats: NONE
+ alias: b
+ Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
- Statistics: Num rows: 26 Data size: 7432 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 26 Data size: 7432 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- Estimated key counts: Map 3 => 4
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- input vertices:
- 1 Map 3
- Position of Big Table: 0
- Statistics: Num rows: 28 Data size: 8175 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- value expressions: _col0 (type: bigint)
- auto parallelism: false
- Execution mode: llap
- LLAP IO: no inputs
+ Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
ds 2008-04-08
properties:
- bucket_count 2
+ bucket_count 4
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_big
- numFiles 2
+ name default.bucket_small
+ numFiles 4
numRows 0
partition_columns ds
partition_columns.types string
rawDataSize 0
- serialization.ddl struct bucket_big { string key, string value}
+ serialization.ddl struct bucket_small { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 2750
+ totalSize 226
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -774,59 +709,29 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
SORTBUCKETCOLSPREFIX TRUE
- bucket_count 2
+ bucket_count 4
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_big
+ name default.bucket_small
partition_columns ds
partition_columns.types string
- serialization.ddl struct bucket_big { string key, string value}
+ serialization.ddl struct bucket_small { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_big
- name: default.bucket_big
- Truncated Path -> Alias:
- /bucket_big/ds=2008-04-08 [a]
- Map 3
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: a
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- auto parallelism: true
- Execution mode: llap
- LLAP IO: no inputs
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
+ name: default.bucket_small
+ name: default.bucket_small
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
- ds 2008-04-08
+ ds 2008-04-09
properties:
bucket_count 4
bucket_field_name key
@@ -869,31 +774,70 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.bucket_small
name: default.bucket_small
+ Truncated Path -> Alias:
+ /bucket_small/ds=2008-04-08 [b]
+ /bucket_small/ds=2008-04-09 [b]
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 27 Data size: 7718 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 26 Data size: 7432 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 26 Data size: 7432 Basic stats: COMPLETE Column stats: NONE
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ Position of Big Table: 0
+ Statistics: Num rows: 28 Data size: 8175 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: bigint)
+ auto parallelism: false
+ Execution mode: llap
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-09
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
- ds 2008-04-09
+ ds 2008-04-08
properties:
- bucket_count 4
+ bucket_count 2
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_small
- numFiles 4
+ name default.bucket_big
+ numFiles 2
numRows 0
partition_columns ds
partition_columns.types string
rawDataSize 0
- serialization.ddl struct bucket_small { string key, string value}
+ serialization.ddl struct bucket_big { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 226
+ totalSize 2750
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -901,26 +845,25 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
SORTBUCKETCOLSPREFIX TRUE
- bucket_count 4
+ bucket_count 2
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_small
+ name default.bucket_big
partition_columns ds
partition_columns.types string
- serialization.ddl struct bucket_small { string key, string value}
+ serialization.ddl struct bucket_big { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_small
- name: default.bucket_small
+ name: default.bucket_big
+ name: default.bucket_big
Truncated Path -> Alias:
- /bucket_small/ds=2008-04-08 [b]
- /bucket_small/ds=2008-04-09 [b]
+ /bucket_big/ds=2008-04-08 [a]
Reducer 2
Execution mode: llap
Needs Tagging: false
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_7.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_7.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_7.q.out
index 83d5a96..a88396b 100644
--- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_7.q.out
+++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_7.q.out
@@ -127,11 +127,10 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 2 <- Map 1 (BROADCAST_EDGE)
Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
- Map 1
+ Map 2
Map Operator Tree:
TableScan
alias: a
@@ -145,22 +144,11 @@ STAGE PLANS:
expressions: key (type: string)
outputColumnNames: _col0
Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: a
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- auto parallelism: true
- Execution mode: llap
- LLAP IO: no inputs
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -209,7 +197,6 @@ STAGE PLANS:
name: default.bucket_small
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-09
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -259,7 +246,6 @@ STAGE PLANS:
Truncated Path -> Alias:
/bucket_small/ds=2008-04-08 [a]
/bucket_small/ds=2008-04-09 [a]
- Map 2
Map Operator Tree:
TableScan
alias: b
@@ -273,15 +259,12 @@ STAGE PLANS:
expressions: key (type: string)
outputColumnNames: _col0
Statistics: Num rows: 52 Data size: 14687 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
+ Merge Join Operator
condition map:
Inner Join 0 to 1
- Estimated key counts: Map 1 => 4
keys:
0 _col0 (type: string)
1 _col0 (type: string)
- input vertices:
- 0 Map 1
Position of Big Table: 1
Statistics: Num rows: 57 Data size: 16155 Basic stats: COMPLETE Column stats: NONE
Group By Operator
@@ -297,13 +280,11 @@ STAGE PLANS:
value expressions: _col0 (type: bigint)
auto parallelism: false
Execution mode: llap
- LLAP IO: no inputs
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -352,7 +333,6 @@ STAGE PLANS:
name: default.bucket_big
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-09
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -472,77 +452,50 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE)
Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
- alias: a
- Statistics: Num rows: 54 Data size: 15252 Basic stats: COMPLETE Column stats: NONE
+ alias: b
+ Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
- Statistics: Num rows: 52 Data size: 14687 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 52 Data size: 14687 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- Estimated key counts: Map 3 => 4
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- input vertices:
- 1 Map 3
- Position of Big Table: 0
- Statistics: Num rows: 57 Data size: 16155 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- value expressions: _col0 (type: bigint)
- auto parallelism: false
- Execution mode: llap
- LLAP IO: no inputs
+ Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
ds 2008-04-08
properties:
- bucket_count 2
+ bucket_count 4
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_big
- numFiles 2
+ name default.bucket_small
+ numFiles 4
numRows 0
partition_columns ds
partition_columns.types string
rawDataSize 0
- serialization.ddl struct bucket_big { string key, string value}
+ serialization.ddl struct bucket_small { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 2750
+ totalSize 226
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -550,48 +503,47 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
SORTBUCKETCOLSPREFIX TRUE
- bucket_count 2
+ bucket_count 4
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_big
+ name default.bucket_small
partition_columns ds
partition_columns.types string
- serialization.ddl struct bucket_big { string key, string value}
+ serialization.ddl struct bucket_small { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_big
- name: default.bucket_big
+ name: default.bucket_small
+ name: default.bucket_small
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-09
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
ds 2008-04-09
properties:
- bucket_count 2
+ bucket_count 4
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_big
- numFiles 2
+ name default.bucket_small
+ numFiles 4
numRows 0
partition_columns ds
partition_columns.types string
rawDataSize 0
- serialization.ddl struct bucket_big { string key, string value}
+ serialization.ddl struct bucket_small { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 2750
+ totalSize 226
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -599,78 +551,87 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
SORTBUCKETCOLSPREFIX TRUE
- bucket_count 2
+ bucket_count 4
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_big
+ name default.bucket_small
partition_columns ds
partition_columns.types string
- serialization.ddl struct bucket_big { string key, string value}
+ serialization.ddl struct bucket_small { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_big
- name: default.bucket_big
+ name: default.bucket_small
+ name: default.bucket_small
Truncated Path -> Alias:
- /bucket_big/ds=2008-04-08 [a]
- /bucket_big/ds=2008-04-09 [a]
- Map 3
+ /bucket_small/ds=2008-04-08 [b]
+ /bucket_small/ds=2008-04-09 [b]
Map Operator Tree:
TableScan
- alias: b
- Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE
+ alias: a
+ Statistics: Num rows: 54 Data size: 15252 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
- Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 52 Data size: 14687 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: a
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- auto parallelism: true
+ Statistics: Num rows: 52 Data size: 14687 Basic stats: COMPLETE Column stats: NONE
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ Position of Big Table: 0
+ Statistics: Num rows: 57 Data size: 16155 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: bigint)
+ auto parallelism: false
Execution mode: llap
- LLAP IO: no inputs
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
ds 2008-04-08
properties:
- bucket_count 4
+ bucket_count 2
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_small
- numFiles 4
+ name default.bucket_big
+ numFiles 2
numRows 0
partition_columns ds
partition_columns.types string
rawDataSize 0
- serialization.ddl struct bucket_small { string key, string value}
+ serialization.ddl struct bucket_big { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 226
+ totalSize 2750
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -678,48 +639,47 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
SORTBUCKETCOLSPREFIX TRUE
- bucket_count 4
+ bucket_count 2
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_small
+ name default.bucket_big
partition_columns ds
partition_columns.types string
- serialization.ddl struct bucket_small { string key, string value}
+ serialization.ddl struct bucket_big { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_small
- name: default.bucket_small
+ name: default.bucket_big
+ name: default.bucket_big
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-09
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
ds 2008-04-09
properties:
- bucket_count 4
+ bucket_count 2
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_small
- numFiles 4
+ name default.bucket_big
+ numFiles 2
numRows 0
partition_columns ds
partition_columns.types string
rawDataSize 0
- serialization.ddl struct bucket_small { string key, string value}
+ serialization.ddl struct bucket_big { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 226
+ totalSize 2750
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -727,26 +687,26 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
SORTBUCKETCOLSPREFIX TRUE
- bucket_count 4
+ bucket_count 2
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_small
+ name default.bucket_big
partition_columns ds
partition_columns.types string
- serialization.ddl struct bucket_small { string key, string value}
+ serialization.ddl struct bucket_big { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_small
- name: default.bucket_small
+ name: default.bucket_big
+ name: default.bucket_big
Truncated Path -> Alias:
- /bucket_small/ds=2008-04-08 [b]
- /bucket_small/ds=2008-04-09 [b]
+ /bucket_big/ds=2008-04-08 [a]
+ /bucket_big/ds=2008-04-09 [a]
Reducer 2
Execution mode: llap
Needs Tagging: false
@@ -817,77 +777,50 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE)
Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
- alias: a
- Statistics: Num rows: 54 Data size: 15252 Basic stats: COMPLETE Column stats: NONE
+ alias: b
+ Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
- Statistics: Num rows: 52 Data size: 14687 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 52 Data size: 14687 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- Estimated key counts: Map 3 => 4
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- input vertices:
- 1 Map 3
- Position of Big Table: 0
- Statistics: Num rows: 57 Data size: 16155 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- value expressions: _col0 (type: bigint)
- auto parallelism: false
- Execution mode: llap
- LLAP IO: no inputs
+ Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
ds 2008-04-08
properties:
- bucket_count 2
+ bucket_count 4
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_big
- numFiles 2
+ name default.bucket_small
+ numFiles 4
numRows 0
partition_columns ds
partition_columns.types string
rawDataSize 0
- serialization.ddl struct bucket_big { string key, string value}
+ serialization.ddl struct bucket_small { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 2750
+ totalSize 226
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -895,48 +828,47 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
SORTBUCKETCOLSPREFIX TRUE
- bucket_count 2
+ bucket_count 4
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_big
+ name default.bucket_small
partition_columns ds
partition_columns.types string
- serialization.ddl struct bucket_big { string key, string value}
+ serialization.ddl struct bucket_small { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_big
- name: default.bucket_big
+ name: default.bucket_small
+ name: default.bucket_small
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-09
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
ds 2008-04-09
properties:
- bucket_count 2
+ bucket_count 4
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_big
- numFiles 2
+ name default.bucket_small
+ numFiles 4
numRows 0
partition_columns ds
partition_columns.types string
rawDataSize 0
- serialization.ddl struct bucket_big { string key, string value}
+ serialization.ddl struct bucket_small { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 2750
+ totalSize 226
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -944,78 +876,87 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
SORTBUCKETCOLSPREFIX TRUE
- bucket_count 2
+ bucket_count 4
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_big
+ name default.bucket_small
partition_columns ds
partition_columns.types string
- serialization.ddl struct bucket_big { string key, string value}
+ serialization.ddl struct bucket_small { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_big
- name: default.bucket_big
+ name: default.bucket_small
+ name: default.bucket_small
Truncated Path -> Alias:
- /bucket_big/ds=2008-04-08 [a]
- /bucket_big/ds=2008-04-09 [a]
- Map 3
+ /bucket_small/ds=2008-04-08 [b]
+ /bucket_small/ds=2008-04-09 [b]
Map Operator Tree:
TableScan
- alias: b
- Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE
+ alias: a
+ Statistics: Num rows: 54 Data size: 15252 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
- Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 52 Data size: 14687 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: a
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- auto parallelism: true
+ Statistics: Num rows: 52 Data size: 14687 Basic stats: COMPLETE Column stats: NONE
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ Position of Big Table: 0
+ Statistics: Num rows: 57 Data size: 16155 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: bigint)
+ auto parallelism: false
Execution mode: llap
- LLAP IO: no inputs
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
ds 2008-04-08
properties:
- bucket_count 4
+ bucket_count 2
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_small
- numFiles 4
+ name default.bucket_big
+ numFiles 2
numRows 0
partition_columns ds
partition_columns.types string
rawDataSize 0
- serialization.ddl struct bucket_small { string key, string value}
+ serialization.ddl struct bucket_big { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 226
+ totalSize 2750
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -1023,48 +964,47 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
SORTBUCKETCOLSPREFIX TRUE
- bucket_count 4
+ bucket_count 2
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_small
+ name default.bucket_big
partition_columns ds
partition_columns.types string
- serialization.ddl struct bucket_small { string key, string value}
+ serialization.ddl struct bucket_big { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_small
- name: default.bucket_small
+ name: default.bucket_big
+ name: default.bucket_big
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-09
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
ds 2008-04-09
properties:
- bucket_count 4
+ bucket_count 2
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_small
- numFiles 4
+ name default.bucket_big
+ numFiles 2
numRows 0
partition_columns ds
partition_columns.types string
rawDataSize 0
- serialization.ddl struct bucket_small { string key, string value}
+ serialization.ddl struct bucket_big { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 226
+ totalSize 2750
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -1072,26 +1012,26 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
SORTBUCKETCOLSPREFIX TRUE
- bucket_count 4
+ bucket_count 2
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_small
+ name default.bucket_big
partition_columns ds
partition_columns.types string
- serialization.ddl struct bucket_small { string key, string value}
+ serialization.ddl struct bucket_big { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_small
- name: default.bucket_small
+ name: default.bucket_big
+ name: default.bucket_big
Truncated Path -> Alias:
- /bucket_small/ds=2008-04-08 [b]
- /bucket_small/ds=2008-04-09 [b]
+ /bucket_big/ds=2008-04-08 [a]
+ /bucket_big/ds=2008-04-09 [a]
Reducer 2
Execution mode: llap
Needs Tagging: false
[10/12] hive git commit: HIVE-18208: SMB Join : Fix the unit tests to
run SMB Joins. (Deepak Jaiswal, reviewed by Jason Dere)
Posted by jd...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/results/clientpositive/auto_sortmerge_join_12.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/auto_sortmerge_join_12.q.out b/ql/src/test/results/clientpositive/auto_sortmerge_join_12.q.out
index 3d0559a..1ed3dd0 100644
--- a/ql/src/test/results/clientpositive/auto_sortmerge_join_12.q.out
+++ b/ql/src/test/results/clientpositive/auto_sortmerge_join_12.q.out
@@ -134,21 +134,39 @@ POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket3out
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@bucket_medium@ds=2008-04-08
-Warning: Map Join MAPJOIN[31][bigTable=?] in task 'Stage-3:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[32][bigTable=?] in task 'Stage-8:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[31][bigTable=?] in task 'Stage-7:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[17][tables = [$hdt$_1, $hdt$_2, $hdt$_0, $hdt$_3]] in Stage 'Stage-2:MAPRED' is a cross product
PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key
PREHOOK: type: QUERY
POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-9 is a root stage
- Stage-3 depends on stages: Stage-9
+ Stage-13 is a root stage , consists of Stage-16, Stage-17, Stage-18, Stage-1
+ Stage-16 has a backup stage: Stage-1
+ Stage-10 depends on stages: Stage-16
+ Stage-9 depends on stages: Stage-1, Stage-10, Stage-11, Stage-12 , consists of Stage-14, Stage-15, Stage-2
+ Stage-14 has a backup stage: Stage-2
+ Stage-7 depends on stages: Stage-14
+ Stage-3 depends on stages: Stage-2, Stage-7, Stage-8
+ Stage-15 has a backup stage: Stage-2
+ Stage-8 depends on stages: Stage-15
+ Stage-2
+ Stage-17 has a backup stage: Stage-1
+ Stage-11 depends on stages: Stage-17
+ Stage-18 has a backup stage: Stage-1
+ Stage-12 depends on stages: Stage-18
+ Stage-1
Stage-0 depends on stages: Stage-3
STAGE PLANS:
- Stage: Stage-9
+ Stage: Stage-13
+ Conditional Operator
+
+ Stage: Stage-16
Map Reduce Local Work
Alias -> Map Local Tables:
- $hdt$_1:a
+ $hdt$_0:c
Fetch Operator
limit: -1
Partition Description:
@@ -159,23 +177,23 @@ STAGE PLANS:
partition values:
ds 2008-04-08
properties:
- bucket_count 2
+ bucket_count 4
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_small
- numFiles 2
+ name default.bucket_big
+ numFiles 4
numRows 0
partition_columns ds
partition_columns.types string
rawDataSize 0
- serialization.ddl struct bucket_small { string key, string value}
+ serialization.ddl struct bucket_big { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 114
+ totalSize 5812
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -183,51 +201,47 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
SORTBUCKETCOLSPREFIX TRUE
- bucket_count 2
+ bucket_count 4
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_small
+ name default.bucket_big
partition_columns ds
partition_columns.types string
- serialization.ddl struct bucket_small { string key, string value}
+ serialization.ddl struct bucket_big { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_small
- name: default.bucket_small
- $hdt$_2:b
- Fetch Operator
- limit: -1
- Partition Description:
+ name: default.bucket_big
+ name: default.bucket_big
Partition
- base file name: ds=2008-04-08
+ base file name: ds=2008-04-09
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
- ds 2008-04-08
+ ds 2008-04-09
properties:
- bucket_count 3
+ bucket_count 4
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_medium
- numFiles 3
+ name default.bucket_big
+ numFiles 4
numRows 0
partition_columns ds
partition_columns.types string
rawDataSize 0
- serialization.ddl struct bucket_medium { string key, string value}
+ serialization.ddl struct bucket_big { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 170
+ totalSize 5812
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -235,24 +249,24 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
SORTBUCKETCOLSPREFIX TRUE
- bucket_count 3
+ bucket_count 4
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_medium
+ name default.bucket_big
partition_columns ds
partition_columns.types string
- serialization.ddl struct bucket_medium { string key, string value}
+ serialization.ddl struct bucket_big { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_medium
- name: default.bucket_medium
- $hdt$_3:d
+ name: default.bucket_big
+ name: default.bucket_big
+ $hdt$_2:b
Fetch Operator
limit: -1
Partition Description:
@@ -305,25 +319,25 @@ STAGE PLANS:
name: default.bucket_medium
name: default.bucket_medium
Alias -> Map Local Operator Tree:
- $hdt$_1:a
+ $hdt$_0:c
TableScan
- alias: a
- Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+ alias: c
+ Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
- Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
HashTable Sink Operator
keys:
0 _col0 (type: string)
1 _col0 (type: string)
2 _col0 (type: string)
- Position of Big Table: 2
+ Position of Big Table: 0
$hdt$_2:b
TableScan
alias: b
@@ -342,35 +356,23 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
2 _col0 (type: string)
- Position of Big Table: 2
- $hdt$_3:d
- TableScan
- alias: d
- Statistics: Num rows: 1 Data size: 170 Basic stats: PARTIAL Column stats: NONE
- GatherStats: false
- Select Operator
- Statistics: Num rows: 1 Data size: 170 Basic stats: PARTIAL Column stats: NONE
- HashTable Sink Operator
- keys:
- 0
- 1
- Position of Big Table: 0
+ Position of Big Table: 0
- Stage: Stage-3
+ Stage: Stage-10
Map Reduce
Map Operator Tree:
TableScan
- alias: c
- Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+ alias: a
+ Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
- Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -379,28 +381,26 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
2 _col0 (type: string)
- Position of Big Table: 2
+ Position of Big Table: 0
Statistics: Num rows: 255 Data size: 25572 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0
- 1
- Position of Big Table: 0
- Statistics: Num rows: 255 Data size: 69177 Basic stats: PARTIAL Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE
- tag: -1
- value expressions: _col0 (type: bigint)
- auto parallelism: false
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ column.name.delimiter ,
+ columns
+ columns.types
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Local Work:
Map Reduce Local Work
Path -> Alias:
@@ -603,8 +603,234 @@ STAGE PLANS:
name: default.bucket_small
name: default.bucket_small
Truncated Path -> Alias:
- /bucket_big/ds=2008-04-08 [$hdt$_0:c]
- /bucket_big/ds=2008-04-09 [$hdt$_0:c]
+ /bucket_small/ds=2008-04-08 [$hdt$_1:a]
+
+ Stage: Stage-9
+ Conditional Operator
+
+ Stage: Stage-14
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $hdt$_3:d
+ Fetch Operator
+ limit: -1
+ Partition Description:
+ Partition
+ base file name: ds=2008-04-08
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-08
+ properties:
+ bucket_count 3
+ bucket_field_name key
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.bucket_medium
+ numFiles 3
+ numRows 0
+ partition_columns ds
+ partition_columns.types string
+ rawDataSize 0
+ serialization.ddl struct bucket_medium { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 170
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ SORTBUCKETCOLSPREFIX TRUE
+ bucket_count 3
+ bucket_field_name key
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.bucket_medium
+ partition_columns ds
+ partition_columns.types string
+ serialization.ddl struct bucket_medium { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.bucket_medium
+ name: default.bucket_medium
+ Alias -> Map Local Operator Tree:
+ $hdt$_3:d
+ TableScan
+ alias: d
+ Statistics: Num rows: 1 Data size: 170 Basic stats: PARTIAL Column stats: NONE
+ GatherStats: false
+ Select Operator
+ Statistics: Num rows: 1 Data size: 170 Basic stats: PARTIAL Column stats: NONE
+ HashTable Sink Operator
+ keys:
+ 0
+ 1
+ Position of Big Table: 0
+
+ Stage: Stage-7
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ GatherStats: false
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ Position of Big Table: 0
+ Statistics: Num rows: 255 Data size: 69177 Basic stats: PARTIAL Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ column.name.delimiter ,
+ columns _col0
+ columns.types bigint
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Local Work:
+ Map Reduce Local Work
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: -mr-10004
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ column.name.delimiter ,
+ columns
+ columns.types
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ column.name.delimiter ,
+ columns
+ columns.types
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+#### A masked pattern was here ####
+ Partition
+ base file name: ds=2008-04-08
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-08
+ properties:
+ bucket_count 3
+ bucket_field_name key
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.bucket_medium
+ numFiles 3
+ numRows 0
+ partition_columns ds
+ partition_columns.types string
+ rawDataSize 0
+ serialization.ddl struct bucket_medium { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 170
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ SORTBUCKETCOLSPREFIX TRUE
+ bucket_count 3
+ bucket_field_name key
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.bucket_medium
+ partition_columns ds
+ partition_columns.types string
+ serialization.ddl struct bucket_medium { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.bucket_medium
+ name: default.bucket_medium
+ Truncated Path -> Alias:
+#### A masked pattern was here ####
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ GatherStats: false
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: bigint)
+ auto parallelism: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: -mr-10005
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ column.name.delimiter ,
+ columns _col0
+ columns.types bigint
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ column.name.delimiter ,
+ columns _col0
+ columns.types bigint
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Truncated Path -> Alias:
+#### A masked pattern was here ####
Needs Tagging: false
Reduce Operator Tree:
Group By Operator
@@ -635,13 +861,1417 @@ STAGE PLANS:
GatherStats: false
MultiFileSpray: false
+ Stage: Stage-15
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $INTNAME
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $INTNAME
+ TableScan
+ GatherStats: false
+ HashTable Sink Operator
+ keys:
+ 0
+ 1
+ Position of Big Table: 1
+
+ Stage: Stage-8
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: d
+ Statistics: Num rows: 1 Data size: 170 Basic stats: PARTIAL Column stats: NONE
+ GatherStats: false
+ Select Operator
+ Statistics: Num rows: 1 Data size: 170 Basic stats: PARTIAL Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ Position of Big Table: 1
+ Statistics: Num rows: 255 Data size: 69177 Basic stats: PARTIAL Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ column.name.delimiter ,
+ columns _col0
+ columns.types bigint
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Local Work:
+ Map Reduce Local Work
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: -mr-10004
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ column.name.delimiter ,
+ columns
+ columns.types
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ column.name.delimiter ,
+ columns
+ columns.types
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+#### A masked pattern was here ####
+ Partition
+ base file name: ds=2008-04-08
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-08
+ properties:
+ bucket_count 3
+ bucket_field_name key
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.bucket_medium
+ numFiles 3
+ numRows 0
+ partition_columns ds
+ partition_columns.types string
+ rawDataSize 0
+ serialization.ddl struct bucket_medium { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 170
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ SORTBUCKETCOLSPREFIX TRUE
+ bucket_count 3
+ bucket_field_name key
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.bucket_medium
+ partition_columns ds
+ partition_columns.types string
+ serialization.ddl struct bucket_medium { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.bucket_medium
+ name: default.bucket_medium
+ Truncated Path -> Alias:
+ /bucket_medium/ds=2008-04-08 [$hdt$_3:d]
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ GatherStats: false
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 255 Data size: 25572 Basic stats: COMPLETE Column stats: NONE
+ tag: 0
+ auto parallelism: false
+ TableScan
+ alias: d
+ Statistics: Num rows: 1 Data size: 170 Basic stats: PARTIAL Column stats: NONE
+ GatherStats: false
+ Select Operator
+ Statistics: Num rows: 1 Data size: 170 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 170 Basic stats: PARTIAL Column stats: NONE
+ tag: 1
+ auto parallelism: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: -mr-10004
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ column.name.delimiter ,
+ columns
+ columns.types
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ column.name.delimiter ,
+ columns
+ columns.types
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+#### A masked pattern was here ####
+ Partition
+ base file name: ds=2008-04-08
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-08
+ properties:
+ bucket_count 3
+ bucket_field_name key
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.bucket_medium
+ numFiles 3
+ numRows 0
+ partition_columns ds
+ partition_columns.types string
+ rawDataSize 0
+ serialization.ddl struct bucket_medium { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 170
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ SORTBUCKETCOLSPREFIX TRUE
+ bucket_count 3
+ bucket_field_name key
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.bucket_medium
+ partition_columns ds
+ partition_columns.types string
+ serialization.ddl struct bucket_medium { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.bucket_medium
+ name: default.bucket_medium
+ Truncated Path -> Alias:
+ /bucket_medium/ds=2008-04-08 [$hdt$_3:d]
+#### A masked pattern was here ####
+ Needs Tagging: true
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ Statistics: Num rows: 255 Data size: 69177 Basic stats: PARTIAL Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ column.name.delimiter ,
+ columns _col0
+ columns.types bigint
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
+ Stage: Stage-17
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $hdt$_0:c
+ Fetch Operator
+ limit: -1
+ Partition Description:
+ Partition
+ base file name: ds=2008-04-08
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-08
+ properties:
+ bucket_count 4
+ bucket_field_name key
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.bucket_big
+ numFiles 4
+ numRows 0
+ partition_columns ds
+ partition_columns.types string
+ rawDataSize 0
+ serialization.ddl struct bucket_big { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ SORTBUCKETCOLSPREFIX TRUE
+ bucket_count 4
+ bucket_field_name key
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.bucket_big
+ partition_columns ds
+ partition_columns.types string
+ serialization.ddl struct bucket_big { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.bucket_big
+ name: default.bucket_big
+ Partition
+ base file name: ds=2008-04-09
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-09
+ properties:
+ bucket_count 4
+ bucket_field_name key
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.bucket_big
+ numFiles 4
+ numRows 0
+ partition_columns ds
+ partition_columns.types string
+ rawDataSize 0
+ serialization.ddl struct bucket_big { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ SORTBUCKETCOLSPREFIX TRUE
+ bucket_count 4
+ bucket_field_name key
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.bucket_big
+ partition_columns ds
+ partition_columns.types string
+ serialization.ddl struct bucket_big { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.bucket_big
+ name: default.bucket_big
+ $hdt$_1:a
+ Fetch Operator
+ limit: -1
+ Partition Description:
+ Partition
+ base file name: ds=2008-04-08
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-08
+ properties:
+ bucket_count 2
+ bucket_field_name key
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.bucket_small
+ numFiles 2
+ numRows 0
+ partition_columns ds
+ partition_columns.types string
+ rawDataSize 0
+ serialization.ddl struct bucket_small { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 114
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ SORTBUCKETCOLSPREFIX TRUE
+ bucket_count 2
+ bucket_field_name key
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.bucket_small
+ partition_columns ds
+ partition_columns.types string
+ serialization.ddl struct bucket_small { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.bucket_small
+ name: default.bucket_small
+ Alias -> Map Local Operator Tree:
+ $hdt$_0:c
+ TableScan
+ alias: c
+ Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+ HashTable Sink Operator
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ 2 _col0 (type: string)
+ Position of Big Table: 1
+ $hdt$_1:a
+ TableScan
+ alias: a
+ Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+ HashTable Sink Operator
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ 2 _col0 (type: string)
+ Position of Big Table: 1
+
+ Stage: Stage-11
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 1 to 2
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ 2 _col0 (type: string)
+ Position of Big Table: 1
+ Statistics: Num rows: 255 Data size: 25572 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ column.name.delimiter ,
+ columns
+ columns.types
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Local Work:
+ Map Reduce Local Work
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: ds=2008-04-08
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-08
+ properties:
+ bucket_count 4
+ bucket_field_name key
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.bucket_big
+ numFiles 4
+ numRows 0
+ partition_columns ds
+ partition_columns.types string
+ rawDataSize 0
+ serialization.ddl struct bucket_big { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ SORTBUCKETCOLSPREFIX TRUE
+ bucket_count 4
+ bucket_field_name key
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.bucket_big
+ partition_columns ds
+ partition_columns.types string
+ serialization.ddl struct bucket_big { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.bucket_big
+ name: default.bucket_big
+#### A masked pattern was here ####
+ Partition
+ base file name: ds=2008-04-09
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-09
+ properties:
+ bucket_count 4
+ bucket_field_name key
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.bucket_big
+ numFiles 4
+ numRows 0
+ partition_columns ds
+ partition_columns.types string
+ rawDataSize 0
+ serialization.ddl struct bucket_big { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ SORTBUCKETCOLSPREFIX TRUE
+ bucket_count 4
+ bucket_field_name key
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.bucket_big
+ partition_columns ds
+ partition_columns.types string
+ serialization.ddl struct bucket_big { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.bucket_big
+ name: default.bucket_big
+#### A masked pattern was here ####
+ Partition
+ base file name: ds=2008-04-08
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-08
+ properties:
+ bucket_count 3
+ bucket_field_name key
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.bucket_medium
+ numFiles 3
+ numRows 0
+ partition_columns ds
+ partition_columns.types string
+ rawDataSize 0
+ serialization.ddl struct bucket_medium { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 170
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ SORTBUCKETCOLSPREFIX TRUE
+ bucket_count 3
+ bucket_field_name key
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.bucket_medium
+ partition_columns ds
+ partition_columns.types string
+ serialization.ddl struct bucket_medium { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.bucket_medium
+ name: default.bucket_medium
+#### A masked pattern was here ####
+ Partition
+ base file name: ds=2008-04-08
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-08
+ properties:
+ bucket_count 2
+ bucket_field_name key
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.bucket_small
+ numFiles 2
+ numRows 0
+ partition_columns ds
+ partition_columns.types string
+ rawDataSize 0
+ serialization.ddl struct bucket_small { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 114
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ SORTBUCKETCOLSPREFIX TRUE
+ bucket_count 2
+ bucket_field_name key
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.bucket_small
+ partition_columns ds
+ partition_columns.types string
+ serialization.ddl struct bucket_small { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.bucket_small
+ name: default.bucket_small
+ Truncated Path -> Alias:
+ /bucket_medium/ds=2008-04-08 [$hdt$_2:b]
+
+ Stage: Stage-18
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $hdt$_1:a
+ Fetch Operator
+ limit: -1
+ Partition Description:
+ Partition
+ base file name: ds=2008-04-08
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-08
+ properties:
+ bucket_count 2
+ bucket_field_name key
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.bucket_small
+ numFiles 2
+ numRows 0
+ partition_columns ds
+ partition_columns.types string
+ rawDataSize 0
+ serialization.ddl struct bucket_small { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 114
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ SORTBUCKETCOLSPREFIX TRUE
+ bucket_count 2
+ bucket_field_name key
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.bucket_small
+ partition_columns ds
+ partition_columns.types string
+ serialization.ddl struct bucket_small { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.bucket_small
+ name: default.bucket_small
+ $hdt$_2:b
+ Fetch Operator
+ limit: -1
+ Partition Description:
+ Partition
+ base file name: ds=2008-04-08
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-08
+ properties:
+ bucket_count 3
+ bucket_field_name key
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.bucket_medium
+ numFiles 3
+ numRows 0
+ partition_columns ds
+ partition_columns.types string
+ rawDataSize 0
+ serialization.ddl struct bucket_medium { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 170
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ SORTBUCKETCOLSPREFIX TRUE
+ bucket_count 3
+ bucket_field_name key
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.bucket_medium
+ partition_columns ds
+ partition_columns.types string
+ serialization.ddl struct bucket_medium { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.bucket_medium
+ name: default.bucket_medium
+ Alias -> Map Local Operator Tree:
+ $hdt$_1:a
+ TableScan
+ alias: a
+ Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+ HashTable Sink Operator
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ 2 _col0 (type: string)
+ Position of Big Table: 2
+ $hdt$_2:b
+ TableScan
+ alias: b
+ Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: NONE
+ HashTable Sink Operator
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ 2 _col0 (type: string)
+ Position of Big Table: 2
+
+ Stage: Stage-12
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: c
+ Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 1 to 2
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ 2 _col0 (type: string)
+ Position of Big Table: 2
+ Statistics: Num rows: 255 Data size: 25572 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ column.name.delimiter ,
+ columns
+ columns.types
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Local Work:
+ Map Reduce Local Work
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: ds=2008-04-08
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-08
+ properties:
+ bucket_count 4
+ bucket_field_name key
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.bucket_big
+ numFiles 4
+ numRows 0
+ partition_columns ds
+ partition_columns.types string
+ rawDataSize 0
+ serialization.ddl struct bucket_big { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ SORTBUCKETCOLSPREFIX TRUE
+ bucket_count 4
+ bucket_field_name key
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.bucket_big
+ partition_columns ds
+ partition_columns.types string
+ serialization.ddl struct bucket_big { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.bucket_big
+ name: default.bucket_big
+#### A masked pattern was here ####
+ Partition
+ base file name: ds=2008-04-09
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-09
+ properties:
+ bucket_count 4
+ bucket_field_name key
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.bucket_big
+ numFiles 4
+ numRows 0
+ partition_columns ds
+ partition_columns.types string
+ rawDataSize 0
+ serialization.ddl struct bucket_big { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ SORTBUCKETCOLSPREFIX TRUE
+ bucket_count 4
+ bucket_field_name key
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.bucket_big
+ partition_columns ds
+ partition_columns.types string
+ serialization.ddl struct bucket_big { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.bucket_big
+ name: default.bucket_big
+#### A masked pattern was here ####
+ Partition
+ base file name: ds=2008-04-08
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-08
+ properties:
+ bucket_count 3
+ bucket_field_name key
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.bucket_medium
+ numFiles 3
+ numRows 0
+ partition_columns ds
+ partition_columns.types string
+ rawDataSize 0
+ serialization.ddl struct bucket_medium { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 170
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ SORTBUCKETCOLSPREFIX TRUE
+ bucket_count 3
+ bucket_field_name key
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.bucket_medium
+ partition_columns ds
+ partition_columns.types string
+ serialization.ddl struct bucket_medium { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.bucket_medium
+ name: default.bucket_medium
+#### A masked pattern was here ####
+ Partition
+ base file name: ds=2008-04-08
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-08
+ properties:
+ bucket_count 2
+ bucket_field_name key
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.bucket_small
+ numFiles 2
+ numRows 0
+ partition_columns ds
+ partition_columns.types string
+ rawDataSize 0
+ serialization.ddl struct bucket_small { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 114
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ SORTBUCKETCOLSPREFIX TRUE
+ bucket_count 2
+ bucket_field_name key
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.bucket_small
+ partition_columns ds
+ partition_columns.types string
+ serialization.ddl struct bucket_small { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.bucket_small
+ name: default.bucket_small
+ Truncated Path -> Alias:
+ /bucket_big/ds=2008-04-08 [$hdt$_0:c]
+ /bucket_big/ds=2008-04-09 [$hdt$_0:c]
+
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: a
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+ tag: 0
+ auto parallelism: false
+ TableScan
+ alias: b
+ Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: a
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: NONE
+ tag: 1
+ auto parallelism: false
+ TableScan
+ alias: c
+ Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: a
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+ tag: 2
+ auto parallelism: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: ds=2008-04-08
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-08
+ properties:
+ bucket_count 4
+ bucket_field_name key
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.bucket_big
+ numFiles 4
+ numRows 0
+ partition_columns ds
+ partition_columns.types string
+ rawDataSize 0
+ serialization.ddl struct bucket_big { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ SORTBUCKETCOLSPREFIX TRUE
+ bucket_count 4
+ bucket_field_name key
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.bucket_big
+ partition_columns ds
+ partition_columns.types string
+ serialization.ddl struct bucket_big { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.bucket_big
+ name: default.bucket_big
+#### A masked pattern was here ####
+ Partition
+ base file name: ds=2008-04-09
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-09
+ properties:
+ bucket_count 4
+ bucket_field_name key
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.bucket_big
+ numFiles 4
+ numRows 0
+ partition_columns ds
+ partition_columns.types string
+ rawDataSize 0
+ serialization.ddl struct bucket_big { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ SORTBUCKETCOLSPREFIX TRUE
+ bucket_count 4
+ bucket_field_name key
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.bucket_big
+ partition_columns ds
+ partition_columns.types string
+ serialization.ddl struct bucket_big { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.bucket_big
+ name: default.bucket_big
+#### A masked pattern was here ####
+ Partition
+ base file name: ds=2008-04-08
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-08
+ properties:
+ bucket_count 3
+ bucket_field_name key
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.bucket_medium
+ numFiles 3
+ numRows 0
+ partition_columns ds
+ partition_columns.types string
+ rawDataSize 0
+ serialization.ddl struct bucket_medium { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 170
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ SORTBUCKETCOLSPREFIX TRUE
+ bucket_count 3
+ bucket_field_name key
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.bucket_medium
+ partition_columns ds
+ partition_columns.types string
+ serialization.ddl struct bucket_medium { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.bucket_medium
+ name: default.bucket_medium
+#### A masked pattern was here ####
+ Partition
+ base file name: ds=2008-04-08
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-08
+ properties:
+ bucket_count 2
+ bucket_field_name key
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.bucket_small
+ numFiles 2
+ numRows 0
+ partition_columns ds
+ partition_columns.types string
+ rawDataSize 0
+ serialization.ddl struct bucket_small { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 114
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ SORTBUCKETCOLSPREFIX TRUE
+ bucket_count 2
+ bucket_field_name key
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.bucket_small
+ partition_columns ds
+ partition_columns.types string
+ serialization.ddl struct bucket_small { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.bucket_small
+ name: default.bucket_small
+ Truncated Path -> Alias:
+ /bucket_big/ds=2008-04-08 [$hdt$_0:c]
+ /bucket_big/ds=2008-04-09 [$hdt$_0:c]
+ /bucket_medium/ds=2008-04-08 [$hdt$_2:b]
+ /bucket_small/ds=2008-04-08 [$hdt$_1:a]
+ Needs Tagging: true
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 1 to 2
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ 2 _col0 (type: string)
+ Statistics: Num rows: 255 Data size: 25572 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ column.name.delimiter ,
+ columns
+ columns.types
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
ListSink
-Warning: Map Join MAPJOIN[31][bigTable=?] in task 'Stage-3:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[32][bigTable=?] in task 'Stage-8:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[31][bigTable=?] in task 'Stage-7:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[17][tables = [$hdt$_1, $hdt$_2, $hdt$_0, $hdt$_3]] in Stage 'Stage-2:MAPRED' is a cross product
PREHOOK: query: select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key
PREHOOK: type: QUERY
PREHOOK: Input: default@bucket_big
[09/12] hive git commit: HIVE-18208: SMB Join : Fix the unit tests to
run SMB Joins. (Deepak Jaiswal, reviewed by Jason Dere)
Posted by jd...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_1.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_1.q.out
index 36bfac3..8fc43e4 100644
--- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_1.q.out
+++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_1.q.out
@@ -112,11 +112,10 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 2 <- Map 1 (BROADCAST_EDGE)
Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
- Map 1
+ Map 2
Map Operator Tree:
TableScan
alias: a
@@ -130,22 +129,11 @@ STAGE PLANS:
expressions: key (type: string)
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: a
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- auto parallelism: true
- Execution mode: llap
- LLAP IO: no inputs
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -194,7 +182,6 @@ STAGE PLANS:
name: default.bucket_small
Truncated Path -> Alias:
/bucket_small/ds=2008-04-08 [a]
- Map 2
Map Operator Tree:
TableScan
alias: b
@@ -208,15 +195,12 @@ STAGE PLANS:
expressions: key (type: string)
outputColumnNames: _col0
Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
+ Merge Join Operator
condition map:
Inner Join 0 to 1
- Estimated key counts: Map 1 => 1
keys:
0 _col0 (type: string)
1 _col0 (type: string)
- input vertices:
- 0 Map 1
Position of Big Table: 1
Statistics: Num rows: 122 Data size: 33926 Basic stats: COMPLETE Column stats: NONE
Group By Operator
@@ -232,13 +216,11 @@ STAGE PLANS:
value expressions: _col0 (type: bigint)
auto parallelism: false
Execution mode: llap
- LLAP IO: no inputs
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -287,7 +269,6 @@ STAGE PLANS:
name: default.bucket_big
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-09
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -405,77 +386,50 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE)
Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
- alias: a
- Statistics: Num rows: 116 Data size: 32232 Basic stats: COMPLETE Column stats: NONE
+ alias: b
+ Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
- Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- Estimated key counts: Map 3 => 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- input vertices:
- 1 Map 3
- Position of Big Table: 0
- Statistics: Num rows: 122 Data size: 33926 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- value expressions: _col0 (type: bigint)
- auto parallelism: false
- Execution mode: llap
- LLAP IO: no inputs
+ Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
ds 2008-04-08
properties:
- bucket_count 4
+ bucket_count 2
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_big
- numFiles 4
+ name default.bucket_small
+ numFiles 2
numRows 0
partition_columns ds
partition_columns.types string
rawDataSize 0
- serialization.ddl struct bucket_big { string key, string value}
+ serialization.ddl struct bucket_small { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 5812
+ totalSize 114
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -483,30 +437,68 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
SORTBUCKETCOLSPREFIX TRUE
- bucket_count 4
+ bucket_count 2
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_big
+ name default.bucket_small
partition_columns ds
partition_columns.types string
- serialization.ddl struct bucket_big { string key, string value}
+ serialization.ddl struct bucket_small { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_big
- name: default.bucket_big
+ name: default.bucket_small
+ name: default.bucket_small
+ Truncated Path -> Alias:
+ /bucket_small/ds=2008-04-08 [b]
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 116 Data size: 32232 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ Position of Big Table: 0
+ Statistics: Num rows: 122 Data size: 33926 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: bigint)
+ auto parallelism: false
+ Execution mode: llap
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-09
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
- ds 2008-04-09
+ ds 2008-04-08
properties:
bucket_count 4
bucket_field_name key
@@ -549,61 +541,30 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.bucket_big
name: default.bucket_big
- Truncated Path -> Alias:
- /bucket_big/ds=2008-04-08 [a]
- /bucket_big/ds=2008-04-09 [a]
- Map 3
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: a
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- auto parallelism: true
- Execution mode: llap
- LLAP IO: no inputs
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
- ds 2008-04-08
+ ds 2008-04-09
properties:
- bucket_count 2
+ bucket_count 4
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_small
- numFiles 2
+ name default.bucket_big
+ numFiles 4
numRows 0
partition_columns ds
partition_columns.types string
rawDataSize 0
- serialization.ddl struct bucket_small { string key, string value}
+ serialization.ddl struct bucket_big { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 114
+ totalSize 5812
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -611,25 +572,26 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
SORTBUCKETCOLSPREFIX TRUE
- bucket_count 2
+ bucket_count 4
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_small
+ name default.bucket_big
partition_columns ds
partition_columns.types string
- serialization.ddl struct bucket_small { string key, string value}
+ serialization.ddl struct bucket_big { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_small
- name: default.bucket_small
+ name: default.bucket_big
+ name: default.bucket_big
Truncated Path -> Alias:
- /bucket_small/ds=2008-04-08 [b]
+ /bucket_big/ds=2008-04-08 [a]
+ /bucket_big/ds=2008-04-09 [a]
Reducer 2
Execution mode: llap
Needs Tagging: false
@@ -698,77 +660,50 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE)
Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
- alias: a
- Statistics: Num rows: 116 Data size: 32232 Basic stats: COMPLETE Column stats: NONE
+ alias: b
+ Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
- Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- Estimated key counts: Map 3 => 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- input vertices:
- 1 Map 3
- Position of Big Table: 0
- Statistics: Num rows: 122 Data size: 33926 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- value expressions: _col0 (type: bigint)
- auto parallelism: false
- Execution mode: llap
- LLAP IO: no inputs
+ Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
ds 2008-04-08
properties:
- bucket_count 4
+ bucket_count 2
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_big
- numFiles 4
+ name default.bucket_small
+ numFiles 2
numRows 0
partition_columns ds
partition_columns.types string
rawDataSize 0
- serialization.ddl struct bucket_big { string key, string value}
+ serialization.ddl struct bucket_small { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 5812
+ totalSize 114
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -776,30 +711,68 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
SORTBUCKETCOLSPREFIX TRUE
- bucket_count 4
+ bucket_count 2
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_big
+ name default.bucket_small
partition_columns ds
partition_columns.types string
- serialization.ddl struct bucket_big { string key, string value}
+ serialization.ddl struct bucket_small { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_big
- name: default.bucket_big
+ name: default.bucket_small
+ name: default.bucket_small
+ Truncated Path -> Alias:
+ /bucket_small/ds=2008-04-08 [b]
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 116 Data size: 32232 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ Position of Big Table: 0
+ Statistics: Num rows: 122 Data size: 33926 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: bigint)
+ auto parallelism: false
+ Execution mode: llap
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-09
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
- ds 2008-04-09
+ ds 2008-04-08
properties:
bucket_count 4
bucket_field_name key
@@ -842,61 +815,30 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.bucket_big
name: default.bucket_big
- Truncated Path -> Alias:
- /bucket_big/ds=2008-04-08 [a]
- /bucket_big/ds=2008-04-09 [a]
- Map 3
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: a
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- auto parallelism: true
- Execution mode: llap
- LLAP IO: no inputs
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
- ds 2008-04-08
+ ds 2008-04-09
properties:
- bucket_count 2
+ bucket_count 4
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_small
- numFiles 2
+ name default.bucket_big
+ numFiles 4
numRows 0
partition_columns ds
partition_columns.types string
rawDataSize 0
- serialization.ddl struct bucket_small { string key, string value}
+ serialization.ddl struct bucket_big { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 114
+ totalSize 5812
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -904,25 +846,26 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
SORTBUCKETCOLSPREFIX TRUE
- bucket_count 2
+ bucket_count 4
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_small
+ name default.bucket_big
partition_columns ds
partition_columns.types string
- serialization.ddl struct bucket_small { string key, string value}
+ serialization.ddl struct bucket_big { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_small
- name: default.bucket_small
+ name: default.bucket_big
+ name: default.bucket_big
Truncated Path -> Alias:
- /bucket_small/ds=2008-04-08 [b]
+ /bucket_big/ds=2008-04-08 [a]
+ /bucket_big/ds=2008-04-09 [a]
Reducer 2
Execution mode: llap
Needs Tagging: false
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_10.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_10.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_10.q.out
index c1459d5..a724617 100644
--- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_10.q.out
+++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_10.q.out
@@ -71,9 +71,10 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS)
- Map 4 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS)
- Reducer 3 <- Union 2 (CUSTOM_SIMPLE_EDGE)
+ Map 1 <- Union 2 (CONTAINS)
+ Map 5 <- Union 2 (CONTAINS)
+ Reducer 3 <- Map 6 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -88,27 +89,14 @@ STAGE PLANS:
expressions: key (type: int)
outputColumnNames: _col0
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- input vertices:
- 1 Map 5
- Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: bigint)
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
- Map 4
+ Map 5
Map Operator Tree:
TableScan
alias: a
@@ -120,27 +108,14 @@ STAGE PLANS:
expressions: key (type: int)
outputColumnNames: _col0
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- input vertices:
- 1 Map 5
- Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: bigint)
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
- Map 5
+ Map 6
Map Operator Tree:
TableScan
alias: a
@@ -157,16 +132,30 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
Reducer 3
Execution mode: llap
Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint)
+ Reducer 4
+ Execution mode: llap
+ Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
mode: mergepartial
@@ -240,8 +229,7 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 2 <- Map 1 (BROADCAST_EDGE)
- Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -252,19 +240,10 @@ STAGE PLANS:
Filter Operator
predicate: (key < 6) (type: boolean)
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- keys: key (type: int)
- mode: final
+ Select Operator
+ expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Execution mode: llap
- LLAP IO: no inputs
- Map 2
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Map Operator Tree:
TableScan
alias: a
@@ -272,18 +251,17 @@ STAGE PLANS:
Filter Operator
predicate: (key < 6) (type: boolean)
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: int)
+ Group By Operator
+ keys: key (type: int)
+ mode: final
outputColumnNames: _col0
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
+ Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Merge Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
- input vertices:
- 0 Map 1
Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
@@ -295,8 +273,7 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: bigint)
Execution mode: llap
- LLAP IO: no inputs
- Reducer 3
+ Reducer 2
Execution mode: llap
Reduce Operator Tree:
Group By Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_11.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_11.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_11.q.out
index 37d97d2..3eb8fde 100644
--- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_11.q.out
+++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_11.q.out
@@ -1,8 +1,8 @@
-PREHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+PREHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (KEY) INTO 2 BUCKETS STORED AS TEXTFILE
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@bucket_small
-POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (KEY) INTO 2 BUCKETS STORED AS TEXTFILE
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@bucket_small
@@ -23,11 +23,11 @@ POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket2out
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@bucket_small@ds=2008-04-08
-PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE
+PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY(KEY) INTO 4 BUCKETS STORED AS TEXTFILE
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@bucket_big
-POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE
+POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY(KEY) INTO 4 BUCKETS STORED AS TEXTFILE
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@bucket_big
@@ -110,8 +110,8 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 2 <- Map 1 (BROADCAST_EDGE)
- Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -172,6 +172,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
+ SORTBUCKETCOLSPREFIX TRUE
bucket_count 2
bucket_field_name key
column.name.delimiter ,
@@ -191,7 +192,7 @@ STAGE PLANS:
name: default.bucket_small
Truncated Path -> Alias:
/bucket_small/ds=2008-04-08 [a]
- Map 2
+ Map 4
Map Operator Tree:
TableScan
alias: b
@@ -205,29 +206,14 @@ STAGE PLANS:
expressions: key (type: string)
outputColumnNames: _col0
Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- Estimated key counts: Map 1 => 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- input vertices:
- 0 Map 1
- Position of Big Table: 1
- Statistics: Num rows: 122 Data size: 33926 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- value expressions: _col0 (type: bigint)
- auto parallelism: false
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: a
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE
+ tag: 1
+ auto parallelism: true
Execution mode: llap
LLAP IO: no inputs
Path -> Alias:
@@ -264,6 +250,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
+ SORTBUCKETCOLSPREFIX TRUE
bucket_count 4
bucket_field_name key
column.name.delimiter ,
@@ -312,6 +299,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
+ SORTBUCKETCOLSPREFIX TRUE
bucket_count 4
bucket_field_name key
column.name.delimiter ,
@@ -332,6 +320,30 @@ STAGE PLANS:
Truncated Path -> Alias:
/bucket_big/ds=2008-04-08 [b]
/bucket_big/ds=2008-04-09 [b]
+ Reducer 2
+ Execution mode: llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ Position of Big Table: 1
+ Statistics: Num rows: 122 Data size: 33926 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: bigint)
+ auto parallelism: false
Reducer 3
Execution mode: llap
Needs Tagging: false
@@ -400,11 +412,10 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 2 <- Map 1 (BROADCAST_EDGE)
Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
- Map 1
+ Map 2
Map Operator Tree:
TableScan
alias: a
@@ -418,22 +429,11 @@ STAGE PLANS:
expressions: key (type: string)
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: a
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- auto parallelism: true
- Execution mode: llap
- LLAP IO: no inputs
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -462,6 +462,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
+ SORTBUCKETCOLSPREFIX TRUE
bucket_count 2
bucket_field_name key
column.name.delimiter ,
@@ -481,7 +482,6 @@ STAGE PLANS:
name: default.bucket_small
Truncated Path -> Alias:
/bucket_small/ds=2008-04-08 [a]
- Map 2
Map Operator Tree:
TableScan
alias: b
@@ -495,15 +495,12 @@ STAGE PLANS:
expressions: key (type: string)
outputColumnNames: _col0
Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
+ Merge Join Operator
condition map:
Inner Join 0 to 1
- Estimated key counts: Map 1 => 1
keys:
0 _col0 (type: string)
1 _col0 (type: string)
- input vertices:
- 0 Map 1
Position of Big Table: 1
Statistics: Num rows: 122 Data size: 33926 Basic stats: COMPLETE Column stats: NONE
Group By Operator
@@ -519,13 +516,11 @@ STAGE PLANS:
value expressions: _col0 (type: bigint)
auto parallelism: false
Execution mode: llap
- LLAP IO: no inputs
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -554,6 +549,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
+ SORTBUCKETCOLSPREFIX TRUE
bucket_count 4
bucket_field_name key
column.name.delimiter ,
@@ -573,7 +569,6 @@ STAGE PLANS:
name: default.bucket_big
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-09
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -602,6 +597,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
+ SORTBUCKETCOLSPREFIX TRUE
bucket_count 4
bucket_field_name key
column.name.delimiter ,
@@ -690,11 +686,10 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 2 <- Map 1 (BROADCAST_EDGE)
Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
- Map 1
+ Map 2
Map Operator Tree:
TableScan
alias: a
@@ -704,22 +699,11 @@ STAGE PLANS:
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- null sort order: a
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- auto parallelism: true
- Execution mode: llap
- LLAP IO: no inputs
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -748,6 +732,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
+ SORTBUCKETCOLSPREFIX TRUE
bucket_count 2
bucket_field_name key
column.name.delimiter ,
@@ -767,7 +752,6 @@ STAGE PLANS:
name: default.bucket_small
Truncated Path -> Alias:
/bucket_small/ds=2008-04-08 [a]
- Map 2
Map Operator Tree:
TableScan
alias: b
@@ -777,15 +761,12 @@ STAGE PLANS:
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
+ Merge Join Operator
condition map:
Inner Join 0 to 1
- Estimated key counts: Map 1 => 1
keys:
0 key (type: string)
1 key (type: string)
- input vertices:
- 0 Map 1
Position of Big Table: 1
Statistics: Num rows: 122 Data size: 33926 Basic stats: COMPLETE Column stats: NONE
Group By Operator
@@ -801,13 +782,11 @@ STAGE PLANS:
value expressions: _col0 (type: bigint)
auto parallelism: false
Execution mode: llap
- LLAP IO: no inputs
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -836,6 +815,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
+ SORTBUCKETCOLSPREFIX TRUE
bucket_count 4
bucket_field_name key
column.name.delimiter ,
@@ -855,7 +835,6 @@ STAGE PLANS:
name: default.bucket_big
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-09
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -884,6 +863,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
+ SORTBUCKETCOLSPREFIX TRUE
bucket_count 4
bucket_field_name key
column.name.delimiter ,
@@ -972,11 +952,10 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE)
Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
- Map 1
+ Map 2
Map Operator Tree:
TableScan
alias: a
@@ -986,22 +965,11 @@ STAGE PLANS:
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- null sort order: a
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- auto parallelism: true
- Execution mode: llap
- LLAP IO: no inputs
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -1030,6 +998,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
+ SORTBUCKETCOLSPREFIX TRUE
bucket_count 2
bucket_field_name key
column.name.delimiter ,
@@ -1049,50 +1018,20 @@ STAGE PLANS:
name: default.bucket_small
Truncated Path -> Alias:
/bucket_small/ds=2008-04-08 [a]
- Map 2
Map Operator Tree:
TableScan
- alias: b
+ alias: c
Statistics: Num rows: 116 Data size: 32232 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- Inner Join 0 to 2
- Estimated key counts: Map 1 => 1, Map 4 => 23
- keys:
- 0 key (type: string)
- 1 key (type: string)
- 2 key (type: string)
- input vertices:
- 0 Map 1
- 2 Map 4
- Position of Big Table: 1
- Statistics: Num rows: 244 Data size: 67852 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- value expressions: _col0 (type: bigint)
- auto parallelism: false
- Execution mode: llap
- LLAP IO: no inputs
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -1121,6 +1060,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
+ SORTBUCKETCOLSPREFIX TRUE
bucket_count 4
bucket_field_name key
column.name.delimiter ,
@@ -1140,7 +1080,6 @@ STAGE PLANS:
name: default.bucket_big
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-09
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -1169,6 +1108,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
+ SORTBUCKETCOLSPREFIX TRUE
bucket_count 4
bucket_field_name key
column.name.delimiter ,
@@ -1187,34 +1127,45 @@ STAGE PLANS:
name: default.bucket_big
name: default.bucket_big
Truncated Path -> Alias:
- /bucket_big/ds=2008-04-08 [b]
- /bucket_big/ds=2008-04-09 [b]
- Map 4
+ /bucket_big/ds=2008-04-08 [c]
+ /bucket_big/ds=2008-04-09 [c]
Map Operator Tree:
TableScan
- alias: c
+ alias: b
Statistics: Num rows: 116 Data size: 32232 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- null sort order: a
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE
- tag: 2
- auto parallelism: true
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 0 to 2
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+ 2 key (type: string)
+ Position of Big Table: 1
+ Statistics: Num rows: 244 Data size: 67852 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: bigint)
+ auto parallelism: false
Execution mode: llap
- LLAP IO: no inputs
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -1243,6 +1194,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
+ SORTBUCKETCOLSPREFIX TRUE
bucket_count 4
bucket_field_name key
column.name.delimiter ,
@@ -1262,7 +1214,6 @@ STAGE PLANS:
name: default.bucket_big
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-09
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -1291,6 +1242,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
+ SORTBUCKETCOLSPREFIX TRUE
bucket_count 4
bucket_field_name key
column.name.delimiter ,
@@ -1309,8 +1261,8 @@ STAGE PLANS:
name: default.bucket_big
name: default.bucket_big
Truncated Path -> Alias:
- /bucket_big/ds=2008-04-08 [c]
- /bucket_big/ds=2008-04-09 [c]
+ /bucket_big/ds=2008-04-08 [b]
+ /bucket_big/ds=2008-04-09 [b]
Reducer 3
Execution mode: llap
Needs Tagging: false
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_12.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_12.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_12.q.out
index 6555736..c14441a 100644
--- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_12.q.out
+++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_12.q.out
@@ -134,7 +134,7 @@ POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket3out
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@bucket_medium@ds=2008-04-08
-Warning: Shuffle Join MERGEJOIN[34][tables = [$hdt$_1, $hdt$_2, $hdt$_0, $hdt$_3]] in Stage 'Reducer 4' is a cross product
+Warning: Shuffle Join MERGEJOIN[36][tables = [$hdt$_1, $hdt$_2, $hdt$_0, $hdt$_3]] in Stage 'Reducer 4' is a cross product
PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key
PREHOOK: type: QUERY
POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key
@@ -148,12 +148,11 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 3 <- Map 1 (BROADCAST_EDGE), Map 2 (BROADCAST_EDGE)
Reducer 4 <- Map 3 (XPROD_EDGE), Map 6 (XPROD_EDGE)
Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
- Map 1
+ Map 3
Map Operator Tree:
TableScan
alias: a
@@ -167,22 +166,11 @@ STAGE PLANS:
expressions: key (type: string)
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: a
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- auto parallelism: true
- Execution mode: llap
- LLAP IO: no inputs
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -231,7 +219,6 @@ STAGE PLANS:
name: default.bucket_small
Truncated Path -> Alias:
/bucket_small/ds=2008-04-08 [a]
- Map 2
Map Operator Tree:
TableScan
alias: b
@@ -245,22 +232,11 @@ STAGE PLANS:
expressions: key (type: string)
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: a
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- auto parallelism: true
- Execution mode: llap
- LLAP IO: no inputs
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -309,7 +285,6 @@ STAGE PLANS:
name: default.bucket_medium
Truncated Path -> Alias:
/bucket_medium/ds=2008-04-08 [b]
- Map 3
Map Operator Tree:
TableScan
alias: c
@@ -323,18 +298,14 @@ STAGE PLANS:
expressions: key (type: string)
outputColumnNames: _col0
Statistics: Num rows: 111 Data size: 19719 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
+ Merge Join Operator
condition map:
Inner Join 0 to 1
Inner Join 1 to 2
- Estimated key counts: Map 1 => 1, Map 2 => 1
keys:
0 _col0 (type: string)
1 _col0 (type: string)
2 _col0 (type: string)
- input vertices:
- 0 Map 1
- 1 Map 2
Position of Big Table: 2
Statistics: Num rows: 244 Data size: 43381 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
@@ -344,13 +315,11 @@ STAGE PLANS:
tag: 0
auto parallelism: false
Execution mode: llap
- LLAP IO: no inputs
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -399,7 +368,6 @@ STAGE PLANS:
name: default.bucket_big
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-09
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -581,7 +549,7 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Shuffle Join MERGEJOIN[34][tables = [$hdt$_1, $hdt$_2, $hdt$_0, $hdt$_3]] in Stage 'Reducer 4' is a cross product
+Warning: Shuffle Join MERGEJOIN[36][tables = [$hdt$_1, $hdt$_2, $hdt$_0, $hdt$_3]] in Stage 'Reducer 4' is a cross product
PREHOOK: query: select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key
PREHOOK: type: QUERY
PREHOOK: Input: default@bucket_big
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out
index b78a517..78d02b8 100644
--- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out
+++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out
@@ -79,7 +79,6 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Map 4 (BROADCAST_EDGE)
Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
@@ -87,6 +86,17 @@ STAGE PLANS:
Map 1
Map Operator Tree:
TableScan
+ alias: b
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Operator Tree:
+ TableScan
alias: a
Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
@@ -96,15 +106,13 @@ STAGE PLANS:
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
+ Merge Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
- input vertices:
- 1 Map 4
Statistics: Num rows: 16 Data size: 2976 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: int), _col2 (type: int)
@@ -157,27 +165,6 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
Execution mode: llap
- LLAP IO: no inputs
- Map 4
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: int), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: string)
- Execution mode: llap
- LLAP IO: no inputs
Reducer 2
Execution mode: llap
Reduce Operator Tree:
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_14.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_14.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_14.q.out
index 72d2c62..235c13a 100644
--- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_14.q.out
+++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_14.q.out
@@ -49,27 +49,32 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE)
Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
+ alias: b
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Operator Tree:
+ TableScan
alias: a
Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
+ Merge Join Operator
condition map:
Left Outer Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
- input vertices:
- 1 Map 3
Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
@@ -81,23 +86,6 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: bigint)
Execution mode: llap
- LLAP IO: no inputs
- Map 3
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Execution mode: llap
- LLAP IO: no inputs
Reducer 2
Execution mode: llap
Reduce Operator Tree:
@@ -156,11 +144,10 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 2 <- Map 1 (BROADCAST_EDGE)
Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
- Map 1
+ Map 2
Map Operator Tree:
TableScan
alias: a
@@ -169,14 +156,6 @@ STAGE PLANS:
expressions: key (type: int)
outputColumnNames: _col0
Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
- Execution mode: llap
- LLAP IO: no inputs
- Map 2
Map Operator Tree:
TableScan
alias: b
@@ -185,14 +164,12 @@ STAGE PLANS:
expressions: key (type: int)
outputColumnNames: _col0
Statistics: Num rows: 189 Data size: 756 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
+ Merge Join Operator
condition map:
Right Outer Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
- input vertices:
- 0 Map 1
Statistics: Num rows: 189 Data size: 1512 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
@@ -204,7 +181,6 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: bigint)
Execution mode: llap
- LLAP IO: no inputs
Reducer 3
Execution mode: llap
Reduce Operator Tree:
[12/12] hive git commit: HIVE-18208: SMB Join : Fix the unit tests to
run SMB Joins. (Deepak Jaiswal, reviewed by Jason Dere)
Posted by jd...@apache.org.
HIVE-18208: SMB Join : Fix the unit tests to run SMB Joins. (Deepak Jaiswal, reviewed by Jason Dere)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/38405c14
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/38405c14
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/38405c14
Branch: refs/heads/master
Commit: 38405c1458cf2c6ee508fedf38581df1fc8c1f61
Parents: 1320d2b
Author: Jason Dere <jd...@hortonworks.com>
Authored: Tue Dec 12 12:50:51 2017 -0800
Committer: Jason Dere <jd...@hortonworks.com>
Committed: Tue Dec 12 12:50:51 2017 -0800
----------------------------------------------------------------------
.../clientpositive/auto_sortmerge_join_1.q | 2 +
.../clientpositive/auto_sortmerge_join_10.q | 3 +
.../clientpositive/auto_sortmerge_join_11.q | 8 +-
.../clientpositive/auto_sortmerge_join_12.q | 2 +
.../clientpositive/auto_sortmerge_join_13.q | 2 +-
.../clientpositive/auto_sortmerge_join_14.q | 2 +
.../clientpositive/auto_sortmerge_join_15.q | 2 +
.../clientpositive/auto_sortmerge_join_2.q | 2 +
.../clientpositive/auto_sortmerge_join_3.q | 2 +
.../clientpositive/auto_sortmerge_join_4.q | 2 +
.../clientpositive/auto_sortmerge_join_7.q | 2 +
.../clientpositive/auto_sortmerge_join_8.q | 2 +
.../clientpositive/auto_sortmerge_join_9.q | 3 +
.../bucketsortoptimize_insert_2.q | 2 +
.../bucketsortoptimize_insert_6.q | 1 +
.../bucketsortoptimize_insert_7.q | 2 +
.../test/queries/clientpositive/quotedid_smb.q | 6 +
ql/src/test/queries/clientpositive/smb_cache.q | 11 +-
.../clientpositive/auto_sortmerge_join_10.q.out | 331 +++-
.../clientpositive/auto_sortmerge_join_11.q.out | 1043 ++++++----
.../clientpositive/auto_sortmerge_join_12.q.out | 1790 +++++++++++++++++-
.../llap/auto_sortmerge_join_1.q.out | 329 ++--
.../llap/auto_sortmerge_join_10.q.out | 113 +-
.../llap/auto_sortmerge_join_11.q.out | 220 +--
.../llap/auto_sortmerge_join_12.q.out | 40 +-
.../llap/auto_sortmerge_join_13.q.out | 37 +-
.../llap/auto_sortmerge_join_14.q.out | 46 +-
.../llap/auto_sortmerge_join_15.q.out | 46 +-
.../llap/auto_sortmerge_join_2.q.out | 306 ++-
.../llap/auto_sortmerge_join_3.q.out | 329 ++--
.../llap/auto_sortmerge_join_4.q.out | 329 ++--
.../llap/auto_sortmerge_join_7.q.out | 352 ++--
.../llap/auto_sortmerge_join_8.q.out | 352 ++--
.../llap/auto_sortmerge_join_9.q.out | 1110 +++++------
.../llap/bucketsortoptimize_insert_2.q.out | 306 ++-
.../llap/bucketsortoptimize_insert_6.q.out | 273 +--
.../llap/bucketsortoptimize_insert_7.q.out | 117 +-
.../clientpositive/llap/quotedid_smb.q.out | 81 +-
.../results/clientpositive/llap/smb_cache.q.out | 191 +-
.../spark/auto_sortmerge_join_1.q.out | 99 +-
.../spark/auto_sortmerge_join_12.q.out | 252 +--
.../spark/auto_sortmerge_join_14.q.out | 64 +-
.../spark/auto_sortmerge_join_15.q.out | 64 +-
.../spark/auto_sortmerge_join_2.q.out | 99 +-
.../spark/auto_sortmerge_join_3.q.out | 149 +-
.../spark/auto_sortmerge_join_4.q.out | 149 +-
.../spark/auto_sortmerge_join_7.q.out | 149 +-
.../spark/auto_sortmerge_join_8.q.out | 149 +-
.../spark/auto_sortmerge_join_9.q.out | 736 ++-----
.../spark/bucketsortoptimize_insert_2.q.out | 240 +--
.../clientpositive/spark/quotedid_smb.q.out | 51 +
51 files changed, 4974 insertions(+), 5024 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/queries/clientpositive/auto_sortmerge_join_1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_1.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_1.q
index a1d5249..a044c6d 100644
--- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_1.q
+++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_1.q
@@ -26,6 +26,8 @@ set hive.optimize.bucketmapjoin.sortedmerge = true;
set hive.auto.convert.sortmerge.join.to.mapjoin=false;
set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ;
+-- disable hash joins
+set hive.auto.convert.join.noconditionaltask.size=10;
-- Since size is being used to find the big table, the order of the tables in the join does not matter
explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key;
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/queries/clientpositive/auto_sortmerge_join_10.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_10.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_10.q
index e65344d..abd3a34 100644
--- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_10.q
+++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_10.q
@@ -20,6 +20,9 @@ set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
set hive.auto.convert.sortmerge.join=true;
set hive.auto.convert.sortmerge.join.to.mapjoin=false;
+-- disable hash joins
+set hive.auto.convert.join.noconditionaltask.size=1;
+
-- One of the subqueries contains a union, so it should not be converted to a sort-merge join.
explain
select count(*) from
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/queries/clientpositive/auto_sortmerge_join_11.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_11.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_11.q
index 11499f8..1393351 100644
--- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_11.q
+++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_11.q
@@ -3,11 +3,11 @@ set hive.strict.checks.bucketing=false;
set hive.mapred.mode=nonstrict;
-- small 1 part, 2 bucket & big 2 part, 4 bucket
-CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
+CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (KEY) INTO 2 BUCKETS STORED AS TEXTFILE;
load data local inpath '../../data/files/smallsrcsortbucket1outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08');
load data local inpath '../../data/files/smallsrcsortbucket2outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08');
-CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
+CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY(KEY) INTO 4 BUCKETS STORED AS TEXTFILE;
load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08');
load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08');
load data local inpath '../../data/files/srcsortbucket3outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08');
@@ -19,14 +19,14 @@ load data local inpath '../../data/files/srcsortbucket3outof4.txt' INTO TABLE bu
load data local inpath '../../data/files/srcsortbucket4outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09');
set hive.auto.convert.join=true;
-
+-- disable hash joins
+set hive.auto.convert.join.noconditionaltask.size=10;
explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key;
select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key;
set hive.auto.convert.sortmerge.join=true;
set hive.optimize.bucketmapjoin=true;
set hive.optimize.bucketmapjoin.sortedmerge=true;
-
-- Since size is being used to find the big table, the order of the tables in the join does not matter
-- The tables are only bucketed and not sorted, the join should not be converted
-- Currenly, a join is only converted to a sort-merge join without a hint, automatic conversion to
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/queries/clientpositive/auto_sortmerge_join_12.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_12.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_12.q
index b512cc5..cfaef76 100644
--- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_12.q
+++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_12.q
@@ -23,6 +23,8 @@ set hive.auto.convert.join=true;
set hive.auto.convert.sortmerge.join=true;
set hive.optimize.bucketmapjoin = true;
set hive.optimize.bucketmapjoin.sortedmerge = true;
+-- disable hash joins
+set hive.auto.convert.join.noconditionaltask.size=10;
CREATE TABLE bucket_medium (key string, value string) partitioned by (ds string)
CLUSTERED BY (key) SORTED BY (key) INTO 3 BUCKETS STORED AS TEXTFILE;
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q
index b5706be..32b57ab 100644
--- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q
+++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q
@@ -22,6 +22,7 @@ set hive.optimize.bucketmapjoin.sortedmerge = true;
set hive.auto.convert.join=true;
-- A SMB join followed by a mutli-insert
+set hive.auto.convert.join.noconditionaltask.size=200;
explain
from (
SELECT a.key key1, a.value value1, b.key key2, b.value value2
@@ -41,7 +42,6 @@ select * from dest1;
select * from dest2;
set hive.auto.convert.join.noconditionaltask=true;
-set hive.auto.convert.join.noconditionaltask.size=200;
set hive.mapjoin.hybridgrace.minwbsize=100;
set hive.mapjoin.hybridgrace.minnumpartitions=2;
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/queries/clientpositive/auto_sortmerge_join_14.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_14.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_14.q
index dd59c74..7ce7e05 100644
--- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_14.q
+++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_14.q
@@ -17,6 +17,8 @@ set hive.auto.convert.sortmerge.join=true;
set hive.optimize.bucketmapjoin = true;
set hive.optimize.bucketmapjoin.sortedmerge = true;
set hive.auto.convert.join=true;
+-- disable hash joins
+set hive.auto.convert.join.noconditionaltask.size=10;
-- Since tbl1 is the bigger table, tbl1 Left Outer Join tbl2 can be performed
explain
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/queries/clientpositive/auto_sortmerge_join_15.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_15.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_15.q
index 1480b15..84a8ed7 100644
--- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_15.q
+++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_15.q
@@ -17,6 +17,8 @@ set hive.auto.convert.sortmerge.join=true;
set hive.optimize.bucketmapjoin = true;
set hive.optimize.bucketmapjoin.sortedmerge = true;
set hive.auto.convert.join=true;
+-- disable hash joins
+set hive.auto.convert.join.noconditionaltask.size=10;
explain
select count(*) FROM tbl1 a LEFT OUTER JOIN tbl2 b ON a.key = b.key;
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/queries/clientpositive/auto_sortmerge_join_2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_2.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_2.q
index e77d937..122bba5 100644
--- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_2.q
+++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_2.q
@@ -21,6 +21,8 @@ set hive.optimize.bucketmapjoin = true;
set hive.optimize.bucketmapjoin.sortedmerge = true;
set hive.auto.convert.sortmerge.join.to.mapjoin=false;
set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.LeftmostBigTableSelectorForAutoSMJ;
+-- disable hash joins
+set hive.auto.convert.join.noconditionaltask.size=10;
-- Since the leftmost table is assumed as the big table, arrange the tables in the join accordingly
explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/queries/clientpositive/auto_sortmerge_join_3.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_3.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_3.q
index 183f033..fa6e0af 100644
--- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_3.q
+++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_3.q
@@ -21,6 +21,8 @@ set hive.optimize.bucketmapjoin = true;
set hive.optimize.bucketmapjoin.sortedmerge = true;
set hive.auto.convert.sortmerge.join.to.mapjoin=false;
set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ;
+-- disable hash joins
+set hive.auto.convert.join.noconditionaltask.size=100;
-- Since size is being used to find the big table, the order of the tables in the join does not matter
explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key;
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/queries/clientpositive/auto_sortmerge_join_4.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_4.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_4.q
index 21f273a..3540779 100644
--- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_4.q
+++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_4.q
@@ -23,6 +23,8 @@ set hive.optimize.bucketmapjoin = true;
set hive.optimize.bucketmapjoin.sortedmerge = true;
set hive.auto.convert.sortmerge.join.to.mapjoin=false;
set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.AvgPartitionSizeBasedBigTableSelectorForAutoSMJ;
+-- disable hash joins
+set hive.auto.convert.join.noconditionaltask.size=200;
-- Since size is being used to find the big table, the order of the tables in the join does not matter
explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key;
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/queries/clientpositive/auto_sortmerge_join_7.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_7.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_7.q
index cf12331..5a16d8c 100644
--- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_7.q
+++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_7.q
@@ -26,6 +26,8 @@ set hive.optimize.bucketmapjoin = true;
set hive.optimize.bucketmapjoin.sortedmerge = true;
set hive.auto.convert.sortmerge.join.to.mapjoin=false;
set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.AvgPartitionSizeBasedBigTableSelectorForAutoSMJ;
+-- disable hash joins
+set hive.auto.convert.join.noconditionaltask.size=10;
-- Since size is being used to find the big table, the order of the tables in the join does not matter
explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key;
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/queries/clientpositive/auto_sortmerge_join_8.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_8.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_8.q
index 5ec4e26..4ae7b1d 100644
--- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_8.q
+++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_8.q
@@ -29,6 +29,8 @@ set hive.optimize.bucketmapjoin = true;
set hive.optimize.bucketmapjoin.sortedmerge = true;
set hive.auto.convert.sortmerge.join.to.mapjoin=false;
set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ;
+--disable hash joins
+set hive.auto.convert.join.noconditionaltask.size=10;
-- Since size is being used to find the big table, the order of the tables in the join does not matter
explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key;
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/queries/clientpositive/auto_sortmerge_join_9.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_9.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_9.q
index f95631f..08dbf6c 100644
--- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_9.q
+++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_9.q
@@ -20,6 +20,9 @@ set hive.optimize.bucketmapjoin = true;
set hive.optimize.bucketmapjoin.sortedmerge = true;
set hive.auto.convert.sortmerge.join=true;
set hive.auto.convert.sortmerge.join.to.mapjoin=false;
+--disable hash joins
+set hive.auto.convert.join.noconditionaltask.size=10;
+
-- The join is being performed as part of sub-query. It should be converted to a sort-merge join
explain
select count(*) from (
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_2.q b/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_2.q
index 4a14587..c812cf1 100644
--- a/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_2.q
+++ b/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_2.q
@@ -11,6 +11,8 @@ set hive.merge.mapredfiles=false;
set hive.auto.convert.sortmerge.join.bigtable.selection.policy=org.apache.hadoop.hive.ql.optimizer.LeftmostBigTableSelectorForAutoSMJ;
set hive.auto.convert.sortmerge.join.to.mapjoin=true;
+-- disable hash joins
+set hive.auto.convert.join.noconditionaltask.size=10;
-- Create two bucketed and sorted tables
CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING)
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_6.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_6.q b/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_6.q
index ec0c2dc..894ba81 100644
--- a/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_6.q
+++ b/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_6.q
@@ -28,6 +28,7 @@ INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT key, key+1, value
-- Insert data into the bucketed table by selecting from another bucketed table
-- This should be a map-only operation, since the sort-order matches
+set hive.auto.convert.join.noconditionaltask.size=800;
EXPLAIN
INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
SELECT a.key, a.key2, concat(a.value, b.value)
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_7.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_7.q b/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_7.q
index 45635c1..a68c0b4 100644
--- a/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_7.q
+++ b/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_7.q
@@ -11,6 +11,8 @@ set hive.merge.mapredfiles=false;
set hive.auto.convert.sortmerge.join.bigtable.selection.policy=org.apache.hadoop.hive.ql.optimizer.LeftmostBigTableSelectorForAutoSMJ;
set hive.auto.convert.sortmerge.join.to.mapjoin=true;
+-- disable hash joins
+set hive.auto.convert.join.noconditionaltask.size=10;
-- Create two bucketed and sorted tables
CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING)
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/queries/clientpositive/quotedid_smb.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/quotedid_smb.q b/ql/src/test/queries/clientpositive/quotedid_smb.q
index 25d1f0e..11642f5 100644
--- a/ql/src/test/queries/clientpositive/quotedid_smb.q
+++ b/ql/src/test/queries/clientpositive/quotedid_smb.q
@@ -27,7 +27,13 @@ set hive.optimize.bucketmapjoin.sortedmerge = true;
set hive.auto.convert.sortmerge.join.to.mapjoin=false;
set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ;
+-- disable hash joins
+set hive.auto.convert.join.noconditionaltask.size=1000;
+explain select a.`x+1`, a.`!@#$%^&*()_q`, b.`x+1`, b.`!@#$%^&*()_q`
+from src_b a join src_b2 b on a.`!@#$%^&*()_q` = b.`!@#$%^&*()_q`
+where a.`x+1` < '11'
+;
select a.`x+1`, a.`!@#$%^&*()_q`, b.`x+1`, b.`!@#$%^&*()_q`
from src_b a join src_b2 b on a.`!@#$%^&*()_q` = b.`!@#$%^&*()_q`
where a.`x+1` < '11'
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/queries/clientpositive/smb_cache.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/smb_cache.q b/ql/src/test/queries/clientpositive/smb_cache.q
index e415e51..67a9242 100644
--- a/ql/src/test/queries/clientpositive/smb_cache.q
+++ b/ql/src/test/queries/clientpositive/smb_cache.q
@@ -1,4 +1,4 @@
-set hive.explain.user=true;
+set hive.explain.user=false;
create table bug_201_input_a (
userid int
) clustered by (userid) sorted by (userid) into 64 BUCKETS ;
@@ -86,6 +86,7 @@ select distinct(userid) as userid from (
162031843 ,141532840 ,154222699 ,109320121 ,155198842
)) as arr )a ) b;
+
explain
select
t1.userid,
@@ -97,12 +98,12 @@ select
t1.userid,
fa.userid as fa_userid
from bug_201_input_b as t1
- join bug_201_input_a as fa on (t1.userid = fa.userid) ;
+ join bug_201_input_a as fa on (t1.userid = fa.userid) order by t1.userid, fa.userid;
set hive.auto.convert.join=true;
-set hive.auto.convert.join.noconditionaltask.size=100 ;
-set hive.auto.convert.sortmerge.join=true
+set hive.auto.convert.join.noconditionaltask.size=5;
+set hive.auto.convert.sortmerge.join=true;
set hive.convert.join.bucket.mapjoin.tez = true;
set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ;
@@ -117,5 +118,5 @@ select
t1.userid,
fa.userid as fa_userid
from bug_201_input_b as t1
- join bug_201_input_a as fa on (t1.userid = fa.userid) ;
+ join bug_201_input_a as fa on (t1.userid = fa.userid) order by t1.userid, fa.userid;
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/results/clientpositive/auto_sortmerge_join_10.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/auto_sortmerge_join_10.q.out b/ql/src/test/results/clientpositive/auto_sortmerge_join_10.q.out
index 22ac2a2..3a90b05 100644
--- a/ql/src/test/results/clientpositive/auto_sortmerge_join_10.q.out
+++ b/ql/src/test/results/clientpositive/auto_sortmerge_join_10.q.out
@@ -63,13 +63,20 @@ select count(*) from
on subq1.key = subq2.key
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-6 is a root stage
- Stage-5 depends on stages: Stage-6
- Stage-2 depends on stages: Stage-5
+ Stage-7 is a root stage , consists of Stage-8, Stage-9, Stage-1
+ Stage-8 has a backup stage: Stage-1
+ Stage-5 depends on stages: Stage-8
+ Stage-2 depends on stages: Stage-1, Stage-5, Stage-6
+ Stage-9 has a backup stage: Stage-1
+ Stage-6 depends on stages: Stage-9
+ Stage-1
Stage-0 depends on stages: Stage-2
STAGE PLANS:
- Stage: Stage-6
+ Stage: Stage-7
+ Conditional Operator
+
+ Stage: Stage-8
Map Reduce Local Work
Alias -> Map Local Tables:
$hdt$_1:a
@@ -180,6 +187,157 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Stage: Stage-9
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $hdt$_0-subquery1:$hdt$_0-subquery1:a
+ Fetch Operator
+ limit: -1
+ $hdt$_0-subquery2:$hdt$_0-subquery2:a
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $hdt$_0-subquery1:$hdt$_0-subquery1:a
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key < 6) (type: boolean)
+ Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+ Union
+ Statistics: Num rows: 6 Data size: 42 Basic stats: COMPLETE Column stats: NONE
+ HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ $hdt$_0-subquery2:$hdt$_0-subquery2:a
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key < 6) (type: boolean)
+ Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+ Union
+ Statistics: Num rows: 6 Data size: 42 Basic stats: COMPLETE Column stats: NONE
+ HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+
+ Stage: Stage-6
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key < 6) (type: boolean)
+ Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Statistics: Num rows: 6 Data size: 46 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key < 6) (type: boolean)
+ Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+ Union
+ Statistics: Num rows: 6 Data size: 42 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 6 Data size: 42 Basic stats: COMPLETE Column stats: NONE
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key < 6) (type: boolean)
+ Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+ Union
+ Statistics: Num rows: 6 Data size: 42 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 6 Data size: 42 Basic stats: COMPLETE Column stats: NONE
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key < 6) (type: boolean)
+ Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Statistics: Num rows: 6 Data size: 46 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
Stage: Stage-0
Fetch Operator
limit: -1
@@ -230,36 +388,43 @@ select count(*) from
on subq1.key = subq2.key
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-5 is a root stage
- Stage-2 depends on stages: Stage-5
+ Stage-6 is a root stage , consists of Stage-7, Stage-8, Stage-1
+ Stage-7 has a backup stage: Stage-1
+ Stage-4 depends on stages: Stage-7
+ Stage-2 depends on stages: Stage-1, Stage-4, Stage-5
+ Stage-8 has a backup stage: Stage-1
+ Stage-5 depends on stages: Stage-8
+ Stage-1
Stage-0 depends on stages: Stage-2
STAGE PLANS:
- Stage: Stage-5
+ Stage: Stage-6
+ Conditional Operator
+
+ Stage: Stage-7
Map Reduce Local Work
Alias -> Map Local Tables:
- $hdt$_0:a
+ $hdt$_1:a
Fetch Operator
limit: -1
Alias -> Map Local Operator Tree:
- $hdt$_0:a
+ $hdt$_1:a
TableScan
alias: a
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (key < 6) (type: boolean)
Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: key (type: int)
- mode: final
+ Select Operator
+ expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
HashTable Sink Operator
keys:
0 _col0 (type: int)
1 _col0 (type: int)
- Stage: Stage-2
+ Stage: Stage-4
Map Reduce
Map Operator Tree:
TableScan
@@ -268,10 +433,11 @@ STAGE PLANS:
Filter Operator
predicate: (key < 6) (type: boolean)
Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int)
+ Group By Operator
+ keys: key (type: int)
+ mode: final
outputColumnNames: _col0
- Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -284,12 +450,23 @@ STAGE PLANS:
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Local Work:
Map Reduce Local Work
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
@@ -304,6 +481,118 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Stage: Stage-8
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $hdt$_0:a
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $hdt$_0:a
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key < 6) (type: boolean)
+ Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: key (type: int)
+ mode: final
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
+ HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+
+ Stage: Stage-5
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key < 6) (type: boolean)
+ Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key < 6) (type: boolean)
+ Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: key (type: int)
+ mode: final
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key < 6) (type: boolean)
+ Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
Stage: Stage-0
Fetch Operator
limit: -1