You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jd...@apache.org on 2017/12/12 20:51:32 UTC
[09/12] hive git commit: HIVE-18208: SMB Join : Fix the unit tests to
run SMB Joins. (Deepak Jaiswal, reviewed by Jason Dere)
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_1.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_1.q.out
index 36bfac3..8fc43e4 100644
--- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_1.q.out
+++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_1.q.out
@@ -112,11 +112,10 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 2 <- Map 1 (BROADCAST_EDGE)
Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
- Map 1
+ Map 2
Map Operator Tree:
TableScan
alias: a
@@ -130,22 +129,11 @@ STAGE PLANS:
expressions: key (type: string)
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: a
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- auto parallelism: true
- Execution mode: llap
- LLAP IO: no inputs
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -194,7 +182,6 @@ STAGE PLANS:
name: default.bucket_small
Truncated Path -> Alias:
/bucket_small/ds=2008-04-08 [a]
- Map 2
Map Operator Tree:
TableScan
alias: b
@@ -208,15 +195,12 @@ STAGE PLANS:
expressions: key (type: string)
outputColumnNames: _col0
Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
+ Merge Join Operator
condition map:
Inner Join 0 to 1
- Estimated key counts: Map 1 => 1
keys:
0 _col0 (type: string)
1 _col0 (type: string)
- input vertices:
- 0 Map 1
Position of Big Table: 1
Statistics: Num rows: 122 Data size: 33926 Basic stats: COMPLETE Column stats: NONE
Group By Operator
@@ -232,13 +216,11 @@ STAGE PLANS:
value expressions: _col0 (type: bigint)
auto parallelism: false
Execution mode: llap
- LLAP IO: no inputs
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -287,7 +269,6 @@ STAGE PLANS:
name: default.bucket_big
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-09
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -405,77 +386,50 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE)
Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
- alias: a
- Statistics: Num rows: 116 Data size: 32232 Basic stats: COMPLETE Column stats: NONE
+ alias: b
+ Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
- Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- Estimated key counts: Map 3 => 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- input vertices:
- 1 Map 3
- Position of Big Table: 0
- Statistics: Num rows: 122 Data size: 33926 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- value expressions: _col0 (type: bigint)
- auto parallelism: false
- Execution mode: llap
- LLAP IO: no inputs
+ Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
ds 2008-04-08
properties:
- bucket_count 4
+ bucket_count 2
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_big
- numFiles 4
+ name default.bucket_small
+ numFiles 2
numRows 0
partition_columns ds
partition_columns.types string
rawDataSize 0
- serialization.ddl struct bucket_big { string key, string value}
+ serialization.ddl struct bucket_small { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 5812
+ totalSize 114
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -483,30 +437,68 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
SORTBUCKETCOLSPREFIX TRUE
- bucket_count 4
+ bucket_count 2
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_big
+ name default.bucket_small
partition_columns ds
partition_columns.types string
- serialization.ddl struct bucket_big { string key, string value}
+ serialization.ddl struct bucket_small { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_big
- name: default.bucket_big
+ name: default.bucket_small
+ name: default.bucket_small
+ Truncated Path -> Alias:
+ /bucket_small/ds=2008-04-08 [b]
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 116 Data size: 32232 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ Position of Big Table: 0
+ Statistics: Num rows: 122 Data size: 33926 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: bigint)
+ auto parallelism: false
+ Execution mode: llap
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-09
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
- ds 2008-04-09
+ ds 2008-04-08
properties:
bucket_count 4
bucket_field_name key
@@ -549,61 +541,30 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.bucket_big
name: default.bucket_big
- Truncated Path -> Alias:
- /bucket_big/ds=2008-04-08 [a]
- /bucket_big/ds=2008-04-09 [a]
- Map 3
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: a
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- auto parallelism: true
- Execution mode: llap
- LLAP IO: no inputs
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
- ds 2008-04-08
+ ds 2008-04-09
properties:
- bucket_count 2
+ bucket_count 4
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_small
- numFiles 2
+ name default.bucket_big
+ numFiles 4
numRows 0
partition_columns ds
partition_columns.types string
rawDataSize 0
- serialization.ddl struct bucket_small { string key, string value}
+ serialization.ddl struct bucket_big { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 114
+ totalSize 5812
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -611,25 +572,26 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
SORTBUCKETCOLSPREFIX TRUE
- bucket_count 2
+ bucket_count 4
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_small
+ name default.bucket_big
partition_columns ds
partition_columns.types string
- serialization.ddl struct bucket_small { string key, string value}
+ serialization.ddl struct bucket_big { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_small
- name: default.bucket_small
+ name: default.bucket_big
+ name: default.bucket_big
Truncated Path -> Alias:
- /bucket_small/ds=2008-04-08 [b]
+ /bucket_big/ds=2008-04-08 [a]
+ /bucket_big/ds=2008-04-09 [a]
Reducer 2
Execution mode: llap
Needs Tagging: false
@@ -698,77 +660,50 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE)
Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
- alias: a
- Statistics: Num rows: 116 Data size: 32232 Basic stats: COMPLETE Column stats: NONE
+ alias: b
+ Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
- Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- Estimated key counts: Map 3 => 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- input vertices:
- 1 Map 3
- Position of Big Table: 0
- Statistics: Num rows: 122 Data size: 33926 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- value expressions: _col0 (type: bigint)
- auto parallelism: false
- Execution mode: llap
- LLAP IO: no inputs
+ Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
ds 2008-04-08
properties:
- bucket_count 4
+ bucket_count 2
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_big
- numFiles 4
+ name default.bucket_small
+ numFiles 2
numRows 0
partition_columns ds
partition_columns.types string
rawDataSize 0
- serialization.ddl struct bucket_big { string key, string value}
+ serialization.ddl struct bucket_small { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 5812
+ totalSize 114
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -776,30 +711,68 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
SORTBUCKETCOLSPREFIX TRUE
- bucket_count 4
+ bucket_count 2
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_big
+ name default.bucket_small
partition_columns ds
partition_columns.types string
- serialization.ddl struct bucket_big { string key, string value}
+ serialization.ddl struct bucket_small { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_big
- name: default.bucket_big
+ name: default.bucket_small
+ name: default.bucket_small
+ Truncated Path -> Alias:
+ /bucket_small/ds=2008-04-08 [b]
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 116 Data size: 32232 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ Position of Big Table: 0
+ Statistics: Num rows: 122 Data size: 33926 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: bigint)
+ auto parallelism: false
+ Execution mode: llap
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-09
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
- ds 2008-04-09
+ ds 2008-04-08
properties:
bucket_count 4
bucket_field_name key
@@ -842,61 +815,30 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.bucket_big
name: default.bucket_big
- Truncated Path -> Alias:
- /bucket_big/ds=2008-04-08 [a]
- /bucket_big/ds=2008-04-09 [a]
- Map 3
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: a
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- auto parallelism: true
- Execution mode: llap
- LLAP IO: no inputs
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
- ds 2008-04-08
+ ds 2008-04-09
properties:
- bucket_count 2
+ bucket_count 4
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_small
- numFiles 2
+ name default.bucket_big
+ numFiles 4
numRows 0
partition_columns ds
partition_columns.types string
rawDataSize 0
- serialization.ddl struct bucket_small { string key, string value}
+ serialization.ddl struct bucket_big { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 114
+ totalSize 5812
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -904,25 +846,26 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
SORTBUCKETCOLSPREFIX TRUE
- bucket_count 2
+ bucket_count 4
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.bucket_small
+ name default.bucket_big
partition_columns ds
partition_columns.types string
- serialization.ddl struct bucket_small { string key, string value}
+ serialization.ddl struct bucket_big { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket_small
- name: default.bucket_small
+ name: default.bucket_big
+ name: default.bucket_big
Truncated Path -> Alias:
- /bucket_small/ds=2008-04-08 [b]
+ /bucket_big/ds=2008-04-08 [a]
+ /bucket_big/ds=2008-04-09 [a]
Reducer 2
Execution mode: llap
Needs Tagging: false
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_10.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_10.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_10.q.out
index c1459d5..a724617 100644
--- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_10.q.out
+++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_10.q.out
@@ -71,9 +71,10 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS)
- Map 4 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS)
- Reducer 3 <- Union 2 (CUSTOM_SIMPLE_EDGE)
+ Map 1 <- Union 2 (CONTAINS)
+ Map 5 <- Union 2 (CONTAINS)
+ Reducer 3 <- Map 6 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -88,27 +89,14 @@ STAGE PLANS:
expressions: key (type: int)
outputColumnNames: _col0
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- input vertices:
- 1 Map 5
- Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: bigint)
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
- Map 4
+ Map 5
Map Operator Tree:
TableScan
alias: a
@@ -120,27 +108,14 @@ STAGE PLANS:
expressions: key (type: int)
outputColumnNames: _col0
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- input vertices:
- 1 Map 5
- Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: bigint)
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
- Map 5
+ Map 6
Map Operator Tree:
TableScan
alias: a
@@ -157,16 +132,30 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
Reducer 3
Execution mode: llap
Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint)
+ Reducer 4
+ Execution mode: llap
+ Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
mode: mergepartial
@@ -240,8 +229,7 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 2 <- Map 1 (BROADCAST_EDGE)
- Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -252,19 +240,10 @@ STAGE PLANS:
Filter Operator
predicate: (key < 6) (type: boolean)
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- keys: key (type: int)
- mode: final
+ Select Operator
+ expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Execution mode: llap
- LLAP IO: no inputs
- Map 2
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Map Operator Tree:
TableScan
alias: a
@@ -272,18 +251,17 @@ STAGE PLANS:
Filter Operator
predicate: (key < 6) (type: boolean)
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: int)
+ Group By Operator
+ keys: key (type: int)
+ mode: final
outputColumnNames: _col0
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
+ Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Merge Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
- input vertices:
- 0 Map 1
Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
@@ -295,8 +273,7 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: bigint)
Execution mode: llap
- LLAP IO: no inputs
- Reducer 3
+ Reducer 2
Execution mode: llap
Reduce Operator Tree:
Group By Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_11.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_11.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_11.q.out
index 37d97d2..3eb8fde 100644
--- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_11.q.out
+++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_11.q.out
@@ -1,8 +1,8 @@
-PREHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+PREHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (KEY) INTO 2 BUCKETS STORED AS TEXTFILE
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@bucket_small
-POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (KEY) INTO 2 BUCKETS STORED AS TEXTFILE
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@bucket_small
@@ -23,11 +23,11 @@ POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket2out
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@bucket_small@ds=2008-04-08
-PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE
+PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY(KEY) INTO 4 BUCKETS STORED AS TEXTFILE
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@bucket_big
-POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE
+POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY(KEY) INTO 4 BUCKETS STORED AS TEXTFILE
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@bucket_big
@@ -110,8 +110,8 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 2 <- Map 1 (BROADCAST_EDGE)
- Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -172,6 +172,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
+ SORTBUCKETCOLSPREFIX TRUE
bucket_count 2
bucket_field_name key
column.name.delimiter ,
@@ -191,7 +192,7 @@ STAGE PLANS:
name: default.bucket_small
Truncated Path -> Alias:
/bucket_small/ds=2008-04-08 [a]
- Map 2
+ Map 4
Map Operator Tree:
TableScan
alias: b
@@ -205,29 +206,14 @@ STAGE PLANS:
expressions: key (type: string)
outputColumnNames: _col0
Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- Estimated key counts: Map 1 => 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- input vertices:
- 0 Map 1
- Position of Big Table: 1
- Statistics: Num rows: 122 Data size: 33926 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- value expressions: _col0 (type: bigint)
- auto parallelism: false
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: a
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE
+ tag: 1
+ auto parallelism: true
Execution mode: llap
LLAP IO: no inputs
Path -> Alias:
@@ -264,6 +250,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
+ SORTBUCKETCOLSPREFIX TRUE
bucket_count 4
bucket_field_name key
column.name.delimiter ,
@@ -312,6 +299,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
+ SORTBUCKETCOLSPREFIX TRUE
bucket_count 4
bucket_field_name key
column.name.delimiter ,
@@ -332,6 +320,30 @@ STAGE PLANS:
Truncated Path -> Alias:
/bucket_big/ds=2008-04-08 [b]
/bucket_big/ds=2008-04-09 [b]
+ Reducer 2
+ Execution mode: llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ Position of Big Table: 1
+ Statistics: Num rows: 122 Data size: 33926 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: bigint)
+ auto parallelism: false
Reducer 3
Execution mode: llap
Needs Tagging: false
@@ -400,11 +412,10 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 2 <- Map 1 (BROADCAST_EDGE)
Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
- Map 1
+ Map 2
Map Operator Tree:
TableScan
alias: a
@@ -418,22 +429,11 @@ STAGE PLANS:
expressions: key (type: string)
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: a
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- auto parallelism: true
- Execution mode: llap
- LLAP IO: no inputs
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -462,6 +462,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
+ SORTBUCKETCOLSPREFIX TRUE
bucket_count 2
bucket_field_name key
column.name.delimiter ,
@@ -481,7 +482,6 @@ STAGE PLANS:
name: default.bucket_small
Truncated Path -> Alias:
/bucket_small/ds=2008-04-08 [a]
- Map 2
Map Operator Tree:
TableScan
alias: b
@@ -495,15 +495,12 @@ STAGE PLANS:
expressions: key (type: string)
outputColumnNames: _col0
Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
+ Merge Join Operator
condition map:
Inner Join 0 to 1
- Estimated key counts: Map 1 => 1
keys:
0 _col0 (type: string)
1 _col0 (type: string)
- input vertices:
- 0 Map 1
Position of Big Table: 1
Statistics: Num rows: 122 Data size: 33926 Basic stats: COMPLETE Column stats: NONE
Group By Operator
@@ -519,13 +516,11 @@ STAGE PLANS:
value expressions: _col0 (type: bigint)
auto parallelism: false
Execution mode: llap
- LLAP IO: no inputs
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -554,6 +549,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
+ SORTBUCKETCOLSPREFIX TRUE
bucket_count 4
bucket_field_name key
column.name.delimiter ,
@@ -573,7 +569,6 @@ STAGE PLANS:
name: default.bucket_big
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-09
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -602,6 +597,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
+ SORTBUCKETCOLSPREFIX TRUE
bucket_count 4
bucket_field_name key
column.name.delimiter ,
@@ -690,11 +686,10 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 2 <- Map 1 (BROADCAST_EDGE)
Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
- Map 1
+ Map 2
Map Operator Tree:
TableScan
alias: a
@@ -704,22 +699,11 @@ STAGE PLANS:
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- null sort order: a
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- auto parallelism: true
- Execution mode: llap
- LLAP IO: no inputs
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -748,6 +732,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
+ SORTBUCKETCOLSPREFIX TRUE
bucket_count 2
bucket_field_name key
column.name.delimiter ,
@@ -767,7 +752,6 @@ STAGE PLANS:
name: default.bucket_small
Truncated Path -> Alias:
/bucket_small/ds=2008-04-08 [a]
- Map 2
Map Operator Tree:
TableScan
alias: b
@@ -777,15 +761,12 @@ STAGE PLANS:
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
+ Merge Join Operator
condition map:
Inner Join 0 to 1
- Estimated key counts: Map 1 => 1
keys:
0 key (type: string)
1 key (type: string)
- input vertices:
- 0 Map 1
Position of Big Table: 1
Statistics: Num rows: 122 Data size: 33926 Basic stats: COMPLETE Column stats: NONE
Group By Operator
@@ -801,13 +782,11 @@ STAGE PLANS:
value expressions: _col0 (type: bigint)
auto parallelism: false
Execution mode: llap
- LLAP IO: no inputs
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -836,6 +815,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
+ SORTBUCKETCOLSPREFIX TRUE
bucket_count 4
bucket_field_name key
column.name.delimiter ,
@@ -855,7 +835,6 @@ STAGE PLANS:
name: default.bucket_big
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-09
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -884,6 +863,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
+ SORTBUCKETCOLSPREFIX TRUE
bucket_count 4
bucket_field_name key
column.name.delimiter ,
@@ -972,11 +952,10 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE)
Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
- Map 1
+ Map 2
Map Operator Tree:
TableScan
alias: a
@@ -986,22 +965,11 @@ STAGE PLANS:
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- null sort order: a
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- auto parallelism: true
- Execution mode: llap
- LLAP IO: no inputs
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -1030,6 +998,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
+ SORTBUCKETCOLSPREFIX TRUE
bucket_count 2
bucket_field_name key
column.name.delimiter ,
@@ -1049,50 +1018,20 @@ STAGE PLANS:
name: default.bucket_small
Truncated Path -> Alias:
/bucket_small/ds=2008-04-08 [a]
- Map 2
Map Operator Tree:
TableScan
- alias: b
+ alias: c
Statistics: Num rows: 116 Data size: 32232 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- Inner Join 0 to 2
- Estimated key counts: Map 1 => 1, Map 4 => 23
- keys:
- 0 key (type: string)
- 1 key (type: string)
- 2 key (type: string)
- input vertices:
- 0 Map 1
- 2 Map 4
- Position of Big Table: 1
- Statistics: Num rows: 244 Data size: 67852 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- value expressions: _col0 (type: bigint)
- auto parallelism: false
- Execution mode: llap
- LLAP IO: no inputs
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -1121,6 +1060,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
+ SORTBUCKETCOLSPREFIX TRUE
bucket_count 4
bucket_field_name key
column.name.delimiter ,
@@ -1140,7 +1080,6 @@ STAGE PLANS:
name: default.bucket_big
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-09
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -1169,6 +1108,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
+ SORTBUCKETCOLSPREFIX TRUE
bucket_count 4
bucket_field_name key
column.name.delimiter ,
@@ -1187,34 +1127,45 @@ STAGE PLANS:
name: default.bucket_big
name: default.bucket_big
Truncated Path -> Alias:
- /bucket_big/ds=2008-04-08 [b]
- /bucket_big/ds=2008-04-09 [b]
- Map 4
+ /bucket_big/ds=2008-04-08 [c]
+ /bucket_big/ds=2008-04-09 [c]
Map Operator Tree:
TableScan
- alias: c
+ alias: b
Statistics: Num rows: 116 Data size: 32232 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- null sort order: a
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE
- tag: 2
- auto parallelism: true
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 0 to 2
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+ 2 key (type: string)
+ Position of Big Table: 1
+ Statistics: Num rows: 244 Data size: 67852 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: bigint)
+ auto parallelism: false
Execution mode: llap
- LLAP IO: no inputs
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -1243,6 +1194,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
+ SORTBUCKETCOLSPREFIX TRUE
bucket_count 4
bucket_field_name key
column.name.delimiter ,
@@ -1262,7 +1214,6 @@ STAGE PLANS:
name: default.bucket_big
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-09
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -1291,6 +1242,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
+ SORTBUCKETCOLSPREFIX TRUE
bucket_count 4
bucket_field_name key
column.name.delimiter ,
@@ -1309,8 +1261,8 @@ STAGE PLANS:
name: default.bucket_big
name: default.bucket_big
Truncated Path -> Alias:
- /bucket_big/ds=2008-04-08 [c]
- /bucket_big/ds=2008-04-09 [c]
+ /bucket_big/ds=2008-04-08 [b]
+ /bucket_big/ds=2008-04-09 [b]
Reducer 3
Execution mode: llap
Needs Tagging: false
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_12.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_12.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_12.q.out
index 6555736..c14441a 100644
--- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_12.q.out
+++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_12.q.out
@@ -134,7 +134,7 @@ POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket3out
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@bucket_medium@ds=2008-04-08
-Warning: Shuffle Join MERGEJOIN[34][tables = [$hdt$_1, $hdt$_2, $hdt$_0, $hdt$_3]] in Stage 'Reducer 4' is a cross product
+Warning: Shuffle Join MERGEJOIN[36][tables = [$hdt$_1, $hdt$_2, $hdt$_0, $hdt$_3]] in Stage 'Reducer 4' is a cross product
PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key
PREHOOK: type: QUERY
POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key
@@ -148,12 +148,11 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 3 <- Map 1 (BROADCAST_EDGE), Map 2 (BROADCAST_EDGE)
Reducer 4 <- Map 3 (XPROD_EDGE), Map 6 (XPROD_EDGE)
Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
- Map 1
+ Map 3
Map Operator Tree:
TableScan
alias: a
@@ -167,22 +166,11 @@ STAGE PLANS:
expressions: key (type: string)
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: a
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- auto parallelism: true
- Execution mode: llap
- LLAP IO: no inputs
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -231,7 +219,6 @@ STAGE PLANS:
name: default.bucket_small
Truncated Path -> Alias:
/bucket_small/ds=2008-04-08 [a]
- Map 2
Map Operator Tree:
TableScan
alias: b
@@ -245,22 +232,11 @@ STAGE PLANS:
expressions: key (type: string)
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: a
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- auto parallelism: true
- Execution mode: llap
- LLAP IO: no inputs
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -309,7 +285,6 @@ STAGE PLANS:
name: default.bucket_medium
Truncated Path -> Alias:
/bucket_medium/ds=2008-04-08 [b]
- Map 3
Map Operator Tree:
TableScan
alias: c
@@ -323,18 +298,14 @@ STAGE PLANS:
expressions: key (type: string)
outputColumnNames: _col0
Statistics: Num rows: 111 Data size: 19719 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
+ Merge Join Operator
condition map:
Inner Join 0 to 1
Inner Join 1 to 2
- Estimated key counts: Map 1 => 1, Map 2 => 1
keys:
0 _col0 (type: string)
1 _col0 (type: string)
2 _col0 (type: string)
- input vertices:
- 0 Map 1
- 1 Map 2
Position of Big Table: 2
Statistics: Num rows: 244 Data size: 43381 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
@@ -344,13 +315,11 @@ STAGE PLANS:
tag: 0
auto parallelism: false
Execution mode: llap
- LLAP IO: no inputs
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -399,7 +368,6 @@ STAGE PLANS:
name: default.bucket_big
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-09
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -581,7 +549,7 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Shuffle Join MERGEJOIN[34][tables = [$hdt$_1, $hdt$_2, $hdt$_0, $hdt$_3]] in Stage 'Reducer 4' is a cross product
+Warning: Shuffle Join MERGEJOIN[36][tables = [$hdt$_1, $hdt$_2, $hdt$_0, $hdt$_3]] in Stage 'Reducer 4' is a cross product
PREHOOK: query: select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key
PREHOOK: type: QUERY
PREHOOK: Input: default@bucket_big
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out
index b78a517..78d02b8 100644
--- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out
+++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out
@@ -79,7 +79,6 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Map 4 (BROADCAST_EDGE)
Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
@@ -87,6 +86,17 @@ STAGE PLANS:
Map 1
Map Operator Tree:
TableScan
+ alias: b
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Operator Tree:
+ TableScan
alias: a
Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
@@ -96,15 +106,13 @@ STAGE PLANS:
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
+ Merge Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
- input vertices:
- 1 Map 4
Statistics: Num rows: 16 Data size: 2976 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: int), _col2 (type: int)
@@ -157,27 +165,6 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
Execution mode: llap
- LLAP IO: no inputs
- Map 4
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: int), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: string)
- Execution mode: llap
- LLAP IO: no inputs
Reducer 2
Execution mode: llap
Reduce Operator Tree:
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_14.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_14.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_14.q.out
index 72d2c62..235c13a 100644
--- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_14.q.out
+++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_14.q.out
@@ -49,27 +49,32 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE)
Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
+ alias: b
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Operator Tree:
+ TableScan
alias: a
Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
+ Merge Join Operator
condition map:
Left Outer Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
- input vertices:
- 1 Map 3
Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
@@ -81,23 +86,6 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: bigint)
Execution mode: llap
- LLAP IO: no inputs
- Map 3
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- Execution mode: llap
- LLAP IO: no inputs
Reducer 2
Execution mode: llap
Reduce Operator Tree:
@@ -156,11 +144,10 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 2 <- Map 1 (BROADCAST_EDGE)
Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
- Map 1
+ Map 2
Map Operator Tree:
TableScan
alias: a
@@ -169,14 +156,6 @@ STAGE PLANS:
expressions: key (type: int)
outputColumnNames: _col0
Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
- Execution mode: llap
- LLAP IO: no inputs
- Map 2
Map Operator Tree:
TableScan
alias: b
@@ -185,14 +164,12 @@ STAGE PLANS:
expressions: key (type: int)
outputColumnNames: _col0
Statistics: Num rows: 189 Data size: 756 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
+ Merge Join Operator
condition map:
Right Outer Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
- input vertices:
- 0 Map 1
Statistics: Num rows: 189 Data size: 1512 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
@@ -204,7 +181,6 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: bigint)
Execution mode: llap
- LLAP IO: no inputs
Reducer 3
Execution mode: llap
Reduce Operator Tree: