Posted to commits@hive.apache.org by xu...@apache.org on 2014/12/13 18:44:42 UTC
svn commit: r1645338 [3/9] - in /hive/branches/spark: data/conf/spark/
itests/src/test/resources/ ql/src/java/org/apache/hadoop/hive/ql/optimizer/
ql/src/test/results/clientpositive/spark/
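For context, the golden files below record EXPLAIN EXTENDED plans for bucket map join tests run on the Spark engine; the updated plans replace the shuffle join (Reduce Output Operator / Join Operator) with a Spark HashTable Sink plus Map Join Operator split across two stages. A minimal, hypothetical sketch of the kind of query and settings such a test exercises is shown here; the table names and aliases match the plans below, but the exact .q scripts are not reproduced in this mail, so treat this only as an illustration:

    -- Illustrative only: settings and query approximating the bucketmapjoin13 case.
    -- srcbucket_mapjoin_part_1 / _2 are bucketed-by-key, partitioned test tables.
    set hive.execution.engine=spark;
    set hive.auto.convert.join=true;
    set hive.optimize.bucketmapjoin=true;

    -- Join on the bucketing column; the small side (b) is expected to become the
    -- hash-table (map-join) side, as reflected in the Stage-2 / Stage-1 split below.
    SELECT count(*)
    FROM srcbucket_mapjoin_part_1 a
    JOIN srcbucket_mapjoin_part_2 b
      ON a.key = b.key;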
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin13.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin13.q.out?rev=1645338&r1=1645337&r2=1645338&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin13.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin13.q.out Sat Dec 13 17:44:41 2014
@@ -120,34 +120,35 @@ TOK_QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
+ Stage-2 is a root stage
+ Stage-1 depends on stages: Stage-2
Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-1
+ Stage: Stage-2
Spark
- Edges:
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3)
- Reducer 3 <- Reducer 2 (GROUP, 1)
#### A masked pattern was here ####
Vertices:
- Map 1
+ Map 3
Map Operator Tree:
TableScan
- alias: a
- Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ alias: b
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- auto parallelism: false
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ Position of Big Table: 0
+ Local Work:
+ Map Reduce Local Work
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -161,18 +162,18 @@ STAGE PLANS:
properties:
COLUMN_STATS_ACCURATE true
bucket_count 2
- bucket_field_name value
+ bucket_field_name key
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_1
+ name default.srcbucket_mapjoin_part_2
numFiles 2
numRows 500
partition_columns part
partition_columns.types string
rawDataSize 5312
- serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 5812
@@ -188,27 +189,75 @@ STAGE PLANS:
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_1
+ name default.srcbucket_mapjoin_part_2
partition_columns part
partition_columns.types string
- serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcbucket_mapjoin_part_1
- name: default.srcbucket_mapjoin_part_1
+ name: default.srcbucket_mapjoin_part_2
+ name: default.srcbucket_mapjoin_part_2
+ Truncated Path -> Alias:
+ /srcbucket_mapjoin_part_2/part=1 [b]
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ input vertices:
+ 1 Map 3
+ Position of Big Table: 0
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: bigint)
+ auto parallelism: false
+ Local Work:
+ Map Reduce Local Work
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: part=2
+ base file name: part=1
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
- part 2
+ part 1
properties:
COLUMN_STATS_ACCURATE true
bucket_count 2
- bucket_field_name key
+ bucket_field_name value
columns key,value
columns.comments
columns.types int:string
@@ -245,36 +294,13 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcbucket_mapjoin_part_1
name: default.srcbucket_mapjoin_part_1
- Truncated Path -> Alias:
- /srcbucket_mapjoin_part_1/part=1 [a]
- /srcbucket_mapjoin_part_1/part=2 [a]
- Map 4
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- auto parallelism: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: part=1
+ base file name: part=2
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
- part 1
+ part 2
properties:
COLUMN_STATS_ACCURATE true
bucket_count 2
@@ -283,13 +309,13 @@ STAGE PLANS:
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_2
+ name default.srcbucket_mapjoin_part_1
numFiles 2
numRows 500
partition_columns part
partition_columns.types string
rawDataSize 5312
- serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 5812
@@ -305,40 +331,20 @@ STAGE PLANS:
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_2
+ name default.srcbucket_mapjoin_part_1
partition_columns part
partition_columns.types string
- serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcbucket_mapjoin_part_2
- name: default.srcbucket_mapjoin_part_2
+ name: default.srcbucket_mapjoin_part_1
+ name: default.srcbucket_mapjoin_part_1
Truncated Path -> Alias:
- /srcbucket_mapjoin_part_2/part=1 [b]
+ /srcbucket_mapjoin_part_1/part=1 [a]
+ /srcbucket_mapjoin_part_1/part=2 [a]
Reducer 2
- Needs Tagging: true
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0
- 1
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- value expressions: _col0 (type: bigint)
- auto parallelism: false
- Reducer 3
Needs Tagging: false
Reduce Operator Tree:
Group By Operator
@@ -458,44 +464,45 @@ TOK_QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
+ Stage-2 is a root stage
+ Stage-1 depends on stages: Stage-2
Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-1
+ Stage: Stage-2
Spark
- Edges:
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3)
- Reducer 3 <- Reducer 2 (GROUP, 1)
#### A masked pattern was here ####
Vertices:
- Map 1
+ Map 3
Map Operator Tree:
TableScan
- alias: a
+ alias: b
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- auto parallelism: false
+ Spark HashTable Sink Operator
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ Position of Big Table: 0
+ Local Work:
+ Map Reduce Local Work
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: part=2
+ base file name: part=1
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
- part 2
+ part 1
properties:
COLUMN_STATS_ACCURATE true
bucket_count 2
@@ -504,13 +511,13 @@ STAGE PLANS:
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_1
+ name default.srcbucket_mapjoin_part_2
numFiles 2
numRows 500
partition_columns part
partition_columns.types string
rawDataSize 5312
- serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 5812
@@ -526,45 +533,71 @@ STAGE PLANS:
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_1
+ name default.srcbucket_mapjoin_part_2
partition_columns part
partition_columns.types string
- serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcbucket_mapjoin_part_1
- name: default.srcbucket_mapjoin_part_1
+ name: default.srcbucket_mapjoin_part_2
+ name: default.srcbucket_mapjoin_part_2
Truncated Path -> Alias:
- /srcbucket_mapjoin_part_1/part=2 [a]
- Map 4
+ /srcbucket_mapjoin_part_2/part=1 [b]
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
Map Operator Tree:
TableScan
- alias: b
+ alias: a
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- auto parallelism: false
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ input vertices:
+ 1 Map 3
+ Position of Big Table: 0
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: bigint)
+ auto parallelism: false
+ Local Work:
+ Map Reduce Local Work
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: part=1
+ base file name: part=2
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
- part 1
+ part 2
properties:
COLUMN_STATS_ACCURATE true
bucket_count 2
@@ -573,13 +606,13 @@ STAGE PLANS:
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_2
+ name default.srcbucket_mapjoin_part_1
numFiles 2
numRows 500
partition_columns part
partition_columns.types string
rawDataSize 5312
- serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 5812
@@ -595,40 +628,19 @@ STAGE PLANS:
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_2
+ name default.srcbucket_mapjoin_part_1
partition_columns part
partition_columns.types string
- serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcbucket_mapjoin_part_2
- name: default.srcbucket_mapjoin_part_2
+ name: default.srcbucket_mapjoin_part_1
+ name: default.srcbucket_mapjoin_part_1
Truncated Path -> Alias:
- /srcbucket_mapjoin_part_2/part=1 [b]
+ /srcbucket_mapjoin_part_1/part=2 [a]
Reducer 2
- Needs Tagging: true
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0
- 1
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- value expressions: _col0 (type: bigint)
- auto parallelism: false
- Reducer 3
Needs Tagging: false
Reduce Operator Tree:
Group By Operator
@@ -747,44 +759,45 @@ TOK_QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
+ Stage-2 is a root stage
+ Stage-1 depends on stages: Stage-2
Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-1
+ Stage: Stage-2
Spark
- Edges:
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3)
- Reducer 3 <- Reducer 2 (GROUP, 1)
#### A masked pattern was here ####
Vertices:
- Map 1
+ Map 3
Map Operator Tree:
TableScan
- alias: a
+ alias: b
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- auto parallelism: false
+ Spark HashTable Sink Operator
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ Position of Big Table: 0
+ Local Work:
+ Map Reduce Local Work
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: part=2
+ base file name: part=1
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
- part 2
+ part 1
properties:
COLUMN_STATS_ACCURATE true
bucket_count 2
@@ -793,13 +806,13 @@ STAGE PLANS:
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_1
+ name default.srcbucket_mapjoin_part_2
numFiles 2
numRows 500
partition_columns part
partition_columns.types string
rawDataSize 5312
- serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 5812
@@ -815,45 +828,71 @@ STAGE PLANS:
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_1
+ name default.srcbucket_mapjoin_part_2
partition_columns part
partition_columns.types string
- serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcbucket_mapjoin_part_1
- name: default.srcbucket_mapjoin_part_1
+ name: default.srcbucket_mapjoin_part_2
+ name: default.srcbucket_mapjoin_part_2
Truncated Path -> Alias:
- /srcbucket_mapjoin_part_1/part=2 [a]
- Map 4
+ /srcbucket_mapjoin_part_2/part=1 [b]
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
Map Operator Tree:
TableScan
- alias: b
+ alias: a
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- auto parallelism: false
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ input vertices:
+ 1 Map 3
+ Position of Big Table: 0
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: bigint)
+ auto parallelism: false
+ Local Work:
+ Map Reduce Local Work
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: part=1
+ base file name: part=2
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
- part 1
+ part 2
properties:
COLUMN_STATS_ACCURATE true
bucket_count 2
@@ -862,13 +901,13 @@ STAGE PLANS:
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_2
+ name default.srcbucket_mapjoin_part_1
numFiles 2
numRows 500
partition_columns part
partition_columns.types string
rawDataSize 5312
- serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 5812
@@ -884,40 +923,19 @@ STAGE PLANS:
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_2
+ name default.srcbucket_mapjoin_part_1
partition_columns part
partition_columns.types string
- serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcbucket_mapjoin_part_2
- name: default.srcbucket_mapjoin_part_2
+ name: default.srcbucket_mapjoin_part_1
+ name: default.srcbucket_mapjoin_part_1
Truncated Path -> Alias:
- /srcbucket_mapjoin_part_2/part=1 [b]
+ /srcbucket_mapjoin_part_1/part=2 [a]
Reducer 2
- Needs Tagging: true
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0
- 1
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- value expressions: _col0 (type: bigint)
- auto parallelism: false
- Reducer 3
Needs Tagging: false
Reduce Operator Tree:
Group By Operator
@@ -1038,44 +1056,45 @@ TOK_QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
+ Stage-2 is a root stage
+ Stage-1 depends on stages: Stage-2
Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-1
+ Stage: Stage-2
Spark
- Edges:
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3)
- Reducer 3 <- Reducer 2 (GROUP, 1)
#### A masked pattern was here ####
Vertices:
- Map 1
+ Map 3
Map Operator Tree:
TableScan
- alias: a
+ alias: b
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- auto parallelism: false
+ Spark HashTable Sink Operator
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ Position of Big Table: 0
+ Local Work:
+ Map Reduce Local Work
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: part=2
+ base file name: part=1
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
- part 2
+ part 1
properties:
COLUMN_STATS_ACCURATE true
bucket_count 2
@@ -1084,13 +1103,13 @@ STAGE PLANS:
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_1
+ name default.srcbucket_mapjoin_part_2
numFiles 2
numRows 500
partition_columns part
partition_columns.types string
rawDataSize 5312
- serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 5812
@@ -1101,50 +1120,76 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
bucket_count 2
- bucket_field_name value
+ bucket_field_name key
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_1
+ name default.srcbucket_mapjoin_part_2
partition_columns part
partition_columns.types string
- serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcbucket_mapjoin_part_1
- name: default.srcbucket_mapjoin_part_1
+ name: default.srcbucket_mapjoin_part_2
+ name: default.srcbucket_mapjoin_part_2
Truncated Path -> Alias:
- /srcbucket_mapjoin_part_1/part=2 [a]
- Map 4
+ /srcbucket_mapjoin_part_2/part=1 [b]
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
Map Operator Tree:
TableScan
- alias: b
+ alias: a
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- auto parallelism: false
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ input vertices:
+ 1 Map 3
+ Position of Big Table: 0
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: bigint)
+ auto parallelism: false
+ Local Work:
+ Map Reduce Local Work
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: part=1
+ base file name: part=2
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
- part 1
+ part 2
properties:
COLUMN_STATS_ACCURATE true
bucket_count 2
@@ -1153,13 +1198,13 @@ STAGE PLANS:
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_2
+ name default.srcbucket_mapjoin_part_1
numFiles 2
numRows 500
partition_columns part
partition_columns.types string
rawDataSize 5312
- serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 5812
@@ -1170,45 +1215,24 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
bucket_count 2
- bucket_field_name key
+ bucket_field_name value
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_2
+ name default.srcbucket_mapjoin_part_1
partition_columns part
partition_columns.types string
- serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcbucket_mapjoin_part_2
- name: default.srcbucket_mapjoin_part_2
+ name: default.srcbucket_mapjoin_part_1
+ name: default.srcbucket_mapjoin_part_1
Truncated Path -> Alias:
- /srcbucket_mapjoin_part_2/part=1 [b]
+ /srcbucket_mapjoin_part_1/part=2 [a]
Reducer 2
- Needs Tagging: true
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0
- 1
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- value expressions: _col0 (type: bigint)
- auto parallelism: false
- Reducer 3
Needs Tagging: false
Reduce Operator Tree:
Group By Operator
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin2.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin2.q.out?rev=1645338&r1=1645337&r2=1645338&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin2.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin2.q.out Sat Dec 13 17:44:41 2014
@@ -158,35 +158,36 @@ TOK_QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
+ Stage-3 is a root stage
+ Stage-1 depends on stages: Stage-3
Stage-0 depends on stages: Stage-1
Stage-2 depends on stages: Stage-0
STAGE PLANS:
- Stage: Stage-1
+ Stage: Stage-3
Spark
- Edges:
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
Vertices:
- Map 1
+ Map 2
Map Operator Tree:
TableScan
- alias: a
- Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+ alias: b
+ Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
- Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- value expressions: value (type: string)
- auto parallelism: false
+ Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ condition expressions:
+ 0 {key} {value}
+ 1 {value}
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ Position of Big Table: 0
+ Local Work:
+ Map Reduce Local Work
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -199,64 +200,107 @@ STAGE PLANS:
ds 2008-04-08
properties:
COLUMN_STATS_ACCURATE true
- bucket_count 4
+ bucket_count 2
bucket_field_name key
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part
- numFiles 4
+ name default.srcbucket_mapjoin_part_2
+ numFiles 2
numRows 0
partition_columns ds
partition_columns.types string
rawDataSize 0
- serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 5812
+ totalSize 3062
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- bucket_count 4
+ bucket_count 2
bucket_field_name key
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part
+ name default.srcbucket_mapjoin_part_2
partition_columns ds
partition_columns.types string
- serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcbucket_mapjoin_part
- name: default.srcbucket_mapjoin_part
+ name: default.srcbucket_mapjoin_part_2
+ name: default.srcbucket_mapjoin_part_2
Truncated Path -> Alias:
- /srcbucket_mapjoin_part/ds=2008-04-08 [a]
- Map 3
+ /srcbucket_mapjoin_part_2/ds=2008-04-08 [b]
+
+ Stage: Stage-1
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
Map Operator Tree:
TableScan
- alias: b
- Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE
+ alias: a
+ Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
- Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- value expressions: value (type: string)
- auto parallelism: false
+ Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key} {value}
+ 1 {value}
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ outputColumnNames: _col0, _col1, _col7
+ input vertices:
+ 1 Map 2
+ Position of Big Table: 0
+ Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value1,value2
+ columns.comments
+ columns.types string:string:string
+#### A masked pattern was here ####
+ name default.bucketmapjoin_tmp_result
+ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.bucketmapjoin_tmp_result
+ TotalFiles: 1
+ GatherStats: true
+ MultiFileSpray: false
+ Local Work:
+ Map Reduce Local Work
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -269,87 +313,46 @@ STAGE PLANS:
ds 2008-04-08
properties:
COLUMN_STATS_ACCURATE true
- bucket_count 2
+ bucket_count 4
bucket_field_name key
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_2
- numFiles 2
+ name default.srcbucket_mapjoin_part
+ numFiles 4
numRows 0
partition_columns ds
partition_columns.types string
rawDataSize 0
- serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 3062
+ totalSize 5812
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- bucket_count 2
+ bucket_count 4
bucket_field_name key
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_2
+ name default.srcbucket_mapjoin_part
partition_columns ds
partition_columns.types string
- serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcbucket_mapjoin_part_2
- name: default.srcbucket_mapjoin_part_2
+ name: default.srcbucket_mapjoin_part
+ name: default.srcbucket_mapjoin_part
Truncated Path -> Alias:
- /srcbucket_mapjoin_part_2/ds=2008-04-08 [b]
- Reducer 2
- Needs Tagging: true
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0 {KEY.reducesinkkey0} {VALUE._col0}
- 1 {VALUE._col0}
- outputColumnNames: _col0, _col1, _col7
- Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- GlobalTableId: 1
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count -1
- columns key,value1,value2
- columns.comments
- columns.types string:string:string
-#### A masked pattern was here ####
- name default.bucketmapjoin_tmp_result
- serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucketmapjoin_tmp_result
- TotalFiles: 1
- GatherStats: true
- MultiFileSpray: false
+ /srcbucket_mapjoin_part/ds=2008-04-08 [a]
Stage: Stage-0
Move Operator
@@ -552,15 +555,14 @@ TOK_QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
+ Stage-3 is a root stage
+ Stage-1 depends on stages: Stage-3
Stage-0 depends on stages: Stage-1
Stage-2 depends on stages: Stage-0
STAGE PLANS:
- Stage: Stage-1
+ Stage: Stage-3
Spark
- Edges:
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -573,14 +575,16 @@ STAGE PLANS:
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- value expressions: value (type: string)
- auto parallelism: false
+ Spark HashTable Sink Operator
+ condition expressions:
+ 0 {value}
+ 1 {value}
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ Position of Big Table: 1
+ Local Work:
+ Map Reduce Local Work
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -633,7 +637,12 @@ STAGE PLANS:
name: default.srcbucket_mapjoin_part
Truncated Path -> Alias:
/srcbucket_mapjoin_part/ds=2008-04-08 [a]
- Map 3
+
+ Stage: Stage-1
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 2
Map Operator Tree:
TableScan
alias: b
@@ -643,14 +652,57 @@ STAGE PLANS:
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- value expressions: value (type: string)
- auto parallelism: false
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key} {value}
+ 1 {value}
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ outputColumnNames: _col0, _col1, _col7
+ input vertices:
+ 0 Map 1
+ Position of Big Table: 1
+ Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns key,value1,value2
+ columns.comments
+ columns.types string:string:string
+#### A masked pattern was here ####
+ name default.bucketmapjoin_tmp_result
+ numFiles 1
+ numRows 564
+ rawDataSize 10503
+ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 11067
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.bucketmapjoin_tmp_result
+ TotalFiles: 1
+ GatherStats: true
+ MultiFileSpray: false
+ Local Work:
+ Map Reduce Local Work
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -703,52 +755,6 @@ STAGE PLANS:
name: default.srcbucket_mapjoin_part_2
Truncated Path -> Alias:
/srcbucket_mapjoin_part_2/ds=2008-04-08 [b]
- Reducer 2
- Needs Tagging: true
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0 {KEY.reducesinkkey0} {VALUE._col0}
- 1 {VALUE._col0}
- outputColumnNames: _col0, _col1, _col7
- Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- GlobalTableId: 1
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- COLUMN_STATS_ACCURATE true
- bucket_count -1
- columns key,value1,value2
- columns.comments
- columns.types string:string:string
-#### A masked pattern was here ####
- name default.bucketmapjoin_tmp_result
- numFiles 3
- numRows 564
- rawDataSize 10503
- serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 11067
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucketmapjoin_tmp_result
- TotalFiles: 1
- GatherStats: true
- MultiFileSpray: false
Stage: Stage-0
Move Operator
@@ -766,7 +772,7 @@ STAGE PLANS:
columns.types string:string:string
#### A masked pattern was here ####
name default.bucketmapjoin_tmp_result
- numFiles 3
+ numFiles 1
numRows 564
rawDataSize 10503
serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
@@ -968,35 +974,36 @@ TOK_QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
+ Stage-3 is a root stage
+ Stage-1 depends on stages: Stage-3
Stage-0 depends on stages: Stage-1
Stage-2 depends on stages: Stage-0
STAGE PLANS:
- Stage: Stage-1
+ Stage: Stage-3
Spark
- Edges:
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3)
#### A masked pattern was here ####
Vertices:
- Map 1
+ Map 2
Map Operator Tree:
TableScan
- alias: a
- Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+ alias: b
+ Statistics: Num rows: 58 Data size: 6124 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
- Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- value expressions: value (type: string)
- auto parallelism: false
+ Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ condition expressions:
+ 0 {key} {value}
+ 1 {value}
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ Position of Big Table: 0
+ Local Work:
+ Map Reduce Local Work
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -1009,74 +1016,51 @@ STAGE PLANS:
ds 2008-04-08
properties:
COLUMN_STATS_ACCURATE true
- bucket_count 4
+ bucket_count 2
bucket_field_name key
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part
- numFiles 4
+ name default.srcbucket_mapjoin_part_2
+ numFiles 2
numRows 0
partition_columns ds
partition_columns.types string
rawDataSize 0
- serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 5812
+ totalSize 3062
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- bucket_count 4
+ bucket_count 2
bucket_field_name key
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part
+ name default.srcbucket_mapjoin_part_2
partition_columns ds
partition_columns.types string
- serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcbucket_mapjoin_part
- name: default.srcbucket_mapjoin_part
- Truncated Path -> Alias:
- /srcbucket_mapjoin_part/ds=2008-04-08 [a]
- Map 3
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 58 Data size: 6124 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- value expressions: value (type: string)
- auto parallelism: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
+ name: default.srcbucket_mapjoin_part_2
+ name: default.srcbucket_mapjoin_part_2
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
+ base file name: ds=2008-04-09
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
- ds 2008-04-08
+ ds 2008-04-09
properties:
COLUMN_STATS_ACCURATE true
bucket_count 2
@@ -1117,102 +1101,127 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcbucket_mapjoin_part_2
name: default.srcbucket_mapjoin_part_2
+ Truncated Path -> Alias:
+ /srcbucket_mapjoin_part_2/ds=2008-04-08 [b]
+ /srcbucket_mapjoin_part_2/ds=2008-04-09 [b]
+
+ Stage: Stage-1
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key} {value}
+ 1 {value}
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ outputColumnNames: _col0, _col1, _col7
+ input vertices:
+ 1 Map 2
+ Position of Big Table: 0
+ Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns key,value1,value2
+ columns.comments
+ columns.types string:string:string
+#### A masked pattern was here ####
+ name default.bucketmapjoin_tmp_result
+ numFiles 1
+ numRows 564
+ rawDataSize 10503
+ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 11067
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.bucketmapjoin_tmp_result
+ TotalFiles: 1
+ GatherStats: true
+ MultiFileSpray: false
+ Local Work:
+ Map Reduce Local Work
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-09
+ base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
- ds 2008-04-09
+ ds 2008-04-08
properties:
COLUMN_STATS_ACCURATE true
- bucket_count 2
+ bucket_count 4
bucket_field_name key
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_2
- numFiles 2
+ name default.srcbucket_mapjoin_part
+ numFiles 4
numRows 0
partition_columns ds
partition_columns.types string
rawDataSize 0
- serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 3062
+ totalSize 5812
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- bucket_count 2
+ bucket_count 4
bucket_field_name key
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_2
+ name default.srcbucket_mapjoin_part
partition_columns ds
partition_columns.types string
- serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcbucket_mapjoin_part_2
- name: default.srcbucket_mapjoin_part_2
+ name: default.srcbucket_mapjoin_part
+ name: default.srcbucket_mapjoin_part
Truncated Path -> Alias:
- /srcbucket_mapjoin_part_2/ds=2008-04-08 [b]
- /srcbucket_mapjoin_part_2/ds=2008-04-09 [b]
- Reducer 2
- Needs Tagging: true
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0 {KEY.reducesinkkey0} {VALUE._col0}
- 1 {VALUE._col0}
- outputColumnNames: _col0, _col1, _col7
- Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- GlobalTableId: 1
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- COLUMN_STATS_ACCURATE true
- bucket_count -1
- columns key,value1,value2
- columns.comments
- columns.types string:string:string
-#### A masked pattern was here ####
- name default.bucketmapjoin_tmp_result
- numFiles 3
- numRows 564
- rawDataSize 10503
- serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 11067
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucketmapjoin_tmp_result
- TotalFiles: 1
- GatherStats: true
- MultiFileSpray: false
+ /srcbucket_mapjoin_part/ds=2008-04-08 [a]
Stage: Stage-0
Move Operator
@@ -1230,7 +1239,7 @@ STAGE PLANS:
columns.types string:string:string
#### A masked pattern was here ####
name default.bucketmapjoin_tmp_result
- numFiles 3
+ numFiles 1
numRows 564
rawDataSize 10503
serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}