You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by xu...@apache.org on 2014/12/15 18:11:42 UTC
svn commit: r1642997 [4/42] - in /hive/branches/spark:
itests/src/test/resources/
ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/
ql/src/test/results/clientpositive/spark/
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin13.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin13.q.out?rev=1642997&r1=1642996&r2=1642997&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin13.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin13.q.out Tue Dec 2 19:57:10 2014
@@ -120,35 +120,34 @@ TOK_QUERY
STAGE DEPENDENCIES:
- Stage-2 is a root stage
- Stage-1 depends on stages: Stage-2
+ Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-2
+ Stage: Stage-1
Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3)
+ Reducer 3 <- Reducer 2 (GROUP, 1)
#### A masked pattern was here ####
Vertices:
- Map 3
+ Map 1
Map Operator Tree:
TableScan
- alias: b
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ alias: a
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- condition expressions:
- 0
- 1
- keys:
- 0 key (type: int)
- 1 key (type: int)
- Position of Big Table: 0
- Local Work:
- Map Reduce Local Work
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: int)
+ sort order: +
+ Map-reduce partition columns: key (type: int)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ tag: 0
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -162,18 +161,18 @@ STAGE PLANS:
properties:
COLUMN_STATS_ACCURATE true
bucket_count 2
- bucket_field_name key
+ bucket_field_name value
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_2
+ name default.srcbucket_mapjoin_part_1
numFiles 2
numRows 500
partition_columns part
partition_columns.types string
rawDataSize 5312
- serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 5812
@@ -189,75 +188,27 @@ STAGE PLANS:
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_2
+ name default.srcbucket_mapjoin_part_1
partition_columns part
partition_columns.types string
- serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcbucket_mapjoin_part_2
- name: default.srcbucket_mapjoin_part_2
- Truncated Path -> Alias:
- /srcbucket_mapjoin_part_2/part=1 [b]
-
- Stage: Stage-1
- Spark
- Edges:
- Reducer 2 <- Map 1 (GROUP, 1)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: a
- Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0
- 1
- keys:
- 0 key (type: int)
- 1 key (type: int)
- input vertices:
- 1 Map 3
- Position of Big Table: 0
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- value expressions: _col0 (type: bigint)
- auto parallelism: false
- Local Work:
- Map Reduce Local Work
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
+ name: default.srcbucket_mapjoin_part_1
+ name: default.srcbucket_mapjoin_part_1
#### A masked pattern was here ####
Partition
- base file name: part=1
+ base file name: part=2
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
- part 1
+ part 2
properties:
COLUMN_STATS_ACCURATE true
bucket_count 2
- bucket_field_name value
+ bucket_field_name key
columns key,value
columns.comments
columns.types int:string
@@ -294,13 +245,36 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcbucket_mapjoin_part_1
name: default.srcbucket_mapjoin_part_1
+ Truncated Path -> Alias:
+ /srcbucket_mapjoin_part_1/part=1 [a]
+ /srcbucket_mapjoin_part_1/part=2 [a]
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: int)
+ sort order: +
+ Map-reduce partition columns: key (type: int)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ tag: 1
+ auto parallelism: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: part=2
+ base file name: part=1
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
- part 2
+ part 1
properties:
COLUMN_STATS_ACCURATE true
bucket_count 2
@@ -309,13 +283,13 @@ STAGE PLANS:
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_1
+ name default.srcbucket_mapjoin_part_2
numFiles 2
numRows 500
partition_columns part
partition_columns.types string
rawDataSize 5312
- serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 5812
@@ -331,20 +305,40 @@ STAGE PLANS:
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_1
+ name default.srcbucket_mapjoin_part_2
partition_columns part
partition_columns.types string
- serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcbucket_mapjoin_part_1
- name: default.srcbucket_mapjoin_part_1
+ name: default.srcbucket_mapjoin_part_2
+ name: default.srcbucket_mapjoin_part_2
Truncated Path -> Alias:
- /srcbucket_mapjoin_part_1/part=1 [a]
- /srcbucket_mapjoin_part_1/part=2 [a]
+ /srcbucket_mapjoin_part_2/part=1 [b]
Reducer 2
+ Needs Tagging: true
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: bigint)
+ auto parallelism: false
+ Reducer 3
Needs Tagging: false
Reduce Operator Tree:
Group By Operator
@@ -464,45 +458,44 @@ TOK_QUERY
STAGE DEPENDENCIES:
- Stage-2 is a root stage
- Stage-1 depends on stages: Stage-2
+ Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-2
+ Stage: Stage-1
Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3)
+ Reducer 3 <- Reducer 2 (GROUP, 1)
#### A masked pattern was here ####
Vertices:
- Map 3
+ Map 1
Map Operator Tree:
TableScan
- alias: b
+ alias: a
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- condition expressions:
- 0
- 1
- keys:
- 0 key (type: int)
- 1 key (type: int)
- Position of Big Table: 0
- Local Work:
- Map Reduce Local Work
+ Reduce Output Operator
+ key expressions: key (type: int)
+ sort order: +
+ Map-reduce partition columns: key (type: int)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ tag: 0
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: part=1
+ base file name: part=2
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
- part 1
+ part 2
properties:
COLUMN_STATS_ACCURATE true
bucket_count 2
@@ -511,13 +504,13 @@ STAGE PLANS:
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_2
+ name default.srcbucket_mapjoin_part_1
numFiles 2
numRows 500
partition_columns part
partition_columns.types string
rawDataSize 5312
- serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 5812
@@ -533,71 +526,45 @@ STAGE PLANS:
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_2
+ name default.srcbucket_mapjoin_part_1
partition_columns part
partition_columns.types string
- serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcbucket_mapjoin_part_2
- name: default.srcbucket_mapjoin_part_2
+ name: default.srcbucket_mapjoin_part_1
+ name: default.srcbucket_mapjoin_part_1
Truncated Path -> Alias:
- /srcbucket_mapjoin_part_2/part=1 [b]
-
- Stage: Stage-1
- Spark
- Edges:
- Reducer 2 <- Map 1 (GROUP, 1)
-#### A masked pattern was here ####
- Vertices:
- Map 1
+ /srcbucket_mapjoin_part_1/part=2 [a]
+ Map 4
Map Operator Tree:
TableScan
- alias: a
+ alias: b
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0
- 1
- keys:
- 0 key (type: int)
- 1 key (type: int)
- input vertices:
- 1 Map 3
- Position of Big Table: 0
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- value expressions: _col0 (type: bigint)
- auto parallelism: false
- Local Work:
- Map Reduce Local Work
+ Reduce Output Operator
+ key expressions: key (type: int)
+ sort order: +
+ Map-reduce partition columns: key (type: int)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ tag: 1
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: part=2
+ base file name: part=1
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
- part 2
+ part 1
properties:
COLUMN_STATS_ACCURATE true
bucket_count 2
@@ -606,13 +573,13 @@ STAGE PLANS:
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_1
+ name default.srcbucket_mapjoin_part_2
numFiles 2
numRows 500
partition_columns part
partition_columns.types string
rawDataSize 5312
- serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 5812
@@ -628,19 +595,40 @@ STAGE PLANS:
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_1
+ name default.srcbucket_mapjoin_part_2
partition_columns part
partition_columns.types string
- serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcbucket_mapjoin_part_1
- name: default.srcbucket_mapjoin_part_1
+ name: default.srcbucket_mapjoin_part_2
+ name: default.srcbucket_mapjoin_part_2
Truncated Path -> Alias:
- /srcbucket_mapjoin_part_1/part=2 [a]
+ /srcbucket_mapjoin_part_2/part=1 [b]
Reducer 2
+ Needs Tagging: true
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: bigint)
+ auto parallelism: false
+ Reducer 3
Needs Tagging: false
Reduce Operator Tree:
Group By Operator
@@ -759,45 +747,44 @@ TOK_QUERY
STAGE DEPENDENCIES:
- Stage-2 is a root stage
- Stage-1 depends on stages: Stage-2
+ Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-2
+ Stage: Stage-1
Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3)
+ Reducer 3 <- Reducer 2 (GROUP, 1)
#### A masked pattern was here ####
Vertices:
- Map 3
+ Map 1
Map Operator Tree:
TableScan
- alias: b
+ alias: a
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- condition expressions:
- 0
- 1
- keys:
- 0 key (type: int)
- 1 key (type: int)
- Position of Big Table: 0
- Local Work:
- Map Reduce Local Work
+ Reduce Output Operator
+ key expressions: key (type: int)
+ sort order: +
+ Map-reduce partition columns: key (type: int)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ tag: 0
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: part=1
+ base file name: part=2
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
- part 1
+ part 2
properties:
COLUMN_STATS_ACCURATE true
bucket_count 2
@@ -806,13 +793,13 @@ STAGE PLANS:
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_2
+ name default.srcbucket_mapjoin_part_1
numFiles 2
numRows 500
partition_columns part
partition_columns.types string
rawDataSize 5312
- serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 5812
@@ -828,71 +815,45 @@ STAGE PLANS:
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_2
+ name default.srcbucket_mapjoin_part_1
partition_columns part
partition_columns.types string
- serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcbucket_mapjoin_part_2
- name: default.srcbucket_mapjoin_part_2
+ name: default.srcbucket_mapjoin_part_1
+ name: default.srcbucket_mapjoin_part_1
Truncated Path -> Alias:
- /srcbucket_mapjoin_part_2/part=1 [b]
-
- Stage: Stage-1
- Spark
- Edges:
- Reducer 2 <- Map 1 (GROUP, 1)
-#### A masked pattern was here ####
- Vertices:
- Map 1
+ /srcbucket_mapjoin_part_1/part=2 [a]
+ Map 4
Map Operator Tree:
TableScan
- alias: a
+ alias: b
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0
- 1
- keys:
- 0 key (type: int)
- 1 key (type: int)
- input vertices:
- 1 Map 3
- Position of Big Table: 0
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- value expressions: _col0 (type: bigint)
- auto parallelism: false
- Local Work:
- Map Reduce Local Work
+ Reduce Output Operator
+ key expressions: key (type: int)
+ sort order: +
+ Map-reduce partition columns: key (type: int)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ tag: 1
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: part=2
+ base file name: part=1
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
- part 2
+ part 1
properties:
COLUMN_STATS_ACCURATE true
bucket_count 2
@@ -901,13 +862,13 @@ STAGE PLANS:
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_1
+ name default.srcbucket_mapjoin_part_2
numFiles 2
numRows 500
partition_columns part
partition_columns.types string
rawDataSize 5312
- serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 5812
@@ -923,19 +884,40 @@ STAGE PLANS:
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_1
+ name default.srcbucket_mapjoin_part_2
partition_columns part
partition_columns.types string
- serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcbucket_mapjoin_part_1
- name: default.srcbucket_mapjoin_part_1
+ name: default.srcbucket_mapjoin_part_2
+ name: default.srcbucket_mapjoin_part_2
Truncated Path -> Alias:
- /srcbucket_mapjoin_part_1/part=2 [a]
+ /srcbucket_mapjoin_part_2/part=1 [b]
Reducer 2
+ Needs Tagging: true
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: bigint)
+ auto parallelism: false
+ Reducer 3
Needs Tagging: false
Reduce Operator Tree:
Group By Operator
@@ -1056,45 +1038,44 @@ TOK_QUERY
STAGE DEPENDENCIES:
- Stage-2 is a root stage
- Stage-1 depends on stages: Stage-2
+ Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-2
+ Stage: Stage-1
Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3)
+ Reducer 3 <- Reducer 2 (GROUP, 1)
#### A masked pattern was here ####
Vertices:
- Map 3
+ Map 1
Map Operator Tree:
TableScan
- alias: b
+ alias: a
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- condition expressions:
- 0
- 1
- keys:
- 0 key (type: int)
- 1 key (type: int)
- Position of Big Table: 0
- Local Work:
- Map Reduce Local Work
+ Reduce Output Operator
+ key expressions: key (type: int)
+ sort order: +
+ Map-reduce partition columns: key (type: int)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ tag: 0
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: part=1
+ base file name: part=2
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
- part 1
+ part 2
properties:
COLUMN_STATS_ACCURATE true
bucket_count 2
@@ -1103,13 +1084,13 @@ STAGE PLANS:
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_2
+ name default.srcbucket_mapjoin_part_1
numFiles 2
numRows 500
partition_columns part
partition_columns.types string
rawDataSize 5312
- serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 5812
@@ -1120,76 +1101,50 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
bucket_count 2
- bucket_field_name key
+ bucket_field_name value
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_2
+ name default.srcbucket_mapjoin_part_1
partition_columns part
partition_columns.types string
- serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcbucket_mapjoin_part_2
- name: default.srcbucket_mapjoin_part_2
+ name: default.srcbucket_mapjoin_part_1
+ name: default.srcbucket_mapjoin_part_1
Truncated Path -> Alias:
- /srcbucket_mapjoin_part_2/part=1 [b]
-
- Stage: Stage-1
- Spark
- Edges:
- Reducer 2 <- Map 1 (GROUP, 1)
-#### A masked pattern was here ####
- Vertices:
- Map 1
+ /srcbucket_mapjoin_part_1/part=2 [a]
+ Map 4
Map Operator Tree:
TableScan
- alias: a
+ alias: b
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0
- 1
- keys:
- 0 key (type: int)
- 1 key (type: int)
- input vertices:
- 1 Map 3
- Position of Big Table: 0
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- value expressions: _col0 (type: bigint)
- auto parallelism: false
- Local Work:
- Map Reduce Local Work
+ Reduce Output Operator
+ key expressions: key (type: int)
+ sort order: +
+ Map-reduce partition columns: key (type: int)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ tag: 1
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: part=2
+ base file name: part=1
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
- part 2
+ part 1
properties:
COLUMN_STATS_ACCURATE true
bucket_count 2
@@ -1198,13 +1153,13 @@ STAGE PLANS:
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_1
+ name default.srcbucket_mapjoin_part_2
numFiles 2
numRows 500
partition_columns part
partition_columns.types string
rawDataSize 5312
- serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 5812
@@ -1215,24 +1170,45 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
bucket_count 2
- bucket_field_name value
+ bucket_field_name key
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_1
+ name default.srcbucket_mapjoin_part_2
partition_columns part
partition_columns.types string
- serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcbucket_mapjoin_part_1
- name: default.srcbucket_mapjoin_part_1
+ name: default.srcbucket_mapjoin_part_2
+ name: default.srcbucket_mapjoin_part_2
Truncated Path -> Alias:
- /srcbucket_mapjoin_part_1/part=2 [a]
+ /srcbucket_mapjoin_part_2/part=1 [b]
Reducer 2
+ Needs Tagging: true
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: bigint)
+ auto parallelism: false
+ Reducer 3
Needs Tagging: false
Reduce Operator Tree:
Group By Operator
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin2.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin2.q.out?rev=1642997&r1=1642996&r2=1642997&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin2.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin2.q.out Tue Dec 2 19:57:10 2014
@@ -158,36 +158,35 @@ TOK_QUERY
STAGE DEPENDENCIES:
- Stage-3 is a root stage
- Stage-1 depends on stages: Stage-3
+ Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
Stage-2 depends on stages: Stage-0
STAGE PLANS:
- Stage: Stage-3
+ Stage: Stage-1
Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
Vertices:
- Map 2
+ Map 1
Map Operator Tree:
TableScan
- alias: b
- Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE
+ alias: a
+ Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
- Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- condition expressions:
- 0 {key} {value}
- 1 {value}
- keys:
- 0 key (type: int)
- 1 key (type: int)
- Position of Big Table: 0
- Local Work:
- Map Reduce Local Work
+ Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: int)
+ sort order: +
+ Map-reduce partition columns: key (type: int)
+ Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE
+ tag: 0
+ value expressions: value (type: string)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -200,107 +199,64 @@ STAGE PLANS:
ds 2008-04-08
properties:
COLUMN_STATS_ACCURATE true
- bucket_count 2
+ bucket_count 4
bucket_field_name key
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_2
- numFiles 2
+ name default.srcbucket_mapjoin_part
+ numFiles 4
numRows 0
partition_columns ds
partition_columns.types string
rawDataSize 0
- serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 3062
+ totalSize 5812
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- bucket_count 2
+ bucket_count 4
bucket_field_name key
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_2
+ name default.srcbucket_mapjoin_part
partition_columns ds
partition_columns.types string
- serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcbucket_mapjoin_part_2
- name: default.srcbucket_mapjoin_part_2
+ name: default.srcbucket_mapjoin_part
+ name: default.srcbucket_mapjoin_part
Truncated Path -> Alias:
- /srcbucket_mapjoin_part_2/ds=2008-04-08 [b]
-
- Stage: Stage-1
- Spark
-#### A masked pattern was here ####
- Vertices:
- Map 1
+ /srcbucket_mapjoin_part/ds=2008-04-08 [a]
+ Map 3
Map Operator Tree:
TableScan
- alias: a
- Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+ alias: b
+ Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
- Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0 {key} {value}
- 1 {value}
- keys:
- 0 key (type: int)
- 1 key (type: int)
- outputColumnNames: _col0, _col1, _col7
- input vertices:
- 1 Map 2
- Position of Big Table: 0
- Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- GlobalTableId: 1
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count -1
- columns key,value1,value2
- columns.comments
- columns.types string:string:string
-#### A masked pattern was here ####
- name default.bucketmapjoin_tmp_result
- serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucketmapjoin_tmp_result
- TotalFiles: 1
- GatherStats: true
- MultiFileSpray: false
- Local Work:
- Map Reduce Local Work
+ Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: int)
+ sort order: +
+ Map-reduce partition columns: key (type: int)
+ Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE
+ tag: 1
+ value expressions: value (type: string)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -313,46 +269,87 @@ STAGE PLANS:
ds 2008-04-08
properties:
COLUMN_STATS_ACCURATE true
- bucket_count 4
+ bucket_count 2
bucket_field_name key
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part
- numFiles 4
+ name default.srcbucket_mapjoin_part_2
+ numFiles 2
numRows 0
partition_columns ds
partition_columns.types string
rawDataSize 0
- serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 5812
+ totalSize 3062
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- bucket_count 4
+ bucket_count 2
bucket_field_name key
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part
+ name default.srcbucket_mapjoin_part_2
partition_columns ds
partition_columns.types string
- serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcbucket_mapjoin_part
- name: default.srcbucket_mapjoin_part
+ name: default.srcbucket_mapjoin_part_2
+ name: default.srcbucket_mapjoin_part_2
Truncated Path -> Alias:
- /srcbucket_mapjoin_part/ds=2008-04-08 [a]
+ /srcbucket_mapjoin_part_2/ds=2008-04-08 [b]
+ Reducer 2
+ Needs Tagging: true
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {KEY.reducesinkkey0} {VALUE._col0}
+ 1 {VALUE._col0}
+ outputColumnNames: _col0, _col1, _col7
+ Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value1,value2
+ columns.comments
+ columns.types string:string:string
+#### A masked pattern was here ####
+ name default.bucketmapjoin_tmp_result
+ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.bucketmapjoin_tmp_result
+ TotalFiles: 1
+ GatherStats: true
+ MultiFileSpray: false
Stage: Stage-0
Move Operator
@@ -555,36 +552,35 @@ TOK_QUERY
STAGE DEPENDENCIES:
- Stage-3 is a root stage
- Stage-1 depends on stages: Stage-3
+ Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
Stage-2 depends on stages: Stage-0
STAGE PLANS:
- Stage: Stage-3
+ Stage: Stage-1
Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3)
#### A masked pattern was here ####
Vertices:
- Map 2
+ Map 1
Map Operator Tree:
TableScan
- alias: b
- Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE
+ alias: a
+ Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
- Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- condition expressions:
- 0 {key} {value}
- 1 {value}
- keys:
- 0 key (type: int)
- 1 key (type: int)
- Position of Big Table: 0
- Local Work:
- Map Reduce Local Work
+ Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: int)
+ sort order: +
+ Map-reduce partition columns: key (type: int)
+ Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE
+ tag: 0
+ value expressions: value (type: string)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -597,112 +593,64 @@ STAGE PLANS:
ds 2008-04-08
properties:
COLUMN_STATS_ACCURATE true
- bucket_count 2
+ bucket_count 4
bucket_field_name key
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_2
- numFiles 2
+ name default.srcbucket_mapjoin_part
+ numFiles 4
numRows 0
partition_columns ds
partition_columns.types string
rawDataSize 0
- serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 3062
+ totalSize 5812
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- bucket_count 2
+ bucket_count 4
bucket_field_name key
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_2
+ name default.srcbucket_mapjoin_part
partition_columns ds
partition_columns.types string
- serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcbucket_mapjoin_part_2
- name: default.srcbucket_mapjoin_part_2
+ name: default.srcbucket_mapjoin_part
+ name: default.srcbucket_mapjoin_part
Truncated Path -> Alias:
- /srcbucket_mapjoin_part_2/ds=2008-04-08 [b]
-
- Stage: Stage-1
- Spark
-#### A masked pattern was here ####
- Vertices:
- Map 1
+ /srcbucket_mapjoin_part/ds=2008-04-08 [a]
+ Map 3
Map Operator Tree:
TableScan
- alias: a
- Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+ alias: b
+ Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
- Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0 {key} {value}
- 1 {value}
- keys:
- 0 key (type: int)
- 1 key (type: int)
- outputColumnNames: _col0, _col1, _col7
- input vertices:
- 1 Map 2
- Position of Big Table: 0
- Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- GlobalTableId: 1
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- COLUMN_STATS_ACCURATE true
- bucket_count -1
- columns key,value1,value2
- columns.comments
- columns.types string:string:string
-#### A masked pattern was here ####
- name default.bucketmapjoin_tmp_result
- numFiles 1
- numRows 564
- rawDataSize 10503
- serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 11067
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucketmapjoin_tmp_result
- TotalFiles: 1
- GatherStats: true
- MultiFileSpray: false
- Local Work:
- Map Reduce Local Work
+ Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: int)
+ sort order: +
+ Map-reduce partition columns: key (type: int)
+ Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE
+ tag: 1
+ value expressions: value (type: string)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -715,46 +663,92 @@ STAGE PLANS:
ds 2008-04-08
properties:
COLUMN_STATS_ACCURATE true
- bucket_count 4
+ bucket_count 2
bucket_field_name key
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part
- numFiles 4
+ name default.srcbucket_mapjoin_part_2
+ numFiles 2
numRows 0
partition_columns ds
partition_columns.types string
rawDataSize 0
- serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 5812
+ totalSize 3062
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- bucket_count 4
+ bucket_count 2
bucket_field_name key
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part
+ name default.srcbucket_mapjoin_part_2
partition_columns ds
partition_columns.types string
- serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcbucket_mapjoin_part
- name: default.srcbucket_mapjoin_part
+ name: default.srcbucket_mapjoin_part_2
+ name: default.srcbucket_mapjoin_part_2
Truncated Path -> Alias:
- /srcbucket_mapjoin_part/ds=2008-04-08 [a]
+ /srcbucket_mapjoin_part_2/ds=2008-04-08 [b]
+ Reducer 2
+ Needs Tagging: true
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {KEY.reducesinkkey0} {VALUE._col0}
+ 1 {VALUE._col0}
+ outputColumnNames: _col0, _col1, _col7
+ Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns key,value1,value2
+ columns.comments
+ columns.types string:string:string
+#### A masked pattern was here ####
+ name default.bucketmapjoin_tmp_result
+ numFiles 3
+ numRows 564
+ rawDataSize 10503
+ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 11067
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.bucketmapjoin_tmp_result
+ TotalFiles: 1
+ GatherStats: true
+ MultiFileSpray: false
Stage: Stage-0
Move Operator
@@ -772,7 +766,7 @@ STAGE PLANS:
columns.types string:string:string
#### A masked pattern was here ####
name default.bucketmapjoin_tmp_result
- numFiles 1
+ numFiles 3
numRows 564
rawDataSize 10503
serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
@@ -974,14 +968,15 @@ TOK_QUERY
STAGE DEPENDENCIES:
- Stage-3 is a root stage
- Stage-1 depends on stages: Stage-3
+ Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
Stage-2 depends on stages: Stage-0
STAGE PLANS:
- Stage: Stage-3
+ Stage: Stage-1
Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -994,16 +989,14 @@ STAGE PLANS:
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- condition expressions:
- 0 {value}
- 1 {value}
- keys:
- 0 key (type: int)
- 1 key (type: int)
- Position of Big Table: 1
- Local Work:
- Map Reduce Local Work
+ Reduce Output Operator
+ key expressions: key (type: int)
+ sort order: +
+ Map-reduce partition columns: key (type: int)
+ Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE
+ tag: 0
+ value expressions: value (type: string)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -1056,12 +1049,7 @@ STAGE PLANS:
name: default.srcbucket_mapjoin_part
Truncated Path -> Alias:
/srcbucket_mapjoin_part/ds=2008-04-08 [a]
-
- Stage: Stage-1
- Spark
-#### A masked pattern was here ####
- Vertices:
- Map 2
+ Map 3
Map Operator Tree:
TableScan
alias: b
@@ -1071,57 +1059,14 @@ STAGE PLANS:
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0 {key} {value}
- 1 {value}
- keys:
- 0 key (type: int)
- 1 key (type: int)
- outputColumnNames: _col0, _col1, _col7
- input vertices:
- 0 Map 1
- Position of Big Table: 1
- Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- GlobalTableId: 1
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- COLUMN_STATS_ACCURATE true
- bucket_count -1
- columns key,value1,value2
- columns.comments
- columns.types string:string:string
-#### A masked pattern was here ####
- name default.bucketmapjoin_tmp_result
- numFiles 1
- numRows 564
- rawDataSize 10503
- serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 11067
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucketmapjoin_tmp_result
- TotalFiles: 1
- GatherStats: true
- MultiFileSpray: false
- Local Work:
- Map Reduce Local Work
+ Reduce Output Operator
+ key expressions: key (type: int)
+ sort order: +
+ Map-reduce partition columns: key (type: int)
+ Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE
+ tag: 1
+ value expressions: value (type: string)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -1222,6 +1167,52 @@ STAGE PLANS:
Truncated Path -> Alias:
/srcbucket_mapjoin_part_2/ds=2008-04-08 [b]
/srcbucket_mapjoin_part_2/ds=2008-04-09 [b]
+ Reducer 2
+ Needs Tagging: true
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {KEY.reducesinkkey0} {VALUE._col0}
+ 1 {VALUE._col0}
+ outputColumnNames: _col0, _col1, _col7
+ Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns key,value1,value2
+ columns.comments
+ columns.types string:string:string
+#### A masked pattern was here ####
+ name default.bucketmapjoin_tmp_result
+ numFiles 3
+ numRows 564
+ rawDataSize 10503
+ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 11067
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.bucketmapjoin_tmp_result
+ TotalFiles: 1
+ GatherStats: true
+ MultiFileSpray: false
Stage: Stage-0
Move Operator
@@ -1239,7 +1230,7 @@ STAGE PLANS:
columns.types string:string:string
#### A masked pattern was here ####
name default.bucketmapjoin_tmp_result
- numFiles 1
+ numFiles 3
numRows 564
rawDataSize 10503
serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}