You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by xu...@apache.org on 2014/12/13 18:44:42 UTC
svn commit: r1645338 [2/9] - in /hive/branches/spark: data/conf/spark/
itests/src/test/resources/ ql/src/java/org/apache/hadoop/hive/ql/optimizer/
ql/src/test/results/clientpositive/spark/
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin11.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin11.q.out?rev=1645338&r1=1645337&r2=1645338&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin11.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin11.q.out Sat Dec 13 17:44:41 2014
@@ -202,34 +202,35 @@ TOK_QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
+ Stage-2 is a root stage
+ Stage-1 depends on stages: Stage-2
Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-1
+ Stage: Stage-2
Spark
- Edges:
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1)
- Reducer 3 <- Reducer 2 (GROUP, 1)
#### A masked pattern was here ####
Vertices:
- Map 1
+ Map 3
Map Operator Tree:
TableScan
- alias: a
+ alias: b
Statistics: Num rows: 2140 Data size: 8562 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 1070 Data size: 4281 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 1070 Data size: 4281 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- auto parallelism: false
+ Spark HashTable Sink Operator
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ Position of Big Table: 0
+ Local Work:
+ Map Reduce Local Work
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -242,44 +243,44 @@ STAGE PLANS:
part 1
properties:
COLUMN_STATS_ACCURATE true
- bucket_count 2
+ bucket_count 4
bucket_field_name key
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_1
- numFiles 2
+ name default.srcbucket_mapjoin_part_2
+ numFiles 4
numRows 0
partition_columns part
partition_columns.types string
rawDataSize 0
- serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 2750
+ totalSize 5812
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- bucket_count 4
+ bucket_count 2
bucket_field_name key
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_1
+ name default.srcbucket_mapjoin_part_2
partition_columns part
partition_columns.types string
- serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcbucket_mapjoin_part_1
- name: default.srcbucket_mapjoin_part_1
+ name: default.srcbucket_mapjoin_part_2
+ name: default.srcbucket_mapjoin_part_2
#### A masked pattern was here ####
Partition
base file name: part=2
@@ -289,64 +290,90 @@ STAGE PLANS:
part 2
properties:
COLUMN_STATS_ACCURATE true
- bucket_count 4
+ bucket_count 2
bucket_field_name key
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_1
- numFiles 4
+ name default.srcbucket_mapjoin_part_2
+ numFiles 2
numRows 0
partition_columns part
partition_columns.types string
rawDataSize 0
- serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 5812
+ totalSize 2750
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- bucket_count 4
+ bucket_count 2
bucket_field_name key
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_1
+ name default.srcbucket_mapjoin_part_2
partition_columns part
partition_columns.types string
- serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcbucket_mapjoin_part_1
- name: default.srcbucket_mapjoin_part_1
+ name: default.srcbucket_mapjoin_part_2
+ name: default.srcbucket_mapjoin_part_2
Truncated Path -> Alias:
- /srcbucket_mapjoin_part_1/part=1 [a]
- /srcbucket_mapjoin_part_1/part=2 [a]
- Map 4
+ /srcbucket_mapjoin_part_2/part=1 [b]
+ /srcbucket_mapjoin_part_2/part=2 [b]
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
Map Operator Tree:
TableScan
- alias: b
+ alias: a
Statistics: Num rows: 2140 Data size: 8562 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 1070 Data size: 4281 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 1070 Data size: 4281 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- auto parallelism: false
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ input vertices:
+ 1 Map 3
+ Position of Big Table: 0
+ Statistics: Num rows: 1177 Data size: 4709 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: bigint)
+ auto parallelism: false
+ Local Work:
+ Map Reduce Local Work
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -359,44 +386,44 @@ STAGE PLANS:
part 1
properties:
COLUMN_STATS_ACCURATE true
- bucket_count 4
+ bucket_count 2
bucket_field_name key
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_2
- numFiles 4
+ name default.srcbucket_mapjoin_part_1
+ numFiles 2
numRows 0
partition_columns part
partition_columns.types string
rawDataSize 0
- serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 5812
+ totalSize 2750
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- bucket_count 2
+ bucket_count 4
bucket_field_name key
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_2
+ name default.srcbucket_mapjoin_part_1
partition_columns part
partition_columns.types string
- serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcbucket_mapjoin_part_2
- name: default.srcbucket_mapjoin_part_2
+ name: default.srcbucket_mapjoin_part_1
+ name: default.srcbucket_mapjoin_part_1
#### A masked pattern was here ####
Partition
base file name: part=2
@@ -406,69 +433,48 @@ STAGE PLANS:
part 2
properties:
COLUMN_STATS_ACCURATE true
- bucket_count 2
+ bucket_count 4
bucket_field_name key
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_2
- numFiles 2
+ name default.srcbucket_mapjoin_part_1
+ numFiles 4
numRows 0
partition_columns part
partition_columns.types string
rawDataSize 0
- serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 2750
+ totalSize 5812
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- bucket_count 2
+ bucket_count 4
bucket_field_name key
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_2
+ name default.srcbucket_mapjoin_part_1
partition_columns part
partition_columns.types string
- serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcbucket_mapjoin_part_2
- name: default.srcbucket_mapjoin_part_2
+ name: default.srcbucket_mapjoin_part_1
+ name: default.srcbucket_mapjoin_part_1
Truncated Path -> Alias:
- /srcbucket_mapjoin_part_2/part=1 [b]
- /srcbucket_mapjoin_part_2/part=2 [b]
+ /srcbucket_mapjoin_part_1/part=1 [a]
+ /srcbucket_mapjoin_part_1/part=2 [a]
Reducer 2
- Needs Tagging: true
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0
- 1
- Statistics: Num rows: 1177 Data size: 4709 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- value expressions: _col0 (type: bigint)
- auto parallelism: false
- Reducer 3
Needs Tagging: false
Reduce Operator Tree:
Group By Operator
@@ -603,34 +609,35 @@ TOK_QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
+ Stage-2 is a root stage
+ Stage-1 depends on stages: Stage-2
Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-1
+ Stage: Stage-2
Spark
- Edges:
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3)
- Reducer 3 <- Reducer 2 (GROUP, 1)
#### A masked pattern was here ####
Vertices:
- Map 1
+ Map 3
Map Operator Tree:
TableScan
- alias: a
+ alias: b
Statistics: Num rows: 2140 Data size: 8562 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 1070 Data size: 4281 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int), part (type: string)
- sort order: ++
- Map-reduce partition columns: key (type: int), part (type: string)
- Statistics: Num rows: 1070 Data size: 4281 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- auto parallelism: false
+ Spark HashTable Sink Operator
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 key (type: int), part (type: string)
+ 1 key (type: int), part (type: string)
+ Position of Big Table: 0
+ Local Work:
+ Map Reduce Local Work
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -643,44 +650,44 @@ STAGE PLANS:
part 1
properties:
COLUMN_STATS_ACCURATE true
- bucket_count 2
+ bucket_count 4
bucket_field_name key
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_1
- numFiles 2
+ name default.srcbucket_mapjoin_part_2
+ numFiles 4
numRows 0
partition_columns part
partition_columns.types string
rawDataSize 0
- serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 2750
+ totalSize 5812
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- bucket_count 4
+ bucket_count 2
bucket_field_name key
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_1
+ name default.srcbucket_mapjoin_part_2
partition_columns part
partition_columns.types string
- serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcbucket_mapjoin_part_1
- name: default.srcbucket_mapjoin_part_1
+ name: default.srcbucket_mapjoin_part_2
+ name: default.srcbucket_mapjoin_part_2
#### A masked pattern was here ####
Partition
base file name: part=2
@@ -690,64 +697,90 @@ STAGE PLANS:
part 2
properties:
COLUMN_STATS_ACCURATE true
- bucket_count 4
+ bucket_count 2
bucket_field_name key
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_1
- numFiles 4
+ name default.srcbucket_mapjoin_part_2
+ numFiles 2
numRows 0
partition_columns part
partition_columns.types string
rawDataSize 0
- serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 5812
+ totalSize 2750
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- bucket_count 4
+ bucket_count 2
bucket_field_name key
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_1
+ name default.srcbucket_mapjoin_part_2
partition_columns part
partition_columns.types string
- serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcbucket_mapjoin_part_1
- name: default.srcbucket_mapjoin_part_1
+ name: default.srcbucket_mapjoin_part_2
+ name: default.srcbucket_mapjoin_part_2
Truncated Path -> Alias:
- /srcbucket_mapjoin_part_1/part=1 [a]
- /srcbucket_mapjoin_part_1/part=2 [a]
- Map 4
+ /srcbucket_mapjoin_part_2/part=1 [b]
+ /srcbucket_mapjoin_part_2/part=2 [b]
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
Map Operator Tree:
TableScan
- alias: b
+ alias: a
Statistics: Num rows: 2140 Data size: 8562 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 1070 Data size: 4281 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int), part (type: string)
- sort order: ++
- Map-reduce partition columns: key (type: int), part (type: string)
- Statistics: Num rows: 1070 Data size: 4281 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- auto parallelism: false
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 key (type: int), part (type: string)
+ 1 key (type: int), part (type: string)
+ input vertices:
+ 1 Map 3
+ Position of Big Table: 0
+ Statistics: Num rows: 1177 Data size: 4709 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: bigint)
+ auto parallelism: false
+ Local Work:
+ Map Reduce Local Work
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -760,44 +793,44 @@ STAGE PLANS:
part 1
properties:
COLUMN_STATS_ACCURATE true
- bucket_count 4
+ bucket_count 2
bucket_field_name key
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_2
- numFiles 4
+ name default.srcbucket_mapjoin_part_1
+ numFiles 2
numRows 0
partition_columns part
partition_columns.types string
rawDataSize 0
- serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 5812
+ totalSize 2750
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- bucket_count 2
+ bucket_count 4
bucket_field_name key
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_2
+ name default.srcbucket_mapjoin_part_1
partition_columns part
partition_columns.types string
- serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcbucket_mapjoin_part_2
- name: default.srcbucket_mapjoin_part_2
+ name: default.srcbucket_mapjoin_part_1
+ name: default.srcbucket_mapjoin_part_1
#### A masked pattern was here ####
Partition
base file name: part=2
@@ -807,69 +840,48 @@ STAGE PLANS:
part 2
properties:
COLUMN_STATS_ACCURATE true
- bucket_count 2
+ bucket_count 4
bucket_field_name key
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_2
- numFiles 2
+ name default.srcbucket_mapjoin_part_1
+ numFiles 4
numRows 0
partition_columns part
partition_columns.types string
rawDataSize 0
- serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 2750
+ totalSize 5812
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- bucket_count 2
+ bucket_count 4
bucket_field_name key
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_2
+ name default.srcbucket_mapjoin_part_1
partition_columns part
partition_columns.types string
- serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcbucket_mapjoin_part_2
- name: default.srcbucket_mapjoin_part_2
+ name: default.srcbucket_mapjoin_part_1
+ name: default.srcbucket_mapjoin_part_1
Truncated Path -> Alias:
- /srcbucket_mapjoin_part_2/part=1 [b]
- /srcbucket_mapjoin_part_2/part=2 [b]
+ /srcbucket_mapjoin_part_1/part=1 [a]
+ /srcbucket_mapjoin_part_1/part=2 [a]
Reducer 2
- Needs Tagging: true
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0
- 1
- Statistics: Num rows: 1177 Data size: 4709 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- value expressions: _col0 (type: bigint)
- auto parallelism: false
- Reducer 3
Needs Tagging: false
Reduce Operator Tree:
Group By Operator
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin12.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin12.q.out?rev=1645338&r1=1645337&r2=1645338&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin12.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin12.q.out Sat Dec 13 17:44:41 2014
@@ -161,34 +161,35 @@ TOK_QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
+ Stage-2 is a root stage
+ Stage-1 depends on stages: Stage-2
Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-1
+ Stage: Stage-2
Spark
- Edges:
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1)
- Reducer 3 <- Reducer 2 (GROUP, 1)
#### A masked pattern was here ####
Vertices:
- Map 1
+ Map 3
Map Operator Tree:
TableScan
- alias: a
+ alias: b
Statistics: Num rows: 687 Data size: 2750 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- auto parallelism: false
+ Spark HashTable Sink Operator
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ Position of Big Table: 0
+ Local Work:
+ Map Reduce Local Work
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -207,13 +208,13 @@ STAGE PLANS:
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_1
+ name default.srcbucket_mapjoin_part_2
numFiles 2
numRows 0
partition_columns part
partition_columns.types string
rawDataSize 0
- serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 2750
@@ -223,41 +224,66 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- bucket_count 2
- bucket_field_name key
+ bucket_count -1
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_1
+ name default.srcbucket_mapjoin_part_2
partition_columns part
partition_columns.types string
- serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcbucket_mapjoin_part_1
- name: default.srcbucket_mapjoin_part_1
+ name: default.srcbucket_mapjoin_part_2
+ name: default.srcbucket_mapjoin_part_2
Truncated Path -> Alias:
- /srcbucket_mapjoin_part_1/part=1 [a]
- Map 4
+ /srcbucket_mapjoin_part_2/part=1 [b]
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
Map Operator Tree:
TableScan
- alias: b
+ alias: a
Statistics: Num rows: 687 Data size: 2750 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- auto parallelism: false
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ input vertices:
+ 1 Map 3
+ Position of Big Table: 0
+ Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: bigint)
+ auto parallelism: false
+ Local Work:
+ Map Reduce Local Work
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -276,13 +302,13 @@ STAGE PLANS:
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_2
+ name default.srcbucket_mapjoin_part_1
numFiles 2
numRows 0
partition_columns part
partition_columns.types string
rawDataSize 0
- serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 2750
@@ -292,45 +318,25 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- bucket_count -1
+ bucket_count 2
+ bucket_field_name key
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_2
+ name default.srcbucket_mapjoin_part_1
partition_columns part
partition_columns.types string
- serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcbucket_mapjoin_part_2
- name: default.srcbucket_mapjoin_part_2
+ name: default.srcbucket_mapjoin_part_1
+ name: default.srcbucket_mapjoin_part_1
Truncated Path -> Alias:
- /srcbucket_mapjoin_part_2/part=1 [b]
+ /srcbucket_mapjoin_part_1/part=1 [a]
Reducer 2
- Needs Tagging: true
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0
- 1
- Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- value expressions: _col0 (type: bigint)
- auto parallelism: false
- Reducer 3
Needs Tagging: false
Reduce Operator Tree:
Group By Operator
@@ -455,34 +461,35 @@ TOK_QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
+ Stage-2 is a root stage
+ Stage-1 depends on stages: Stage-2
Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-1
+ Stage: Stage-2
Spark
- Edges:
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3)
- Reducer 3 <- Reducer 2 (GROUP, 1)
#### A masked pattern was here ####
Vertices:
- Map 1
+ Map 3
Map Operator Tree:
TableScan
- alias: a
+ alias: b
Statistics: Num rows: 687 Data size: 2750 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- auto parallelism: false
+ Spark HashTable Sink Operator
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ Position of Big Table: 0
+ Local Work:
+ Map Reduce Local Work
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -495,19 +502,18 @@ STAGE PLANS:
part 1
properties:
COLUMN_STATS_ACCURATE true
- bucket_count 2
- bucket_field_name key
+ bucket_count -1
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_1
+ name default.srcbucket_mapjoin_part_3
numFiles 2
numRows 0
partition_columns part
partition_columns.types string
rawDataSize 0
- serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_3 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 2750
@@ -523,35 +529,61 @@ STAGE PLANS:
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_1
+ name default.srcbucket_mapjoin_part_3
partition_columns part
partition_columns.types string
- serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_3 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcbucket_mapjoin_part_1
- name: default.srcbucket_mapjoin_part_1
+ name: default.srcbucket_mapjoin_part_3
+ name: default.srcbucket_mapjoin_part_3
Truncated Path -> Alias:
- /srcbucket_mapjoin_part_1/part=1 [a]
- Map 4
+ /srcbucket_mapjoin_part_3/part=1 [b]
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
Map Operator Tree:
TableScan
- alias: b
+ alias: a
Statistics: Num rows: 687 Data size: 2750 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- auto parallelism: false
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ input vertices:
+ 1 Map 3
+ Position of Big Table: 0
+ Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: bigint)
+ auto parallelism: false
+ Local Work:
+ Map Reduce Local Work
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -564,18 +596,19 @@ STAGE PLANS:
part 1
properties:
COLUMN_STATS_ACCURATE true
- bucket_count -1
+ bucket_count 2
+ bucket_field_name key
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_3
+ name default.srcbucket_mapjoin_part_1
numFiles 2
numRows 0
partition_columns part
partition_columns.types string
rawDataSize 0
- serialization.ddl struct srcbucket_mapjoin_part_3 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 2750
@@ -591,40 +624,19 @@ STAGE PLANS:
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_3
+ name default.srcbucket_mapjoin_part_1
partition_columns part
partition_columns.types string
- serialization.ddl struct srcbucket_mapjoin_part_3 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcbucket_mapjoin_part_3
- name: default.srcbucket_mapjoin_part_3
+ name: default.srcbucket_mapjoin_part_1
+ name: default.srcbucket_mapjoin_part_1
Truncated Path -> Alias:
- /srcbucket_mapjoin_part_3/part=1 [b]
+ /srcbucket_mapjoin_part_1/part=1 [a]
Reducer 2
- Needs Tagging: true
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0
- 1
- Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- value expressions: _col0 (type: bigint)
- auto parallelism: false
- Reducer 3
Needs Tagging: false
Reduce Operator Tree:
Group By Operator