You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by xu...@apache.org on 2014/12/15 18:11:42 UTC
svn commit: r1642997 [5/42] - in /hive/branches/spark:
itests/src/test/resources/
ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/
ql/src/test/results/clientpositive/spark/
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin3.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin3.q.out?rev=1642997&r1=1642996&r2=1642997&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin3.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin3.q.out Tue Dec 2 19:57:10 2014
@@ -189,14 +189,15 @@ TOK_QUERY
STAGE DEPENDENCIES:
- Stage-3 is a root stage
- Stage-1 depends on stages: Stage-3
+ Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
Stage-2 depends on stages: Stage-0
STAGE PLANS:
- Stage: Stage-3
+ Stage: Stage-1
Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -209,16 +210,14 @@ STAGE PLANS:
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- condition expressions:
- 0 {value}
- 1 {value}
- keys:
- 0 key (type: int)
- 1 key (type: int)
- Position of Big Table: 1
- Local Work:
- Map Reduce Local Work
+ Reduce Output Operator
+ key expressions: key (type: int)
+ sort order: +
+ Map-reduce partition columns: key (type: int)
+ Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE
+ tag: 0
+ value expressions: value (type: string)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -271,12 +270,7 @@ STAGE PLANS:
name: default.srcbucket_mapjoin_part_2
Truncated Path -> Alias:
/srcbucket_mapjoin_part_2/ds=2008-04-08 [a]
-
- Stage: Stage-1
- Spark
-#### A masked pattern was here ####
- Vertices:
- Map 2
+ Map 3
Map Operator Tree:
TableScan
alias: b
@@ -286,52 +280,14 @@ STAGE PLANS:
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0 {key} {value}
- 1 {value}
- keys:
- 0 key (type: int)
- 1 key (type: int)
- outputColumnNames: _col0, _col1, _col7
- input vertices:
- 0 Map 1
- Position of Big Table: 1
- Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- GlobalTableId: 1
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count -1
- columns key,value1,value2
- columns.comments
- columns.types string:string:string
-#### A masked pattern was here ####
- name default.bucketmapjoin_tmp_result
- serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucketmapjoin_tmp_result
- TotalFiles: 1
- GatherStats: true
- MultiFileSpray: false
- Local Work:
- Map Reduce Local Work
+ Reduce Output Operator
+ key expressions: key (type: int)
+ sort order: +
+ Map-reduce partition columns: key (type: int)
+ Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE
+ tag: 1
+ value expressions: value (type: string)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -384,6 +340,47 @@ STAGE PLANS:
name: default.srcbucket_mapjoin_part
Truncated Path -> Alias:
/srcbucket_mapjoin_part/ds=2008-04-08 [b]
+ Reducer 2
+ Needs Tagging: true
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {KEY.reducesinkkey0} {VALUE._col0}
+ 1 {VALUE._col0}
+ outputColumnNames: _col0, _col1, _col7
+ Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value1,value2
+ columns.comments
+ columns.types string:string:string
+#### A masked pattern was here ####
+ name default.bucketmapjoin_tmp_result
+ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.bucketmapjoin_tmp_result
+ TotalFiles: 1
+ GatherStats: true
+ MultiFileSpray: false
Stage: Stage-0
Move Operator
@@ -593,14 +590,15 @@ TOK_QUERY
STAGE DEPENDENCIES:
- Stage-3 is a root stage
- Stage-1 depends on stages: Stage-3
+ Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
Stage-2 depends on stages: Stage-0
STAGE PLANS:
- Stage: Stage-3
+ Stage: Stage-1
Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -613,16 +611,14 @@ STAGE PLANS:
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- condition expressions:
- 0 {value}
- 1 {value}
- keys:
- 0 key (type: int)
- 1 key (type: int)
- Position of Big Table: 1
- Local Work:
- Map Reduce Local Work
+ Reduce Output Operator
+ key expressions: key (type: int)
+ sort order: +
+ Map-reduce partition columns: key (type: int)
+ Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE
+ tag: 0
+ value expressions: value (type: string)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -675,12 +671,7 @@ STAGE PLANS:
name: default.srcbucket_mapjoin_part_2
Truncated Path -> Alias:
/srcbucket_mapjoin_part_2/ds=2008-04-08 [a]
-
- Stage: Stage-1
- Spark
-#### A masked pattern was here ####
- Vertices:
- Map 2
+ Map 3
Map Operator Tree:
TableScan
alias: b
@@ -690,57 +681,14 @@ STAGE PLANS:
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0 {key} {value}
- 1 {value}
- keys:
- 0 key (type: int)
- 1 key (type: int)
- outputColumnNames: _col0, _col1, _col7
- input vertices:
- 0 Map 1
- Position of Big Table: 1
- Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- GlobalTableId: 1
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- COLUMN_STATS_ACCURATE true
- bucket_count -1
- columns key,value1,value2
- columns.comments
- columns.types string:string:string
-#### A masked pattern was here ####
- name default.bucketmapjoin_tmp_result
- numFiles 1
- numRows 564
- rawDataSize 10503
- serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 11067
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucketmapjoin_tmp_result
- TotalFiles: 1
- GatherStats: true
- MultiFileSpray: false
- Local Work:
- Map Reduce Local Work
+ Reduce Output Operator
+ key expressions: key (type: int)
+ sort order: +
+ Map-reduce partition columns: key (type: int)
+ Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE
+ tag: 1
+ value expressions: value (type: string)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -793,6 +741,52 @@ STAGE PLANS:
name: default.srcbucket_mapjoin_part
Truncated Path -> Alias:
/srcbucket_mapjoin_part/ds=2008-04-08 [b]
+ Reducer 2
+ Needs Tagging: true
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {KEY.reducesinkkey0} {VALUE._col0}
+ 1 {VALUE._col0}
+ outputColumnNames: _col0, _col1, _col7
+ Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns key,value1,value2
+ columns.comments
+ columns.types string:string:string
+#### A masked pattern was here ####
+ name default.bucketmapjoin_tmp_result
+ numFiles 3
+ numRows 564
+ rawDataSize 10503
+ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 11067
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.bucketmapjoin_tmp_result
+ TotalFiles: 1
+ GatherStats: true
+ MultiFileSpray: false
Stage: Stage-0
Move Operator
@@ -810,7 +804,7 @@ STAGE PLANS:
columns.types string:string:string
#### A masked pattern was here ####
name default.bucketmapjoin_tmp_result
- numFiles 1
+ numFiles 3
numRows 564
rawDataSize 10503
serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin4.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin4.q.out?rev=1642997&r1=1642996&r2=1642997&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin4.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin4.q.out Tue Dec 2 19:57:10 2014
@@ -175,36 +175,35 @@ TOK_QUERY
STAGE DEPENDENCIES:
- Stage-3 is a root stage
- Stage-1 depends on stages: Stage-3
+ Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
Stage-2 depends on stages: Stage-0
STAGE PLANS:
- Stage: Stage-3
+ Stage: Stage-1
Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
Vertices:
- Map 2
+ Map 1
Map Operator Tree:
TableScan
- alias: b
+ alias: a
Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- condition expressions:
- 0 {key} {value}
- 1 {value}
- keys:
- 0 key (type: int)
- 1 key (type: int)
- Position of Big Table: 0
- Local Work:
- Map Reduce Local Work
+ Reduce Output Operator
+ key expressions: key (type: int)
+ sort order: +
+ Map-reduce partition columns: key (type: int)
+ Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE
+ tag: 0
+ value expressions: value (type: string)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -251,68 +250,25 @@ STAGE PLANS:
name: default.srcbucket_mapjoin
name: default.srcbucket_mapjoin
Truncated Path -> Alias:
- /srcbucket_mapjoin [b]
-
- Stage: Stage-1
- Spark
-#### A masked pattern was here ####
- Vertices:
- Map 1
+ /srcbucket_mapjoin [a]
+ Map 3
Map Operator Tree:
TableScan
- alias: a
+ alias: b
Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0 {key} {value}
- 1 {value}
- keys:
- 0 key (type: int)
- 1 key (type: int)
- outputColumnNames: _col0, _col1, _col6
- input vertices:
- 1 Map 2
- Position of Big Table: 0
- Statistics: Num rows: 14 Data size: 1512 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 14 Data size: 1512 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- GlobalTableId: 1
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- Statistics: Num rows: 14 Data size: 1512 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count -1
- columns key,value1,value2
- columns.comments
- columns.types string:string:string
-#### A masked pattern was here ####
- name default.bucketmapjoin_tmp_result
- serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucketmapjoin_tmp_result
- TotalFiles: 1
- GatherStats: true
- MultiFileSpray: false
- Local Work:
- Map Reduce Local Work
+ Reduce Output Operator
+ key expressions: key (type: int)
+ sort order: +
+ Map-reduce partition columns: key (type: int)
+ Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE
+ tag: 1
+ value expressions: value (type: string)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -359,7 +315,48 @@ STAGE PLANS:
name: default.srcbucket_mapjoin
name: default.srcbucket_mapjoin
Truncated Path -> Alias:
- /srcbucket_mapjoin [a]
+ /srcbucket_mapjoin [b]
+ Reducer 2
+ Needs Tagging: true
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {KEY.reducesinkkey0} {VALUE._col0}
+ 1 {VALUE._col0}
+ outputColumnNames: _col0, _col1, _col6
+ Statistics: Num rows: 14 Data size: 1512 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 14 Data size: 1512 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 14 Data size: 1512 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value1,value2
+ columns.comments
+ columns.types string:string:string
+#### A masked pattern was here ####
+ name default.bucketmapjoin_tmp_result
+ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.bucketmapjoin_tmp_result
+ TotalFiles: 1
+ GatherStats: true
+ MultiFileSpray: false
Stage: Stage-0
Move Operator
@@ -543,36 +540,35 @@ TOK_QUERY
STAGE DEPENDENCIES:
- Stage-3 is a root stage
- Stage-1 depends on stages: Stage-3
+ Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
Stage-2 depends on stages: Stage-0
STAGE PLANS:
- Stage: Stage-3
+ Stage: Stage-1
Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3)
#### A masked pattern was here ####
Vertices:
- Map 2
+ Map 1
Map Operator Tree:
TableScan
- alias: b
+ alias: a
Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- condition expressions:
- 0 {key} {value}
- 1 {value}
- keys:
- 0 key (type: int)
- 1 key (type: int)
- Position of Big Table: 0
- Local Work:
- Map Reduce Local Work
+ Reduce Output Operator
+ key expressions: key (type: int)
+ sort order: +
+ Map-reduce partition columns: key (type: int)
+ Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE
+ tag: 0
+ value expressions: value (type: string)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -619,73 +615,25 @@ STAGE PLANS:
name: default.srcbucket_mapjoin
name: default.srcbucket_mapjoin
Truncated Path -> Alias:
- /srcbucket_mapjoin [b]
-
- Stage: Stage-1
- Spark
-#### A masked pattern was here ####
- Vertices:
- Map 1
+ /srcbucket_mapjoin [a]
+ Map 3
Map Operator Tree:
TableScan
- alias: a
+ alias: b
Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0 {key} {value}
- 1 {value}
- keys:
- 0 key (type: int)
- 1 key (type: int)
- outputColumnNames: _col0, _col1, _col6
- input vertices:
- 1 Map 2
- Position of Big Table: 0
- Statistics: Num rows: 14 Data size: 1512 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 14 Data size: 1512 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- GlobalTableId: 1
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- Statistics: Num rows: 14 Data size: 1512 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- COLUMN_STATS_ACCURATE true
- bucket_count -1
- columns key,value1,value2
- columns.comments
- columns.types string:string:string
-#### A masked pattern was here ####
- name default.bucketmapjoin_tmp_result
- numFiles 2
- numRows 464
- rawDataSize 8519
- serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 8983
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucketmapjoin_tmp_result
- TotalFiles: 1
- GatherStats: true
- MultiFileSpray: false
- Local Work:
- Map Reduce Local Work
+ Reduce Output Operator
+ key expressions: key (type: int)
+ sort order: +
+ Map-reduce partition columns: key (type: int)
+ Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE
+ tag: 1
+ value expressions: value (type: string)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -732,7 +680,53 @@ STAGE PLANS:
name: default.srcbucket_mapjoin
name: default.srcbucket_mapjoin
Truncated Path -> Alias:
- /srcbucket_mapjoin [a]
+ /srcbucket_mapjoin [b]
+ Reducer 2
+ Needs Tagging: true
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {KEY.reducesinkkey0} {VALUE._col0}
+ 1 {VALUE._col0}
+ outputColumnNames: _col0, _col1, _col6
+ Statistics: Num rows: 14 Data size: 1512 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 14 Data size: 1512 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 14 Data size: 1512 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns key,value1,value2
+ columns.comments
+ columns.types string:string:string
+#### A masked pattern was here ####
+ name default.bucketmapjoin_tmp_result
+ numFiles 3
+ numRows 464
+ rawDataSize 8519
+ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 8983
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.bucketmapjoin_tmp_result
+ TotalFiles: 1
+ GatherStats: true
+ MultiFileSpray: false
Stage: Stage-0
Move Operator
@@ -750,7 +744,7 @@ STAGE PLANS:
columns.types string:string:string
#### A masked pattern was here ####
name default.bucketmapjoin_tmp_result
- numFiles 2
+ numFiles 3
numRows 464
rawDataSize 8519
serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin5.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin5.q.out?rev=1642997&r1=1642996&r2=1642997&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin5.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin5.q.out Tue Dec 2 19:57:10 2014
@@ -225,14 +225,15 @@ TOK_QUERY
STAGE DEPENDENCIES:
- Stage-3 is a root stage
- Stage-1 depends on stages: Stage-3
+ Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
Stage-2 depends on stages: Stage-0
STAGE PLANS:
- Stage: Stage-3
+ Stage: Stage-1
Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -245,16 +246,14 @@ STAGE PLANS:
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- condition expressions:
- 0 {value}
- 1 {value}
- keys:
- 0 key (type: int)
- 1 key (type: int)
- Position of Big Table: 1
- Local Work:
- Map Reduce Local Work
+ Reduce Output Operator
+ key expressions: key (type: int)
+ sort order: +
+ Map-reduce partition columns: key (type: int)
+ Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE
+ tag: 0
+ value expressions: value (type: string)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -302,12 +301,7 @@ STAGE PLANS:
name: default.srcbucket_mapjoin
Truncated Path -> Alias:
/srcbucket_mapjoin [a]
-
- Stage: Stage-1
- Spark
-#### A masked pattern was here ####
- Vertices:
- Map 2
+ Map 3
Map Operator Tree:
TableScan
alias: b
@@ -317,52 +311,14 @@ STAGE PLANS:
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0 {key} {value}
- 1 {value}
- keys:
- 0 key (type: int)
- 1 key (type: int)
- outputColumnNames: _col0, _col1, _col6
- input vertices:
- 0 Map 1
- Position of Big Table: 1
- Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- GlobalTableId: 1
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count -1
- columns key,value1,value2
- columns.comments
- columns.types string:string:string
-#### A masked pattern was here ####
- name default.bucketmapjoin_tmp_result
- serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucketmapjoin_tmp_result
- TotalFiles: 1
- GatherStats: true
- MultiFileSpray: false
- Local Work:
- Map Reduce Local Work
+ Reduce Output Operator
+ key expressions: key (type: int)
+ sort order: +
+ Map-reduce partition columns: key (type: int)
+ Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+ tag: 1
+ value expressions: value (type: string)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -463,6 +419,47 @@ STAGE PLANS:
Truncated Path -> Alias:
/srcbucket_mapjoin_part/ds=2008-04-08 [b]
/srcbucket_mapjoin_part/ds=2008-04-09 [b]
+ Reducer 2
+ Needs Tagging: true
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {KEY.reducesinkkey0} {VALUE._col0}
+ 1 {VALUE._col0}
+ outputColumnNames: _col0, _col1, _col6
+ Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value1,value2
+ columns.comments
+ columns.types string:string:string
+#### A masked pattern was here ####
+ name default.bucketmapjoin_tmp_result
+ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.bucketmapjoin_tmp_result
+ TotalFiles: 1
+ GatherStats: true
+ MultiFileSpray: false
Stage: Stage-0
Move Operator
@@ -658,14 +655,15 @@ TOK_QUERY
STAGE DEPENDENCIES:
- Stage-3 is a root stage
- Stage-1 depends on stages: Stage-3
+ Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
Stage-2 depends on stages: Stage-0
STAGE PLANS:
- Stage: Stage-3
+ Stage: Stage-1
Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -678,16 +676,14 @@ STAGE PLANS:
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- condition expressions:
- 0 {value}
- 1 {value}
- keys:
- 0 key (type: int)
- 1 key (type: int)
- Position of Big Table: 1
- Local Work:
- Map Reduce Local Work
+ Reduce Output Operator
+ key expressions: key (type: int)
+ sort order: +
+ Map-reduce partition columns: key (type: int)
+ Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE
+ tag: 0
+ value expressions: value (type: string)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -735,12 +731,7 @@ STAGE PLANS:
name: default.srcbucket_mapjoin
Truncated Path -> Alias:
/srcbucket_mapjoin [a]
-
- Stage: Stage-1
- Spark
-#### A masked pattern was here ####
- Vertices:
- Map 2
+ Map 3
Map Operator Tree:
TableScan
alias: b
@@ -750,57 +741,14 @@ STAGE PLANS:
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0 {key} {value}
- 1 {value}
- keys:
- 0 key (type: int)
- 1 key (type: int)
- outputColumnNames: _col0, _col1, _col6
- input vertices:
- 0 Map 1
- Position of Big Table: 1
- Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- GlobalTableId: 1
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- COLUMN_STATS_ACCURATE true
- bucket_count -1
- columns key,value1,value2
- columns.comments
- columns.types string:string:string
-#### A masked pattern was here ####
- name default.bucketmapjoin_tmp_result
- numFiles 1
- numRows 928
- rawDataSize 17038
- serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 17966
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucketmapjoin_tmp_result
- TotalFiles: 1
- GatherStats: true
- MultiFileSpray: false
- Local Work:
- Map Reduce Local Work
+ Reduce Output Operator
+ key expressions: key (type: int)
+ sort order: +
+ Map-reduce partition columns: key (type: int)
+ Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE
+ tag: 1
+ value expressions: value (type: string)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -901,6 +849,52 @@ STAGE PLANS:
Truncated Path -> Alias:
/srcbucket_mapjoin_part_2/ds=2008-04-08 [b]
/srcbucket_mapjoin_part_2/ds=2008-04-09 [b]
+ Reducer 2
+ Needs Tagging: true
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {KEY.reducesinkkey0} {VALUE._col0}
+ 1 {VALUE._col0}
+ outputColumnNames: _col0, _col1, _col6
+ Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns key,value1,value2
+ columns.comments
+ columns.types string:string:string
+#### A masked pattern was here ####
+ name default.bucketmapjoin_tmp_result
+ numFiles 3
+ numRows 928
+ rawDataSize 17038
+ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 17966
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.bucketmapjoin_tmp_result
+ TotalFiles: 1
+ GatherStats: true
+ MultiFileSpray: false
Stage: Stage-0
Move Operator
@@ -918,7 +912,7 @@ STAGE PLANS:
columns.types string:string:string
#### A masked pattern was here ####
name default.bucketmapjoin_tmp_result
- numFiles 1
+ numFiles 3
numRows 928
rawDataSize 17038
serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin7.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin7.q.out?rev=1642997&r1=1642996&r2=1642997&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin7.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin7.q.out Tue Dec 2 19:57:10 2014
@@ -140,35 +140,34 @@ TOK_QUERY
STAGE DEPENDENCIES:
- Stage-2 is a root stage
- Stage-1 depends on stages: Stage-2
+ Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-2
+ Stage: Stage-1
Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1)
+ Reducer 3 <- Reducer 2 (SORT, 1)
#### A masked pattern was here ####
Vertices:
- Map 3
+ Map 1
Map Operator Tree:
TableScan
- alias: b
- Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE
+ alias: a
+ Statistics: Num rows: 687 Data size: 2750 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
- Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- condition expressions:
- 0 {key}
- 1 {value}
- keys:
- 0 key (type: int)
- 1 key (type: int)
- Position of Big Table: 0
- Local Work:
- Map Reduce Local Work
+ Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: int)
+ sort order: +
+ Map-reduce partition columns: key (type: int)
+ Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE
+ tag: 0
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -188,13 +187,13 @@ STAGE PLANS:
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_2
+ name default.srcbucket_mapjoin_part_1
numFiles 2
numRows 0
partition_columns ds/hr
partition_columns.types string:string
rawDataSize 0
- serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 2750
@@ -210,61 +209,36 @@ STAGE PLANS:
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_2
+ name default.srcbucket_mapjoin_part_1
partition_columns ds/hr
partition_columns.types string:string
- serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcbucket_mapjoin_part_2
- name: default.srcbucket_mapjoin_part_2
+ name: default.srcbucket_mapjoin_part_1
+ name: default.srcbucket_mapjoin_part_1
Truncated Path -> Alias:
- /srcbucket_mapjoin_part_2/ds=2008-04-08/hr=0 [b]
-
- Stage: Stage-1
- Spark
- Edges:
- Reducer 2 <- Map 1 (SORT, 1)
-#### A masked pattern was here ####
- Vertices:
- Map 1
+ /srcbucket_mapjoin_part_1/ds=2008-04-08/hr=0 [a]
+ Map 4
Map Operator Tree:
TableScan
- alias: a
- Statistics: Num rows: 687 Data size: 2750 Basic stats: COMPLETE Column stats: NONE
+ alias: b
+ Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
- Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0 {key}
- 1 {value}
- keys:
- 0 key (type: int)
- 1 key (type: int)
- outputColumnNames: _col0, _col8
- input vertices:
- 1 Map 3
- Position of Big Table: 0
- Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: int), _col8 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int), _col1 (type: string)
- sort order: ++
- Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- auto parallelism: false
- Local Work:
- Map Reduce Local Work
+ Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: int)
+ sort order: +
+ Map-reduce partition columns: key (type: int)
+ Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE
+ tag: 1
+ value expressions: value (type: string)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -284,13 +258,13 @@ STAGE PLANS:
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_1
+ name default.srcbucket_mapjoin_part_2
numFiles 2
numRows 0
partition_columns ds/hr
partition_columns.types string:string
rawDataSize 0
- serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 2750
@@ -306,19 +280,40 @@ STAGE PLANS:
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_1
+ name default.srcbucket_mapjoin_part_2
partition_columns ds/hr
partition_columns.types string:string
- serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcbucket_mapjoin_part_1
- name: default.srcbucket_mapjoin_part_1
+ name: default.srcbucket_mapjoin_part_2
+ name: default.srcbucket_mapjoin_part_2
Truncated Path -> Alias:
- /srcbucket_mapjoin_part_1/ds=2008-04-08/hr=0 [a]
+ /srcbucket_mapjoin_part_2/ds=2008-04-08/hr=0 [b]
Reducer 2
+ Needs Tagging: true
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {KEY.reducesinkkey0}
+ 1 {VALUE._col0}
+ outputColumnNames: _col0, _col8
+ Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col8 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: string)
+ sort order: ++
+ Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ auto parallelism: false
+ Reducer 3
Needs Tagging: false
Reduce Operator Tree:
Select Operator
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin8.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin8.q.out?rev=1642997&r1=1642996&r2=1642997&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin8.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin8.q.out Tue Dec 2 19:57:10 2014
@@ -126,35 +126,34 @@ TOK_QUERY
STAGE DEPENDENCIES:
- Stage-2 is a root stage
- Stage-1 depends on stages: Stage-2
+ Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-2
+ Stage: Stage-1
Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1)
+ Reducer 3 <- Reducer 2 (GROUP, 1)
#### A masked pattern was here ####
Vertices:
- Map 3
+ Map 1
Map Operator Tree:
TableScan
- alias: b
+ alias: a
Statistics: Num rows: 687 Data size: 2750 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- condition expressions:
- 0
- 1
- keys:
- 0 key (type: int)
- 1 key (type: int)
- Position of Big Table: 0
- Local Work:
- Map Reduce Local Work
+ Reduce Output Operator
+ key expressions: key (type: int)
+ sort order: +
+ Map-reduce partition columns: key (type: int)
+ Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE
+ tag: 0
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -173,13 +172,13 @@ STAGE PLANS:
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_2
+ name default.srcbucket_mapjoin_part_1
numFiles 2
numRows 0
partition_columns part
partition_columns.types string
rawDataSize 0
- serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 2750
@@ -189,67 +188,41 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- bucket_count 3
+ bucket_count 2
bucket_field_name key
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_2
+ name default.srcbucket_mapjoin_part_1
partition_columns part
partition_columns.types string
- serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcbucket_mapjoin_part_2
- name: default.srcbucket_mapjoin_part_2
+ name: default.srcbucket_mapjoin_part_1
+ name: default.srcbucket_mapjoin_part_1
Truncated Path -> Alias:
- /srcbucket_mapjoin_part_2/part=1 [b]
-
- Stage: Stage-1
- Spark
- Edges:
- Reducer 2 <- Map 1 (GROUP, 1)
-#### A masked pattern was here ####
- Vertices:
- Map 1
+ /srcbucket_mapjoin_part_1/part=1 [a]
+ Map 4
Map Operator Tree:
TableScan
- alias: a
+ alias: b
Statistics: Num rows: 687 Data size: 2750 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0
- 1
- keys:
- 0 key (type: int)
- 1 key (type: int)
- input vertices:
- 1 Map 3
- Position of Big Table: 0
- Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- value expressions: _col0 (type: bigint)
- auto parallelism: false
- Local Work:
- Map Reduce Local Work
+ Reduce Output Operator
+ key expressions: key (type: int)
+ sort order: +
+ Map-reduce partition columns: key (type: int)
+ Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE
+ tag: 1
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -268,13 +241,13 @@ STAGE PLANS:
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_1
+ name default.srcbucket_mapjoin_part_2
numFiles 2
numRows 0
partition_columns part
partition_columns.types string
rawDataSize 0
- serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 2750
@@ -284,25 +257,46 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- bucket_count 2
+ bucket_count 3
bucket_field_name key
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_1
+ name default.srcbucket_mapjoin_part_2
partition_columns part
partition_columns.types string
- serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcbucket_mapjoin_part_1
- name: default.srcbucket_mapjoin_part_1
+ name: default.srcbucket_mapjoin_part_2
+ name: default.srcbucket_mapjoin_part_2
Truncated Path -> Alias:
- /srcbucket_mapjoin_part_1/part=1 [a]
+ /srcbucket_mapjoin_part_2/part=1 [b]
Reducer 2
+ Needs Tagging: true
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1
+ Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: bigint)
+ auto parallelism: false
+ Reducer 3
Needs Tagging: false
Reduce Operator Tree:
Group By Operator
@@ -435,35 +429,34 @@ TOK_QUERY
STAGE DEPENDENCIES:
- Stage-2 is a root stage
- Stage-1 depends on stages: Stage-2
+ Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-2
+ Stage: Stage-1
Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3)
+ Reducer 3 <- Reducer 2 (GROUP, 1)
#### A masked pattern was here ####
Vertices:
- Map 3
+ Map 1
Map Operator Tree:
TableScan
- alias: b
+ alias: a
Statistics: Num rows: 687 Data size: 2750 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- condition expressions:
- 0
- 1
- keys:
- 0 key (type: int)
- 1 key (type: int)
- Position of Big Table: 0
- Local Work:
- Map Reduce Local Work
+ Reduce Output Operator
+ key expressions: key (type: int)
+ sort order: +
+ Map-reduce partition columns: key (type: int)
+ Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE
+ tag: 0
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -482,13 +475,13 @@ STAGE PLANS:
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_2
+ name default.srcbucket_mapjoin_part_1
numFiles 2
numRows 0
partition_columns part
partition_columns.types string
rawDataSize 0
- serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 2750
@@ -499,66 +492,40 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
bucket_count 2
- bucket_field_name value
+ bucket_field_name key
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_2
+ name default.srcbucket_mapjoin_part_1
partition_columns part
partition_columns.types string
- serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcbucket_mapjoin_part_2
- name: default.srcbucket_mapjoin_part_2
+ name: default.srcbucket_mapjoin_part_1
+ name: default.srcbucket_mapjoin_part_1
Truncated Path -> Alias:
- /srcbucket_mapjoin_part_2/part=1 [b]
-
- Stage: Stage-1
- Spark
- Edges:
- Reducer 2 <- Map 1 (GROUP, 1)
-#### A masked pattern was here ####
- Vertices:
- Map 1
+ /srcbucket_mapjoin_part_1/part=1 [a]
+ Map 4
Map Operator Tree:
TableScan
- alias: a
+ alias: b
Statistics: Num rows: 687 Data size: 2750 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0
- 1
- keys:
- 0 key (type: int)
- 1 key (type: int)
- input vertices:
- 1 Map 3
- Position of Big Table: 0
- Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- value expressions: _col0 (type: bigint)
- auto parallelism: false
- Local Work:
- Map Reduce Local Work
+ Reduce Output Operator
+ key expressions: key (type: int)
+ sort order: +
+ Map-reduce partition columns: key (type: int)
+ Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE
+ tag: 1
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -577,13 +544,13 @@ STAGE PLANS:
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_1
+ name default.srcbucket_mapjoin_part_2
numFiles 2
numRows 0
partition_columns part
partition_columns.types string
rawDataSize 0
- serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 2750
@@ -594,24 +561,45 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
bucket_count 2
- bucket_field_name key
+ bucket_field_name value
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_1
+ name default.srcbucket_mapjoin_part_2
partition_columns part
partition_columns.types string
- serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcbucket_mapjoin_part_1
- name: default.srcbucket_mapjoin_part_1
+ name: default.srcbucket_mapjoin_part_2
+ name: default.srcbucket_mapjoin_part_2
Truncated Path -> Alias:
- /srcbucket_mapjoin_part_1/part=1 [a]
+ /srcbucket_mapjoin_part_2/part=1 [b]
Reducer 2
+ Needs Tagging: true
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1
+ Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: bigint)
+ auto parallelism: false
+ Reducer 3
Needs Tagging: false
Reduce Operator Tree:
Group By Operator