You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2013/08/16 03:22:02 UTC
svn commit: r1514554 [15/18] - in /hive/branches/vectorization: ./
beeline/src/java/org/apache/hive/beeline/
cli/src/java/org/apache/hadoop/hive/cli/
cli/src/test/org/apache/hadoop/hive/cli/
common/src/java/org/apache/hadoop/hive/conf/ conf/ contrib/sr...
Modified: hive/branches/vectorization/ql/src/test/results/clientpositive/multiMapJoin1.q.out
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/results/clientpositive/multiMapJoin1.q.out?rev=1514554&r1=1514553&r2=1514554&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/results/clientpositive/multiMapJoin1.q.out (original)
+++ hive/branches/vectorization/ql/src/test/results/clientpositive/multiMapJoin1.q.out Fri Aug 16 01:21:54 2013
@@ -1,8 +1,6 @@
-PREHOOK: query: -- Join of a big table with 2 small tables on different keys should be performed as a single MR job
-create table smallTbl1(key string, value string)
+PREHOOK: query: create table smallTbl1(key string, value string)
PREHOOK: type: CREATETABLE
-POSTHOOK: query: -- Join of a big table with 2 small tables on different keys should be performed as a single MR job
-create table smallTbl1(key string, value string)
+POSTHOOK: query: create table smallTbl1(key string, value string)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: default@smallTbl1
PREHOOK: query: insert overwrite table smallTbl1 select * from src where key < 10
@@ -34,6 +32,56 @@ POSTHOOK: Lineage: smalltbl1.key SIMPLE
POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: create table smallTbl3(key string, value string)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table smallTbl3(key string, value string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@smallTbl3
+POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert overwrite table smallTbl3 select * from src where key < 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@smalltbl3
+POSTHOOK: query: insert overwrite table smallTbl3 select * from src where key < 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@smalltbl3
+POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: create table smallTbl4(key string, value string)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table smallTbl4(key string, value string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@smallTbl4
+POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert overwrite table smallTbl4 select * from src where key < 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@smalltbl4
+POSTHOOK: query: insert overwrite table smallTbl4 select * from src where key < 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@smalltbl4
+POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl4.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: create table bigTbl(key string, value string)
PREHOOK: type: CREATETABLE
POSTHOOK: query: create table bigTbl(key string, value string)
@@ -43,6 +91,10 @@ POSTHOOK: Lineage: smalltbl1.key SIMPLE
POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl4.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: insert overwrite table bigTbl
select * from
(
@@ -101,6 +153,10 @@ POSTHOOK: Lineage: smalltbl1.key SIMPLE
POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl4.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: explain
select count(*) FROM
(select bigTbl.key as key, bigTbl.value as value1,
@@ -125,13 +181,16 @@ POSTHOOK: Lineage: smalltbl1.key SIMPLE
POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl4.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
ABSTRACT SYNTAX TREE:
(TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bigTbl)) (TOK_TABREF (TOK_TABNAME smallTbl1)) (= (. (TOK_TABLE_OR_COL bigTbl) key) (. (TOK_TABLE_OR_COL smallTbl1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) value) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) value) value2)))) firstjoin) (TOK_TABREF (TOK_TABNAME smallTbl2)) (= (. (TOK_TABLE_OR_COL firstjoin) value1) (. (TOK_TABLE_OR_COL smallTbl2) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count)))))
STAGE DEPENDENCIES:
Stage-7 is a root stage
- Stage-6 depends on stages: Stage-7
- Stage-2 depends on stages: Stage-6
+ Stage-2 depends on stages: Stage-7
Stage-0 is a root stage
STAGE PLANS:
@@ -170,7 +229,7 @@ STAGE PLANS:
1 [Column[value]]
Position of Big Table: 0
- Stage: Stage-6
+ Stage: Stage-2
Map Reduce
Alias -> Map Operator Tree:
firstjoin:bigtbl
@@ -211,25 +270,14 @@ STAGE PLANS:
bucketGroup: false
mode: hash
outputColumnNames: _col0
- File Output Operator
- compressed: false
- GlobalTableId: 0
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
Local Work:
Map Reduce Local Work
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
- expr: _col0
- type: bigint
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -284,6 +332,10 @@ POSTHOOK: Lineage: smalltbl1.key SIMPLE
POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl4.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
580
PREHOOK: query: -- Now run a query with two-way join, which should be converted into a
-- map-join followed by groupby - two MR jobs overall
@@ -313,13 +365,16 @@ POSTHOOK: Lineage: smalltbl1.key SIMPLE
POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl4.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
ABSTRACT SYNTAX TREE:
(TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bigTbl)) (TOK_TABREF (TOK_TABNAME smallTbl1)) (= (. (TOK_TABLE_OR_COL bigTbl) key) (. (TOK_TABLE_OR_COL smallTbl1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) value) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) value) value2)))) firstjoin) (TOK_TABREF (TOK_TABNAME smallTbl2)) (= (. (TOK_TABLE_OR_COL firstjoin) value1) (. (TOK_TABLE_OR_COL smallTbl2) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count)))))
STAGE DEPENDENCIES:
Stage-7 is a root stage
- Stage-6 depends on stages: Stage-7
- Stage-2 depends on stages: Stage-6
+ Stage-2 depends on stages: Stage-7
Stage-0 is a root stage
STAGE PLANS:
@@ -358,7 +413,7 @@ STAGE PLANS:
1 [Column[value]]
Position of Big Table: 0
- Stage: Stage-6
+ Stage: Stage-2
Map Reduce
Alias -> Map Operator Tree:
firstjoin:bigtbl
@@ -399,25 +454,14 @@ STAGE PLANS:
bucketGroup: false
mode: hash
outputColumnNames: _col0
- File Output Operator
- compressed: false
- GlobalTableId: 0
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
Local Work:
Map Reduce Local Work
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
- expr: _col0
- type: bigint
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -447,7 +491,7 @@ PREHOOK: query: select count(*) FROM
bigTbl.value as value2 FROM bigTbl JOIN smallTbl1
on (bigTbl.key = smallTbl1.key)
) firstjoin
-JOIN
+JOIN
smallTbl2 on (firstjoin.value1 = smallTbl2.value)
PREHOOK: type: QUERY
PREHOOK: Input: default@bigtbl
@@ -459,7 +503,7 @@ POSTHOOK: query: select count(*) FROM
bigTbl.value as value2 FROM bigTbl JOIN smallTbl1
on (bigTbl.key = smallTbl1.key)
) firstjoin
-JOIN
+JOIN
smallTbl2 on (firstjoin.value1 = smallTbl2.value)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@bigtbl
@@ -472,30 +516,34 @@ POSTHOOK: Lineage: smalltbl1.key SIMPLE
POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl4.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
580
PREHOOK: query: -- Now run a query with two-way join, which should first be converted into a
-- map-join followed by groupby and then finally into a single MR job.
-#### A masked pattern was here ####
+explain
select count(*) FROM
(select bigTbl.key as key, bigTbl.value as value1,
- bigTbl.value as value2 FROM bigTbl JOIN smallTbl1
+ bigTbl.value as value2 FROM bigTbl JOIN smallTbl1
on (bigTbl.key = smallTbl1.key)
) firstjoin
-JOIN
+JOIN
smallTbl2 on (firstjoin.value1 = smallTbl2.value)
group by smallTbl2.key
PREHOOK: type: QUERY
POSTHOOK: query: -- Now run a query with two-way join, which should first be converted into a
-- map-join followed by groupby and then finally into a single MR job.
-#### A masked pattern was here ####
+explain
select count(*) FROM
(select bigTbl.key as key, bigTbl.value as value1,
- bigTbl.value as value2 FROM bigTbl JOIN smallTbl1
+ bigTbl.value as value2 FROM bigTbl JOIN smallTbl1
on (bigTbl.key = smallTbl1.key)
) firstjoin
-JOIN
+JOIN
smallTbl2 on (firstjoin.value1 = smallTbl2.value)
group by smallTbl2.key
POSTHOOK: type: QUERY
@@ -505,13 +553,17 @@ POSTHOOK: Lineage: smalltbl1.key SIMPLE
POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl4.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
ABSTRACT SYNTAX TREE:
-#### A masked pattern was here ####
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bigTbl)) (TOK_TABREF (TOK_TABNAME smallTbl1)) (= (. (TOK_TABLE_OR_COL bigTbl) key) (. (TOK_TABLE_OR_COL smallTbl1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) value) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) value) value2)))) firstjoin) (TOK_TABREF (TOK_TABNAME smallTbl2)) (= (. (TOK_TABLE_OR_COL firstjoin) value1) (. (TOK_TABLE_OR_COL smallTbl2) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL smallTbl2) key))))
STAGE DEPENDENCIES:
Stage-7 is a root stage
- Stage-6 depends on stages: Stage-7
- Stage-0 depends on stages: Stage-6
+ Stage-2 depends on stages: Stage-7
+ Stage-0 is a root stage
STAGE PLANS:
Stage: Stage-7
@@ -549,7 +601,7 @@ STAGE PLANS:
1 [Column[value]]
Position of Big Table: 0
- Stage: Stage-6
+ Stage: Stage-2
Map Reduce
Alias -> Map Operator Tree:
firstjoin:bigtbl
@@ -629,25 +681,22 @@ STAGE PLANS:
outputColumnNames: _col0
File Output Operator
compressed: false
- GlobalTableId: 1
+ GlobalTableId: 0
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Stage: Stage-0
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
+ Fetch Operator
+ limit: -1
-#### A masked pattern was here ####
-select count(*) FROM
+PREHOOK: query: select count(*) FROM
(select bigTbl.key as key, bigTbl.value as value1,
- bigTbl.value as value2 FROM bigTbl JOIN smallTbl1
+ bigTbl.value as value2 FROM bigTbl JOIN smallTbl1
on (bigTbl.key = smallTbl1.key)
) firstjoin
-JOIN
+JOIN
smallTbl2 on (firstjoin.value1 = smallTbl2.value)
group by smallTbl2.key
PREHOOK: type: QUERY
@@ -655,12 +704,12 @@ PREHOOK: Input: default@bigtbl
PREHOOK: Input: default@smalltbl1
PREHOOK: Input: default@smalltbl2
#### A masked pattern was here ####
-select count(*) FROM
+POSTHOOK: query: select count(*) FROM
(select bigTbl.key as key, bigTbl.value as value1,
- bigTbl.value as value2 FROM bigTbl JOIN smallTbl1
+ bigTbl.value as value2 FROM bigTbl JOIN smallTbl1
on (bigTbl.key = smallTbl1.key)
) firstjoin
-JOIN
+JOIN
smallTbl2 on (firstjoin.value1 = smallTbl2.value)
group by smallTbl2.key
POSTHOOK: type: QUERY
@@ -674,33 +723,16 @@ POSTHOOK: Lineage: smalltbl1.key SIMPLE
POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-PREHOOK: query: create table smallTbl3(key string, value string)
-PREHOOK: type: CREATETABLE
-POSTHOOK: query: create table smallTbl3(key string, value string)
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: default@smallTbl3
-POSTHOOK: Lineage: bigtbl.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: bigtbl.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-PREHOOK: query: insert overwrite table smallTbl3 select * from src where key < 10
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src
-PREHOOK: Output: default@smalltbl3
-POSTHOOK: query: insert overwrite table smallTbl3 select * from src where key < 10
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src
-POSTHOOK: Output: default@smalltbl3
-POSTHOOK: Lineage: bigtbl.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: bigtbl.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: smalltbl3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: smalltbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl4.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+270
+10
+10
+270
+10
+10
PREHOOK: query: drop table bigTbl
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@bigtbl
@@ -717,6 +749,8 @@ POSTHOOK: Lineage: smalltbl2.key SIMPLE
POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: smalltbl3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: smalltbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl4.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: create table bigTbl(key1 string, key2 string, value string)
PREHOOK: type: CREATETABLE
POSTHOOK: query: create table bigTbl(key1 string, key2 string, value string)
@@ -730,6 +764,8 @@ POSTHOOK: Lineage: smalltbl2.key SIMPLE
POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: smalltbl3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: smalltbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl4.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: insert overwrite table bigTbl
select * from
(
@@ -793,35 +829,75 @@ POSTHOOK: Lineage: smalltbl2.key SIMPLE
POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: smalltbl3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: smalltbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-PREHOOK: query: explain
-select count(*) FROM
- (
- SELECT firstjoin.key1 as key1, firstjoin.key2 as key2, smallTbl2.key as key3,
- firstjoin.value1 as value1, firstjoin.value2 as value2 FROM
- (SELECT bigTbl.key1 as key1, bigTbl.key2 as key2,
- bigTbl.value as value1, bigTbl.value as value2
- FROM bigTbl JOIN smallTbl1
- on (bigTbl.key1 = smallTbl1.key)
- ) firstjoin
- JOIN
- smallTbl2 on (firstjoin.value1 = smallTbl2.value)
- ) secondjoin
- JOIN smallTbl3 on (secondjoin.key2 = smallTbl3.key)
+POSTHOOK: Lineage: smalltbl4.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: -- First disable noconditionaltask
+EXPLAIN
+SELECT SUM(HASH(join3.key1)),
+ SUM(HASH(join3.key2)),
+ SUM(HASH(join3.key3)),
+ SUM(HASH(join3.key4)),
+ SUM(HASH(join3.key5)),
+ SUM(HASH(smallTbl4.key)),
+ SUM(HASH(join3.value1)),
+ SUM(HASH(join3.value2))
+FROM (SELECT join2.key1 as key1,
+ join2.key2 as key2,
+ join2.key3 as key3,
+ join2.key4 as key4,
+ smallTbl3.key as key5,
+ join2.value1 as value1,
+ join2.value2 as value2
+ FROM (SELECT join1.key1 as key1,
+ join1.key2 as key2,
+ join1.key3 as key3,
+ smallTbl2.key as key4,
+ join1.value1 as value1,
+ join1.value2 as value2
+ FROM (SELECT bigTbl.key1 as key1,
+ bigTbl.key2 as key2,
+ smallTbl1.key as key3,
+ bigTbl.value as value1,
+ bigTbl.value as value2
+ FROM bigTbl
+ JOIN smallTbl1 ON (bigTbl.key1 = smallTbl1.key)) join1
+ JOIN smallTbl2 ON (join1.value1 = smallTbl2.value)) join2
+ JOIN smallTbl3 ON (join2.key2 = smallTbl3.key)) join3
+JOIN smallTbl4 ON (join3.key3 = smallTbl4.key)
PREHOOK: type: QUERY
-POSTHOOK: query: explain
-select count(*) FROM
- (
- SELECT firstjoin.key1 as key1, firstjoin.key2 as key2, smallTbl2.key as key3,
- firstjoin.value1 as value1, firstjoin.value2 as value2 FROM
- (SELECT bigTbl.key1 as key1, bigTbl.key2 as key2,
- bigTbl.value as value1, bigTbl.value as value2
- FROM bigTbl JOIN smallTbl1
- on (bigTbl.key1 = smallTbl1.key)
- ) firstjoin
- JOIN
- smallTbl2 on (firstjoin.value1 = smallTbl2.value)
- ) secondjoin
- JOIN smallTbl3 on (secondjoin.key2 = smallTbl3.key)
+POSTHOOK: query: -- First disable noconditionaltask
+EXPLAIN
+SELECT SUM(HASH(join3.key1)),
+ SUM(HASH(join3.key2)),
+ SUM(HASH(join3.key3)),
+ SUM(HASH(join3.key4)),
+ SUM(HASH(join3.key5)),
+ SUM(HASH(smallTbl4.key)),
+ SUM(HASH(join3.value1)),
+ SUM(HASH(join3.value2))
+FROM (SELECT join2.key1 as key1,
+ join2.key2 as key2,
+ join2.key3 as key3,
+ join2.key4 as key4,
+ smallTbl3.key as key5,
+ join2.value1 as value1,
+ join2.value2 as value2
+ FROM (SELECT join1.key1 as key1,
+ join1.key2 as key2,
+ join1.key3 as key3,
+ smallTbl2.key as key4,
+ join1.value1 as value1,
+ join1.value2 as value2
+ FROM (SELECT bigTbl.key1 as key1,
+ bigTbl.key2 as key2,
+ smallTbl1.key as key3,
+ bigTbl.value as value1,
+ bigTbl.value as value2
+ FROM bigTbl
+ JOIN smallTbl1 ON (bigTbl.key1 = smallTbl1.key)) join1
+ JOIN smallTbl2 ON (join1.value1 = smallTbl2.value)) join2
+ JOIN smallTbl3 ON (join2.key2 = smallTbl3.key)) join3
+JOIN smallTbl4 ON (join3.key3 = smallTbl4.key)
POSTHOOK: type: QUERY
POSTHOOK: Lineage: bigtbl.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: bigtbl.key1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ]
@@ -834,80 +910,94 @@ POSTHOOK: Lineage: smalltbl2.key SIMPLE
POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: smalltbl3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: smalltbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl4.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
ABSTRACT SYNTAX TREE:
- (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bigTbl)) (TOK_TABREF (TOK_TABNAME smallTbl1)) (= (. (TOK_TABLE_OR_COL bigTbl) key1) (. (TOK_TABLE_OR_COL smallTbl1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) key1) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) key2) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) value) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) value) value2)))) firstjoin) (TOK_TABREF (TOK_TABNAME smallTbl2)) (= (. (TOK_TABLE_OR_COL firstjoin) value1) (. (TOK_TABLE_OR_COL smallTbl2) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL firstjoin) key1) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL firstjoin) key2) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL smallTbl2) key) key3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL firstjoin) value1) value1) (TOK_S
ELEXPR (. (TOK_TABLE_OR_COL firstjoin) value2) value2)))) secondjoin) (TOK_TABREF (TOK_TABNAME smallTbl3)) (= (. (TOK_TABLE_OR_COL secondjoin) key2) (. (TOK_TABLE_OR_COL smallTbl3) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count)))))
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bigTbl)) (TOK_TABREF (TOK_TABNAME smallTbl1)) (= (. (TOK_TABLE_OR_COL bigTbl) key1) (. (TOK_TABLE_OR_COL smallTbl1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) key1) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) key2) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL smallTbl1) key) key3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) value) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) value) value2)))) join1) (TOK_TABREF (TOK_TABNAME smallTbl2)) (= (. (TOK_TABLE_OR_COL join1) value1) (. (TOK_TABLE_OR_COL smallTbl2) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL join1) key1) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join1) key2) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join
1) key3) key3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL smallTbl2) key) key4) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join1) value1) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join1) value2) value2)))) join2) (TOK_TABREF (TOK_TABNAME smallTbl3)) (= (. (TOK_TABLE_OR_COL join2) key2) (. (TOK_TABLE_OR_COL smallTbl3) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) key1) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) key2) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) key3) key3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) key4) key4) (TOK_SELEXPR (. (TOK_TABLE_OR_COL smallTbl3) key) key5) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) value1) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) value2) value2)))) join3) (TOK_TABREF (TOK_TABNAME smallTbl4)) (= (. (TOK_TABLE_OR_COL join3) key3) (. (TOK_TABLE_OR_COL smallTbl4) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (.
(TOK_TABLE_OR_COL join3) key1)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) key2)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) key3)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) key4)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) key5)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL smallTbl4) key)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) value1)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) value2)))))))
STAGE DEPENDENCIES:
- Stage-16 is a root stage , consists of Stage-21, Stage-22, Stage-1
- Stage-21 has a backup stage: Stage-1
- Stage-14 depends on stages: Stage-21
- Stage-13 depends on stages: Stage-1, Stage-14, Stage-15 , consists of Stage-19, Stage-20, Stage-2
- Stage-19 has a backup stage: Stage-2
- Stage-11 depends on stages: Stage-19
- Stage-10 depends on stages: Stage-2, Stage-11, Stage-12 , consists of Stage-17, Stage-18, Stage-3
- Stage-17 has a backup stage: Stage-3
- Stage-8 depends on stages: Stage-17
- Stage-4 depends on stages: Stage-3, Stage-8, Stage-9
- Stage-18 has a backup stage: Stage-3
- Stage-9 depends on stages: Stage-18
+ Stage-20 is a root stage , consists of Stage-27, Stage-28, Stage-5
+ Stage-27 has a backup stage: Stage-5
+ Stage-18 depends on stages: Stage-27
+ Stage-17 depends on stages: Stage-5, Stage-18, Stage-19 , consists of Stage-25, Stage-26, Stage-1
+ Stage-25 has a backup stage: Stage-1
+ Stage-15 depends on stages: Stage-25
+ Stage-14 depends on stages: Stage-1, Stage-15, Stage-16 , consists of Stage-23, Stage-24, Stage-2
+ Stage-23 has a backup stage: Stage-2
+ Stage-12 depends on stages: Stage-23
+ Stage-11 depends on stages: Stage-2, Stage-12, Stage-13 , consists of Stage-21, Stage-22, Stage-3
+ Stage-21 has a backup stage: Stage-3
+ Stage-9 depends on stages: Stage-21
+ Stage-4 depends on stages: Stage-3, Stage-9, Stage-10
+ Stage-22 has a backup stage: Stage-3
+ Stage-10 depends on stages: Stage-22
Stage-3
- Stage-20 has a backup stage: Stage-2
- Stage-12 depends on stages: Stage-20
+ Stage-24 has a backup stage: Stage-2
+ Stage-13 depends on stages: Stage-24
Stage-2
- Stage-22 has a backup stage: Stage-1
- Stage-15 depends on stages: Stage-22
+ Stage-26 has a backup stage: Stage-1
+ Stage-16 depends on stages: Stage-26
Stage-1
+ Stage-28 has a backup stage: Stage-5
+ Stage-19 depends on stages: Stage-28
+ Stage-5
Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-16
+ Stage: Stage-20
Conditional Operator
- Stage: Stage-21
+ Stage: Stage-27
Map Reduce Local Work
Alias -> Map Local Tables:
- secondjoin:firstjoin:smalltbl1
+ join3:join2:join1:smalltbl1
Fetch Operator
limit: -1
Alias -> Map Local Operator Tree:
- secondjoin:firstjoin:smalltbl1
+ join3:join2:join1:smalltbl1
TableScan
alias: smalltbl1
HashTable Sink Operator
condition expressions:
- 0 {key2} {value}
- 1
+ 0 {key1} {key2} {value}
+ 1 {key}
handleSkewJoin: false
keys:
0 [Column[key1]]
1 [Column[key]]
Position of Big Table: 0
- Stage: Stage-14
+ Stage: Stage-18
Map Reduce
Alias -> Map Operator Tree:
- secondjoin:firstjoin:bigtbl
+ join3:join2:join1:bigtbl
TableScan
alias: bigtbl
Map Join Operator
condition map:
Inner Join 0 to 1
condition expressions:
- 0 {key2} {value}
- 1
+ 0 {key1} {key2} {value}
+ 1 {key}
handleSkewJoin: false
keys:
0 [Column[key1]]
1 [Column[key]]
- outputColumnNames: _col1, _col2
+ outputColumnNames: _col0, _col1, _col2, _col5
Position of Big Table: 0
Select Operator
expressions:
+ expr: _col0
+ type: string
expr: _col1
type: string
+ expr: _col5
+ type: string
+ expr: _col2
+ type: string
expr: _col2
type: string
- outputColumnNames: _col1, _col2
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
File Output Operator
compressed: false
GlobalTableId: 0
@@ -917,30 +1007,30 @@ STAGE PLANS:
Local Work:
Map Reduce Local Work
- Stage: Stage-13
+ Stage: Stage-17
Conditional Operator
- Stage: Stage-19
+ Stage: Stage-25
Map Reduce Local Work
Alias -> Map Local Tables:
- secondjoin:smalltbl2
+ join3:join2:smalltbl2
Fetch Operator
limit: -1
Alias -> Map Local Operator Tree:
- secondjoin:smalltbl2
+ join3:join2:smalltbl2
TableScan
alias: smalltbl2
HashTable Sink Operator
condition expressions:
- 0 {_col1}
- 1
+ 0 {_col0} {_col1} {_col2} {_col3} {_col4}
+ 1 {key}
handleSkewJoin: false
keys:
- 0 [Column[_col2]]
+ 0 [Column[_col3]]
1 [Column[value]]
Position of Big Table: 0
- Stage: Stage-11
+ Stage: Stage-15
Map Reduce
Alias -> Map Operator Tree:
$INTNAME
@@ -948,19 +1038,29 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
condition expressions:
- 0 {_col1}
- 1
+ 0 {_col0} {_col1} {_col2} {_col3} {_col4}
+ 1 {key}
handleSkewJoin: false
keys:
- 0 [Column[_col2]]
+ 0 [Column[_col3]]
1 [Column[value]]
- outputColumnNames: _col1
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Position of Big Table: 0
Select Operator
expressions:
+ expr: _col0
+ type: string
expr: _col1
type: string
- outputColumnNames: _col1
+ expr: _col2
+ type: string
+ expr: _col5
+ type: string
+ expr: _col3
+ type: string
+ expr: _col4
+ type: string
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
File Output Operator
compressed: false
GlobalTableId: 0
@@ -970,30 +1070,30 @@ STAGE PLANS:
Local Work:
Map Reduce Local Work
- Stage: Stage-10
+ Stage: Stage-14
Conditional Operator
- Stage: Stage-17
+ Stage: Stage-23
Map Reduce Local Work
Alias -> Map Local Tables:
- smalltbl3
+ join3:smalltbl3
Fetch Operator
limit: -1
Alias -> Map Local Operator Tree:
- smalltbl3
+ join3:smalltbl3
TableScan
alias: smalltbl3
HashTable Sink Operator
condition expressions:
- 0
- 1
+ 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5}
+ 1 {key}
handleSkewJoin: false
keys:
0 [Column[_col1]]
1 [Column[key]]
Position of Big Table: 0
- Stage: Stage-8
+ Stage: Stage-12
Map Reduce
Alias -> Map Operator Tree:
$INTNAME
@@ -1001,20 +1101,111 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
condition expressions:
- 0
- 1
+ 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5}
+ 1 {key}
handleSkewJoin: false
keys:
0 [Column[_col1]]
1 [Column[key]]
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Position of Big Table: 0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ expr: _col3
+ type: string
+ expr: _col6
+ type: string
+ expr: _col4
+ type: string
+ expr: _col5
+ type: string
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-11
+ Conditional Operator
+
+ Stage: Stage-21
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ smalltbl4
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ smalltbl4
+ TableScan
+ alias: smalltbl4
+ HashTable Sink Operator
+ condition expressions:
+ 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6}
+ 1 {key}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col2]]
+ 1 [Column[key]]
+ Position of Big Table: 0
+
+ Stage: Stage-9
+ Map Reduce
+ Alias -> Map Operator Tree:
+ $INTNAME
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6}
+ 1 {key}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col2]]
+ 1 [Column[key]]
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
Position of Big Table: 0
Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ expr: _col3
+ type: string
+ expr: _col4
+ type: string
+ expr: _col7
+ type: string
+ expr: _col5
+ type: string
+ expr: _col6
+ type: string
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col7, _col5, _col6
Group By Operator
aggregations:
- expr: count()
+ expr: sum(hash(_col0))
+ expr: sum(hash(_col1))
+ expr: sum(hash(_col2))
+ expr: sum(hash(_col3))
+ expr: sum(hash(_col4))
+ expr: sum(hash(_col7))
+ expr: sum(hash(_col5))
+ expr: sum(hash(_col6))
bucketGroup: false
mode: hash
- outputColumnNames: _col0
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
File Output Operator
compressed: false
GlobalTableId: 0
@@ -1034,18 +1225,53 @@ STAGE PLANS:
value expressions:
expr: _col0
type: bigint
- Reduce Operator Tree:
- Group By Operator
- aggregations:
- expr: count(VALUE._col0)
- bucketGroup: false
- mode: mergepartial
- outputColumnNames: _col0
+ expr: _col1
+ type: bigint
+ expr: _col2
+ type: bigint
+ expr: _col3
+ type: bigint
+ expr: _col4
+ type: bigint
+ expr: _col5
+ type: bigint
+ expr: _col6
+ type: bigint
+ expr: _col7
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: sum(VALUE._col0)
+ expr: sum(VALUE._col1)
+ expr: sum(VALUE._col2)
+ expr: sum(VALUE._col3)
+ expr: sum(VALUE._col4)
+ expr: sum(VALUE._col5)
+ expr: sum(VALUE._col6)
+ expr: sum(VALUE._col7)
+ bucketGroup: false
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
Select Operator
expressions:
expr: _col0
type: bigint
- outputColumnNames: _col0
+ expr: _col1
+ type: bigint
+ expr: _col2
+ type: bigint
+ expr: _col3
+ type: bigint
+ expr: _col4
+ type: bigint
+ expr: _col5
+ type: bigint
+ expr: _col6
+ type: bigint
+ expr: _col7
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
File Output Operator
compressed: false
GlobalTableId: 0
@@ -1053,7 +1279,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- Stage: Stage-18
+ Stage: Stage-22
Map Reduce Local Work
Alias -> Map Local Tables:
$INTNAME
@@ -1063,38 +1289,64 @@ STAGE PLANS:
$INTNAME
HashTable Sink Operator
condition expressions:
- 0
- 1
+ 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6}
+ 1 {key}
handleSkewJoin: false
keys:
- 0 [Column[_col1]]
+ 0 [Column[_col2]]
1 [Column[key]]
Position of Big Table: 1
- Stage: Stage-9
+ Stage: Stage-10
Map Reduce
Alias -> Map Operator Tree:
- smalltbl3
+ smalltbl4
TableScan
- alias: smalltbl3
+ alias: smalltbl4
Map Join Operator
condition map:
Inner Join 0 to 1
condition expressions:
- 0
- 1
+ 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6}
+ 1 {key}
handleSkewJoin: false
keys:
- 0 [Column[_col1]]
+ 0 [Column[_col2]]
1 [Column[key]]
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
Position of Big Table: 1
Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ expr: _col3
+ type: string
+ expr: _col4
+ type: string
+ expr: _col7
+ type: string
+ expr: _col5
+ type: string
+ expr: _col6
+ type: string
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col7, _col5, _col6
Group By Operator
aggregations:
- expr: count()
+ expr: sum(hash(_col0))
+ expr: sum(hash(_col1))
+ expr: sum(hash(_col2))
+ expr: sum(hash(_col3))
+ expr: sum(hash(_col4))
+ expr: sum(hash(_col7))
+ expr: sum(hash(_col5))
+ expr: sum(hash(_col6))
bucketGroup: false
mode: hash
- outputColumnNames: _col0
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
File Output Operator
compressed: false
GlobalTableId: 0
@@ -1110,16 +1362,31 @@ STAGE PLANS:
$INTNAME
Reduce Output Operator
key expressions:
- expr: _col1
+ expr: _col2
type: string
sort order: +
Map-reduce partition columns:
- expr: _col1
+ expr: _col2
type: string
tag: 0
- smalltbl3
+ value expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ expr: _col3
+ type: string
+ expr: _col4
+ type: string
+ expr: _col5
+ type: string
+ expr: _col6
+ type: string
+ smalltbl4
TableScan
- alias: smalltbl3
+ alias: smalltbl4
Reduce Output Operator
key expressions:
expr: key
@@ -1129,21 +1396,50 @@ STAGE PLANS:
expr: key
type: string
tag: 1
+ value expressions:
+ expr: key
+ type: string
Reduce Operator Tree:
Join Operator
condition map:
Inner Join 0 to 1
condition expressions:
- 0
- 1
+ 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6}
+ 1 {VALUE._col0}
handleSkewJoin: false
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ expr: _col3
+ type: string
+ expr: _col4
+ type: string
+ expr: _col7
+ type: string
+ expr: _col5
+ type: string
+ expr: _col6
+ type: string
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col7, _col5, _col6
Group By Operator
aggregations:
- expr: count()
+ expr: sum(hash(_col0))
+ expr: sum(hash(_col1))
+ expr: sum(hash(_col2))
+ expr: sum(hash(_col3))
+ expr: sum(hash(_col4))
+ expr: sum(hash(_col7))
+ expr: sum(hash(_col5))
+ expr: sum(hash(_col6))
bucketGroup: false
mode: hash
- outputColumnNames: _col0
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
File Output Operator
compressed: false
GlobalTableId: 0
@@ -1151,7 +1447,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- Stage: Stage-20
+ Stage: Stage-24
Map Reduce Local Work
Alias -> Map Local Tables:
$INTNAME
@@ -1161,37 +1457,183 @@ STAGE PLANS:
$INTNAME
HashTable Sink Operator
condition expressions:
- 0 {_col1}
- 1
+ 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5}
+ 1 {key}
handleSkewJoin: false
keys:
- 0 [Column[_col2]]
+ 0 [Column[_col1]]
+ 1 [Column[key]]
+ Position of Big Table: 1
+
+ Stage: Stage-13
+ Map Reduce
+ Alias -> Map Operator Tree:
+ join3:smalltbl3
+ TableScan
+ alias: smalltbl3
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5}
+ 1 {key}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col1]]
+ 1 [Column[key]]
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Position of Big Table: 1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ expr: _col3
+ type: string
+ expr: _col6
+ type: string
+ expr: _col4
+ type: string
+ expr: _col5
+ type: string
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+ $INTNAME
+ Reduce Output Operator
+ key expressions:
+ expr: _col1
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col1
+ type: string
+ tag: 0
+ value expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ expr: _col3
+ type: string
+ expr: _col4
+ type: string
+ expr: _col5
+ type: string
+ join3:smalltbl3
+ TableScan
+ alias: smalltbl3
+ Reduce Output Operator
+ key expressions:
+ expr: key
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: key
+ type: string
+ tag: 1
+ value expressions:
+ expr: key
+ type: string
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5}
+ 1 {VALUE._col0}
+ handleSkewJoin: false
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ expr: _col3
+ type: string
+ expr: _col6
+ type: string
+ expr: _col4
+ type: string
+ expr: _col5
+ type: string
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+ Stage: Stage-26
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $INTNAME
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $INTNAME
+ HashTable Sink Operator
+ condition expressions:
+ 0 {_col0} {_col1} {_col2} {_col3} {_col4}
+ 1 {key}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col3]]
1 [Column[value]]
Position of Big Table: 1
- Stage: Stage-12
+ Stage: Stage-16
Map Reduce
Alias -> Map Operator Tree:
- secondjoin:smalltbl2
+ join3:join2:smalltbl2
TableScan
alias: smalltbl2
Map Join Operator
condition map:
Inner Join 0 to 1
condition expressions:
- 0 {_col1}
- 1
+ 0 {_col0} {_col1} {_col2} {_col3} {_col4}
+ 1 {key}
handleSkewJoin: false
keys:
- 0 [Column[_col2]]
+ 0 [Column[_col3]]
1 [Column[value]]
- outputColumnNames: _col1
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Position of Big Table: 1
Select Operator
expressions:
+ expr: _col0
+ type: string
expr: _col1
type: string
- outputColumnNames: _col1
+ expr: _col2
+ type: string
+ expr: _col5
+ type: string
+ expr: _col3
+ type: string
+ expr: _col4
+ type: string
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
File Output Operator
compressed: false
GlobalTableId: 0
@@ -1201,23 +1643,31 @@ STAGE PLANS:
Local Work:
Map Reduce Local Work
- Stage: Stage-2
+ Stage: Stage-1
Map Reduce
Alias -> Map Operator Tree:
$INTNAME
Reduce Output Operator
key expressions:
- expr: _col2
+ expr: _col3
type: string
sort order: +
Map-reduce partition columns:
- expr: _col2
+ expr: _col3
type: string
tag: 0
value expressions:
+ expr: _col0
+ type: string
expr: _col1
type: string
- secondjoin:smalltbl2
+ expr: _col2
+ type: string
+ expr: _col3
+ type: string
+ expr: _col4
+ type: string
+ join3:join2:smalltbl2
TableScan
alias: smalltbl2
Reduce Output Operator
@@ -1229,20 +1679,33 @@ STAGE PLANS:
expr: value
type: string
tag: 1
+ value expressions:
+ expr: key
+ type: string
Reduce Operator Tree:
Join Operator
condition map:
Inner Join 0 to 1
condition expressions:
- 0 {VALUE._col1}
- 1
+ 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4}
+ 1 {VALUE._col0}
handleSkewJoin: false
- outputColumnNames: _col1
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Select Operator
expressions:
+ expr: _col0
+ type: string
expr: _col1
type: string
- outputColumnNames: _col1
+ expr: _col2
+ type: string
+ expr: _col5
+ type: string
+ expr: _col3
+ type: string
+ expr: _col4
+ type: string
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
File Output Operator
compressed: false
GlobalTableId: 0
@@ -1250,51 +1713,57 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- Stage: Stage-22
+ Stage: Stage-28
Map Reduce Local Work
Alias -> Map Local Tables:
- secondjoin:firstjoin:bigtbl
+ join3:join2:join1:bigtbl
Fetch Operator
limit: -1
Alias -> Map Local Operator Tree:
- secondjoin:firstjoin:bigtbl
+ join3:join2:join1:bigtbl
TableScan
alias: bigtbl
HashTable Sink Operator
condition expressions:
- 0 {key2} {value}
- 1
+ 0 {key1} {key2} {value}
+ 1 {key}
handleSkewJoin: false
keys:
0 [Column[key1]]
1 [Column[key]]
Position of Big Table: 1
- Stage: Stage-15
+ Stage: Stage-19
Map Reduce
Alias -> Map Operator Tree:
- secondjoin:firstjoin:smalltbl1
+ join3:join2:join1:smalltbl1
TableScan
alias: smalltbl1
Map Join Operator
condition map:
Inner Join 0 to 1
condition expressions:
- 0 {key2} {value}
- 1
+ 0 {key1} {key2} {value}
+ 1 {key}
handleSkewJoin: false
keys:
0 [Column[key1]]
1 [Column[key]]
- outputColumnNames: _col1, _col2
+ outputColumnNames: _col0, _col1, _col2, _col5
Position of Big Table: 1
Select Operator
expressions:
+ expr: _col0
+ type: string
expr: _col1
type: string
+ expr: _col5
+ type: string
+ expr: _col2
+ type: string
expr: _col2
type: string
- outputColumnNames: _col1, _col2
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
File Output Operator
compressed: false
GlobalTableId: 0
@@ -1304,10 +1773,10 @@ STAGE PLANS:
Local Work:
Map Reduce Local Work
- Stage: Stage-1
+ Stage: Stage-5
Map Reduce
Alias -> Map Operator Tree:
- secondjoin:firstjoin:bigtbl
+ join3:join2:join1:bigtbl
TableScan
alias: bigtbl
Reduce Output Operator
@@ -1320,11 +1789,13 @@ STAGE PLANS:
type: string
tag: 0
value expressions:
+ expr: key1
+ type: string
expr: key2
type: string
expr: value
type: string
- secondjoin:firstjoin:smalltbl1
+ join3:join2:join1:smalltbl1
TableScan
alias: smalltbl1
Reduce Output Operator
@@ -1336,22 +1807,31 @@ STAGE PLANS:
expr: key
type: string
tag: 1
+ value expressions:
+ expr: key
+ type: string
Reduce Operator Tree:
Join Operator
condition map:
Inner Join 0 to 1
condition expressions:
- 0 {VALUE._col1} {VALUE._col2}
- 1
+ 0 {VALUE._col0} {VALUE._col1} {VALUE._col2}
+ 1 {VALUE._col0}
handleSkewJoin: false
- outputColumnNames: _col1, _col2
+ outputColumnNames: _col0, _col1, _col2, _col5
Select Operator
expressions:
+ expr: _col0
+ type: string
expr: _col1
type: string
+ expr: _col5
+ type: string
expr: _col2
type: string
- outputColumnNames: _col1, _col2
+ expr: _col2
+ type: string
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
File Output Operator
compressed: false
GlobalTableId: 0
@@ -1364,43 +1844,81 @@ STAGE PLANS:
limit: -1
-PREHOOK: query: select count(*) FROM
- (
- SELECT firstjoin.key1 as key1, firstjoin.key2 as key2, smallTbl2.key as key3,
- firstjoin.value1 as value1, firstjoin.value2 as value2 FROM
- (SELECT bigTbl.key1 as key1, bigTbl.key2 as key2,
- bigTbl.value as value1, bigTbl.value as value2
- FROM bigTbl JOIN smallTbl1
- on (bigTbl.key1 = smallTbl1.key)
- ) firstjoin
- JOIN
- smallTbl2 on (firstjoin.value1 = smallTbl2.value)
- ) secondjoin
- JOIN smallTbl3 on (secondjoin.key2 = smallTbl3.key)
+PREHOOK: query: SELECT SUM(HASH(join3.key1)),
+ SUM(HASH(join3.key2)),
+ SUM(HASH(join3.key3)),
+ SUM(HASH(join3.key4)),
+ SUM(HASH(join3.key5)),
+ SUM(HASH(smallTbl4.key)),
+ SUM(HASH(join3.value1)),
+ SUM(HASH(join3.value2))
+FROM (SELECT join2.key1 as key1,
+ join2.key2 as key2,
+ join2.key3 as key3,
+ join2.key4 as key4,
+ smallTbl3.key as key5,
+ join2.value1 as value1,
+ join2.value2 as value2
+ FROM (SELECT join1.key1 as key1,
+ join1.key2 as key2,
+ join1.key3 as key3,
+ smallTbl2.key as key4,
+ join1.value1 as value1,
+ join1.value2 as value2
+ FROM (SELECT bigTbl.key1 as key1,
+ bigTbl.key2 as key2,
+ smallTbl1.key as key3,
+ bigTbl.value as value1,
+ bigTbl.value as value2
+ FROM bigTbl
+ JOIN smallTbl1 ON (bigTbl.key1 = smallTbl1.key)) join1
+ JOIN smallTbl2 ON (join1.value1 = smallTbl2.value)) join2
+ JOIN smallTbl3 ON (join2.key2 = smallTbl3.key)) join3
+JOIN smallTbl4 ON (join3.key3 = smallTbl4.key)
PREHOOK: type: QUERY
PREHOOK: Input: default@bigtbl
PREHOOK: Input: default@smalltbl1
PREHOOK: Input: default@smalltbl2
PREHOOK: Input: default@smalltbl3
+PREHOOK: Input: default@smalltbl4
#### A masked pattern was here ####
-POSTHOOK: query: select count(*) FROM
- (
- SELECT firstjoin.key1 as key1, firstjoin.key2 as key2, smallTbl2.key as key3,
- firstjoin.value1 as value1, firstjoin.value2 as value2 FROM
- (SELECT bigTbl.key1 as key1, bigTbl.key2 as key2,
- bigTbl.value as value1, bigTbl.value as value2
- FROM bigTbl JOIN smallTbl1
- on (bigTbl.key1 = smallTbl1.key)
- ) firstjoin
- JOIN
- smallTbl2 on (firstjoin.value1 = smallTbl2.value)
- ) secondjoin
- JOIN smallTbl3 on (secondjoin.key2 = smallTbl3.key)
+POSTHOOK: query: SELECT SUM(HASH(join3.key1)),
+ SUM(HASH(join3.key2)),
+ SUM(HASH(join3.key3)),
+ SUM(HASH(join3.key4)),
+ SUM(HASH(join3.key5)),
+ SUM(HASH(smallTbl4.key)),
+ SUM(HASH(join3.value1)),
+ SUM(HASH(join3.value2))
+FROM (SELECT join2.key1 as key1,
+ join2.key2 as key2,
+ join2.key3 as key3,
+ join2.key4 as key4,
+ smallTbl3.key as key5,
+ join2.value1 as value1,
+ join2.value2 as value2
+ FROM (SELECT join1.key1 as key1,
+ join1.key2 as key2,
+ join1.key3 as key3,
+ smallTbl2.key as key4,
+ join1.value1 as value1,
+ join1.value2 as value2
+ FROM (SELECT bigTbl.key1 as key1,
+ bigTbl.key2 as key2,
+ smallTbl1.key as key3,
+ bigTbl.value as value1,
+ bigTbl.value as value2
+ FROM bigTbl
+ JOIN smallTbl1 ON (bigTbl.key1 = smallTbl1.key)) join1
+ JOIN smallTbl2 ON (join1.value1 = smallTbl2.value)) join2
+ JOIN smallTbl3 ON (join2.key2 = smallTbl3.key)) join3
+JOIN smallTbl4 ON (join3.key3 = smallTbl4.key)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@bigtbl
POSTHOOK: Input: default@smalltbl1
POSTHOOK: Input: default@smalltbl2
POSTHOOK: Input: default@smalltbl3
+POSTHOOK: Input: default@smalltbl4
#### A masked pattern was here ####
POSTHOOK: Lineage: bigtbl.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: bigtbl.key1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ]
@@ -1413,40 +1931,80 @@ POSTHOOK: Lineage: smalltbl2.key SIMPLE
POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: smalltbl3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: smalltbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-1660
-PREHOOK: query: -- join with 4 tables on different keys is also executed as a single MR job,
--- So, overall two jobs - one for multi-way join and one for count(*)
-explain
-select count(*) FROM
- (
- SELECT firstjoin.key1 as key1, firstjoin.key2 as key2, smallTbl2.key as key3,
- firstjoin.value1 as value1, firstjoin.value2 as value2 FROM
- (SELECT bigTbl.key1 as key1, bigTbl.key2 as key2,
- bigTbl.value as value1, bigTbl.value as value2
- FROM bigTbl JOIN smallTbl1
- on (bigTbl.key1 = smallTbl1.key)
- ) firstjoin
- JOIN
- smallTbl2 on (firstjoin.value1 = smallTbl2.value)
- ) secondjoin
- JOIN smallTbl3 on (secondjoin.key2 = smallTbl3.key)
+POSTHOOK: Lineage: smalltbl4.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+247580 247580 247580 247580 247580 247580 548662743780 548662743780
+PREHOOK: query: -- Enable noconditionaltask and set the size of hive.auto.convert.join.noconditionaltask.size
+-- to 10000, which is large enough to fit all four small tables (smallTbl1 to smallTbl4).
+-- We will use a single MR job to evaluate this query.
+EXPLAIN
+SELECT SUM(HASH(join3.key1)),
+ SUM(HASH(join3.key2)),
+ SUM(HASH(join3.key3)),
+ SUM(HASH(join3.key4)),
+ SUM(HASH(join3.key5)),
+ SUM(HASH(smallTbl4.key)),
+ SUM(HASH(join3.value1)),
+ SUM(HASH(join3.value2))
+FROM (SELECT join2.key1 as key1,
+ join2.key2 as key2,
+ join2.key3 as key3,
+ join2.key4 as key4,
+ smallTbl3.key as key5,
+ join2.value1 as value1,
+ join2.value2 as value2
+ FROM (SELECT join1.key1 as key1,
+ join1.key2 as key2,
+ join1.key3 as key3,
+ smallTbl2.key as key4,
+ join1.value1 as value1,
+ join1.value2 as value2
+ FROM (SELECT bigTbl.key1 as key1,
+ bigTbl.key2 as key2,
+ smallTbl1.key as key3,
+ bigTbl.value as value1,
+ bigTbl.value as value2
+ FROM bigTbl
+ JOIN smallTbl1 ON (bigTbl.key1 = smallTbl1.key)) join1
+ JOIN smallTbl2 ON (join1.value1 = smallTbl2.value)) join2
+ JOIN smallTbl3 ON (join2.key2 = smallTbl3.key)) join3
+JOIN smallTbl4 ON (join3.key3 = smallTbl4.key)
PREHOOK: type: QUERY
-POSTHOOK: query: -- join with 4 tables on different keys is also executed as a single MR job,
--- So, overall two jobs - one for multi-way join and one for count(*)
-explain
-select count(*) FROM
- (
- SELECT firstjoin.key1 as key1, firstjoin.key2 as key2, smallTbl2.key as key3,
- firstjoin.value1 as value1, firstjoin.value2 as value2 FROM
- (SELECT bigTbl.key1 as key1, bigTbl.key2 as key2,
- bigTbl.value as value1, bigTbl.value as value2
- FROM bigTbl JOIN smallTbl1
- on (bigTbl.key1 = smallTbl1.key)
- ) firstjoin
- JOIN
- smallTbl2 on (firstjoin.value1 = smallTbl2.value)
- ) secondjoin
- JOIN smallTbl3 on (secondjoin.key2 = smallTbl3.key)
+POSTHOOK: query: -- Enable noconditionaltask and set the size of hive.auto.convert.join.noconditionaltask.size
+-- to 10000, which is large enough to fit all four small tables (smallTbl1 to smallTbl4).
+-- We will use a single MR job to evaluate this query.
+EXPLAIN
+SELECT SUM(HASH(join3.key1)),
+ SUM(HASH(join3.key2)),
+ SUM(HASH(join3.key3)),
+ SUM(HASH(join3.key4)),
+ SUM(HASH(join3.key5)),
+ SUM(HASH(smallTbl4.key)),
+ SUM(HASH(join3.value1)),
+ SUM(HASH(join3.value2))
+FROM (SELECT join2.key1 as key1,
+ join2.key2 as key2,
+ join2.key3 as key3,
+ join2.key4 as key4,
+ smallTbl3.key as key5,
+ join2.value1 as value1,
+ join2.value2 as value2
+ FROM (SELECT join1.key1 as key1,
+ join1.key2 as key2,
+ join1.key3 as key3,
+ smallTbl2.key as key4,
+ join1.value1 as value1,
+ join1.value2 as value2
+ FROM (SELECT bigTbl.key1 as key1,
+ bigTbl.key2 as key2,
+ smallTbl1.key as key3,
+ bigTbl.value as value1,
+ bigTbl.value as value2
+ FROM bigTbl
+ JOIN smallTbl1 ON (bigTbl.key1 = smallTbl1.key)) join1
+ JOIN smallTbl2 ON (join1.value1 = smallTbl2.value)) join2
+ JOIN smallTbl3 ON (join2.key2 = smallTbl3.key)) join3
+JOIN smallTbl4 ON (join3.key3 = smallTbl4.key)
POSTHOOK: type: QUERY
POSTHOOK: Lineage: bigtbl.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: bigtbl.key1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ]
@@ -1459,157 +2017,268 @@ POSTHOOK: Lineage: smalltbl2.key SIMPLE
POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: smalltbl3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: smalltbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl4.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
ABSTRACT SYNTAX TREE:
- (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bigTbl)) (TOK_TABREF (TOK_TABNAME smallTbl1)) (= (. (TOK_TABLE_OR_COL bigTbl) key1) (. (TOK_TABLE_OR_COL smallTbl1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) key1) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) key2) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) value) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) value) value2)))) firstjoin) (TOK_TABREF (TOK_TABNAME smallTbl2)) (= (. (TOK_TABLE_OR_COL firstjoin) value1) (. (TOK_TABLE_OR_COL smallTbl2) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL firstjoin) key1) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL firstjoin) key2) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL smallTbl2) key) key3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL firstjoin) value1) value1) (TOK_S
ELEXPR (. (TOK_TABLE_OR_COL firstjoin) value2) value2)))) secondjoin) (TOK_TABREF (TOK_TABNAME smallTbl3)) (= (. (TOK_TABLE_OR_COL secondjoin) key2) (. (TOK_TABLE_OR_COL smallTbl3) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count)))))
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bigTbl)) (TOK_TABREF (TOK_TABNAME smallTbl1)) (= (. (TOK_TABLE_OR_COL bigTbl) key1) (. (TOK_TABLE_OR_COL smallTbl1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) key1) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) key2) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL smallTbl1) key) key3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) value) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) value) value2)))) join1) (TOK_TABREF (TOK_TABNAME smallTbl2)) (= (. (TOK_TABLE_OR_COL join1) value1) (. (TOK_TABLE_OR_COL smallTbl2) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL join1) key1) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join1) key2) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join
1) key3) key3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL smallTbl2) key) key4) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join1) value1) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join1) value2) value2)))) join2) (TOK_TABREF (TOK_TABNAME smallTbl3)) (= (. (TOK_TABLE_OR_COL join2) key2) (. (TOK_TABLE_OR_COL smallTbl3) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) key1) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) key2) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) key3) key3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) key4) key4) (TOK_SELEXPR (. (TOK_TABLE_OR_COL smallTbl3) key) key5) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) value1) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) value2) value2)))) join3) (TOK_TABREF (TOK_TABNAME smallTbl4)) (= (. (TOK_TABLE_OR_COL join3) key3) (. (TOK_TABLE_OR_COL smallTbl4) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (.
(TOK_TABLE_OR_COL join3) key1)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) key2)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) key3)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) key4)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) key5)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL smallTbl4) key)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) value1)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) value2)))))))
STAGE DEPENDENCIES:
- Stage-11 is a root stage
- Stage-10 depends on stages: Stage-11
- Stage-4 depends on stages: Stage-10
+ Stage-13 is a root stage
+ Stage-4 depends on stages: Stage-13
Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-11
+ Stage: Stage-13
Map Reduce Local Work
Alias -> Map Local Tables:
- secondjoin:firstjoin:smalltbl1
+ join3:join2:join1:smalltbl1
+ Fetch Operator
+ limit: -1
+ join3:join2:smalltbl2
Fetch Operator
limit: -1
- secondjoin:smalltbl2
+ join3:smalltbl3
Fetch Operator
limit: -1
- smalltbl3
+ smalltbl4
Fetch Operator
limit: -1
Alias -> Map Local Operator Tree:
- secondjoin:firstjoin:smalltbl1
+ join3:join2:join1:smalltbl1
TableScan
alias: smalltbl1
HashTable Sink Operator
[... 2077 lines stripped ...]