You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2017/01/11 09:46:58 UTC
[2/5] hive git commit: HIVE-15539: Optimize complex multi-insert
queries in Calcite (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/886978db/ql/src/test/results/clientpositive/spark/auto_smb_mapjoin_14.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/auto_smb_mapjoin_14.q.out b/ql/src/test/results/clientpositive/spark/auto_smb_mapjoin_14.q.out
index 0e99972..318fc34 100644
--- a/ql/src/test/results/clientpositive/spark/auto_smb_mapjoin_14.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_smb_mapjoin_14.q.out
@@ -1393,22 +1393,34 @@ STAGE PLANS:
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Sorted Merge Bucket Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 key (type: int)
- 1 key (type: int)
- outputColumnNames: _col0, _col1, _col6
- Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string)
- outputColumnNames: _col0, _col1, _col2
+ Select Operator
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col3
Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: int), _col1 (type: string)
- outputColumnNames: _col0, _col1
+ expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string)
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1
File Output Operator
compressed: false
Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
@@ -1416,15 +1428,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest1
- File Output Operator
- compressed: false
- Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest2
+ name: default.dest2
Stage: Stage-0
Move Operator
@@ -1593,38 +1597,42 @@ STAGE PLANS:
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Sorted Merge Bucket Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 key (type: int)
- 1 key (type: int)
+ Select Operator
+ expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest1
- Select Operator
- expressions: _col0 (type: int)
- outputColumnNames: _col0
+ Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- keys: _col0 (type: int)
- mode: hash
- outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1
+ Select Operator
+ expressions: _col0 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
Reducer 2
Reduce Operator Tree:
Group By Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/886978db/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_13.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_13.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_13.q.out
index fb07771..00705e7 100644
--- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_13.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_13.q.out
@@ -92,16 +92,16 @@ STAGE PLANS:
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Sorted Merge Bucket Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 key (type: int)
- 1 key (type: int)
- outputColumnNames: _col0, _col1, _col5, _col6
- Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string)
+ Select Operator
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
Select Operator
@@ -279,16 +279,16 @@ STAGE PLANS:
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Sorted Merge Bucket Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 key (type: int)
- 1 key (type: int)
- outputColumnNames: _col0, _col1, _col5, _col6
- Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string)
+ Select Operator
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
Select Operator
@@ -467,10 +467,14 @@ STAGE PLANS:
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 key (type: int)
- 1 key (type: int)
+ Select Operator
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
Local Work:
Map Reduce Local Work
@@ -486,19 +490,19 @@ STAGE PLANS:
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 key (type: int)
- 1 key (type: int)
- outputColumnNames: _col0, _col1, _col5, _col6
- input vertices:
- 1 Map 2
- Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string)
+ Select Operator
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
+ input vertices:
+ 1 Map 2
Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: int), _col2 (type: int)
http://git-wip-us.apache.org/repos/asf/hive/blob/886978db/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out b/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out
index 92ca67b..9daaad9 100644
--- a/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out
@@ -4032,7 +4032,7 @@ STAGE PLANS:
alias: t2
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (key = 8) (type: boolean)
+ predicate: (UDFToDouble(key) = 8.0) (type: boolean)
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), val (type: string)
http://git-wip-us.apache.org/repos/asf/hive/blob/886978db/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out b/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out
index f7f4dbb..f345e7e 100644
--- a/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out
@@ -4181,7 +4181,7 @@ STAGE PLANS:
alias: t2
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (key = 8) (type: boolean)
+ predicate: (UDFToDouble(key) = 8.0) (type: boolean)
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), val (type: string)
http://git-wip-us.apache.org/repos/asf/hive/blob/886978db/ql/src/test/results/clientpositive/spark/multi_insert_with_join2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/multi_insert_with_join2.q.out b/ql/src/test/results/clientpositive/spark/multi_insert_with_join2.q.out
new file mode 100644
index 0000000..b79d86c
--- /dev/null
+++ b/ql/src/test/results/clientpositive/spark/multi_insert_with_join2.q.out
@@ -0,0 +1,1006 @@
+PREHOOK: query: CREATE TABLE T_A ( id STRING, val STRING )
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@T_A
+POSTHOOK: query: CREATE TABLE T_A ( id STRING, val STRING )
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@T_A
+PREHOOK: query: CREATE TABLE T_B ( id STRING, val STRING )
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@T_B
+POSTHOOK: query: CREATE TABLE T_B ( id STRING, val STRING )
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@T_B
+PREHOOK: query: CREATE TABLE join_result_1 ( ida STRING, vala STRING, idb STRING, valb STRING )
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@join_result_1
+POSTHOOK: query: CREATE TABLE join_result_1 ( ida STRING, vala STRING, idb STRING, valb STRING )
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@join_result_1
+PREHOOK: query: CREATE TABLE join_result_3 ( ida STRING, vala STRING, idb STRING, valb STRING )
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@join_result_3
+POSTHOOK: query: CREATE TABLE join_result_3 ( ida STRING, vala STRING, idb STRING, valb STRING )
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@join_result_3
+PREHOOK: query: INSERT INTO TABLE T_A
+VALUES ('Id_1', 'val_101'), ('Id_2', 'val_102'), ('Id_3', 'val_103')
+PREHOOK: type: QUERY
+PREHOOK: Output: default@t_a
+POSTHOOK: query: INSERT INTO TABLE T_A
+VALUES ('Id_1', 'val_101'), ('Id_2', 'val_102'), ('Id_3', 'val_103')
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@t_a
+POSTHOOK: Lineage: t_a.id SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: t_a.val SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: INSERT INTO TABLE T_B
+VALUES ('Id_1', 'val_103'), ('Id_2', 'val_104')
+PREHOOK: type: QUERY
+PREHOOK: Output: default@t_b
+POSTHOOK: query: INSERT INTO TABLE T_B
+VALUES ('Id_1', 'val_103'), ('Id_2', 'val_104')
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@t_b
+POSTHOOK: Lineage: t_b.id SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: t_b.val SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product
+PREHOOK: query: explain
+FROM T_A a LEFT JOIN T_B b ON a.id = b.id
+INSERT OVERWRITE TABLE join_result_1
+SELECT a.*, b.*
+WHERE b.id = 'Id_1' AND b.val = 'val_103'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+FROM T_A a LEFT JOIN T_B b ON a.id = b.id
+INSERT OVERWRITE TABLE join_result_1
+SELECT a.*, b.*
+WHERE b.id = 'Id_1' AND b.val = 'val_103'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (id = 'Id_1') (type: boolean)
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: val (type: string)
+ outputColumnNames: _col1
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((id = 'Id_1') and (val = 'val_103')) (type: boolean)
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col1
+ Statistics: Num rows: 1 Data size: 25 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: 'Id_1' (type: string), _col1 (type: string), 'Id_1' (type: string), 'val_103' (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 25 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 25 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.join_result_1
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.join_result_1
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product
+PREHOOK: query: explain
+FROM T_A a LEFT JOIN T_B b ON a.id = b.id
+INSERT OVERWRITE TABLE join_result_3
+SELECT a.*, b.*
+WHERE b.val = 'val_104' AND b.id = 'Id_2' AND a.val <> b.val
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+FROM T_A a LEFT JOIN T_B b ON a.id = b.id
+INSERT OVERWRITE TABLE join_result_3
+SELECT a.*, b.*
+WHERE b.val = 'val_104' AND b.id = 'Id_2' AND a.val <> b.val
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((id = 'Id_2') and (val <> 'val_104')) (type: boolean)
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: val (type: string)
+ outputColumnNames: _col1
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((val = 'val_104') and (id = 'Id_2')) (type: boolean)
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col1
+ Statistics: Num rows: 1 Data size: 25 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: 'Id_2' (type: string), _col1 (type: string), 'Id_2' (type: string), 'val_104' (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 25 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 25 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.join_result_3
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.join_result_3
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+PREHOOK: query: explain
+FROM T_A a LEFT JOIN T_B b ON a.id = b.id
+INSERT OVERWRITE TABLE join_result_1
+SELECT a.*, b.*
+WHERE b.id = 'Id_1' AND b.val = 'val_103'
+INSERT OVERWRITE TABLE join_result_3
+SELECT a.*, b.*
+WHERE b.val = 'val_104' AND b.id = 'Id_2' AND a.val <> b.val
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+FROM T_A a LEFT JOIN T_B b ON a.id = b.id
+INSERT OVERWRITE TABLE join_result_1
+SELECT a.*, b.*
+WHERE b.id = 'Id_1' AND b.val = 'val_103'
+INSERT OVERWRITE TABLE join_result_3
+SELECT a.*, b.*
+WHERE b.val = 'val_104' AND b.id = 'Id_2' AND a.val <> b.val
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-1 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+ Stage-4 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: id (type: string)
+ sort order: +
+ Map-reduce partition columns: id (type: string)
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ value expressions: val (type: string)
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: id (type: string)
+ sort order: +
+ Map-reduce partition columns: id (type: string)
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ value expressions: val (type: string)
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 id (type: string)
+ 1 id (type: string)
+ outputColumnNames: _col0, _col1, _col5, _col6
+ Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((_col6 = 'val_104') and (_col5 = 'Id_2') and (_col1 <> _col6)) (type: boolean)
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), 'Id_2' (type: string), 'val_104' (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.join_result_3
+ Filter Operator
+ predicate: ((_col5 = 'Id_1') and (_col6 = 'val_103')) (type: boolean)
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), 'Id_1' (type: string), 'val_103' (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.join_result_1
+
+ Stage: Stage-1
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.join_result_3
+
+ Stage: Stage-3
+ Stats-Aggr Operator
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.join_result_1
+
+ Stage: Stage-4
+ Stats-Aggr Operator
+
+PREHOOK: query: explain
+FROM T_A a LEFT JOIN T_B b ON a.id = b.id
+INSERT OVERWRITE TABLE join_result_1
+SELECT a.*, b.*
+WHERE b.id = 'Id_1' AND b.val = 'val_103'
+INSERT OVERWRITE TABLE join_result_3
+SELECT a.*, b.*
+WHERE b.val = 'val_104' AND b.id = 'Id_2'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+FROM T_A a LEFT JOIN T_B b ON a.id = b.id
+INSERT OVERWRITE TABLE join_result_1
+SELECT a.*, b.*
+WHERE b.id = 'Id_1' AND b.val = 'val_103'
+INSERT OVERWRITE TABLE join_result_3
+SELECT a.*, b.*
+WHERE b.val = 'val_104' AND b.id = 'Id_2'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+ Stage-1 depends on stages: Stage-2
+ Stage-4 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: id (type: string)
+ sort order: +
+ Map-reduce partition columns: id (type: string)
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ value expressions: val (type: string)
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: id (type: string)
+ sort order: +
+ Map-reduce partition columns: id (type: string)
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ value expressions: val (type: string)
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 id (type: string)
+ 1 id (type: string)
+ outputColumnNames: _col0, _col1, _col5, _col6
+ Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((_col5 = 'Id_1') and (_col6 = 'val_103')) (type: boolean)
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), 'Id_1' (type: string), 'val_103' (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.join_result_1
+ Filter Operator
+ predicate: ((_col6 = 'val_104') and (_col5 = 'Id_2')) (type: boolean)
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), 'Id_2' (type: string), 'val_104' (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.join_result_3
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.join_result_1
+
+ Stage: Stage-3
+ Stats-Aggr Operator
+
+ Stage: Stage-1
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.join_result_3
+
+ Stage: Stage-4
+ Stats-Aggr Operator
+
+PREHOOK: query: explain
+FROM T_A a JOIN T_B b ON a.id = b.id
+INSERT OVERWRITE TABLE join_result_1
+SELECT a.*, b.*
+WHERE b.id = 'Id_1' AND b.val = 'val_103'
+INSERT OVERWRITE TABLE join_result_3
+SELECT a.*, b.*
+WHERE b.val = 'val_104' AND b.id = 'Id_2'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+FROM T_A a JOIN T_B b ON a.id = b.id
+INSERT OVERWRITE TABLE join_result_1
+SELECT a.*, b.*
+WHERE b.id = 'Id_1' AND b.val = 'val_103'
+INSERT OVERWRITE TABLE join_result_3
+SELECT a.*, b.*
+WHERE b.val = 'val_104' AND b.id = 'Id_2'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+ Stage-1 depends on stages: Stage-2
+ Stage-4 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: id is not null (type: boolean)
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: id (type: string)
+ sort order: +
+ Map-reduce partition columns: id (type: string)
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ value expressions: val (type: string)
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: id is not null (type: boolean)
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: id (type: string)
+ sort order: +
+ Map-reduce partition columns: id (type: string)
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ value expressions: val (type: string)
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 id (type: string)
+ 1 id (type: string)
+ outputColumnNames: _col0, _col1, _col5, _col6
+ Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((_col5 = 'Id_1') and (_col6 = 'val_103')) (type: boolean)
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), 'Id_1' (type: string), 'val_103' (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.join_result_1
+ Filter Operator
+ predicate: ((_col6 = 'val_104') and (_col5 = 'Id_2')) (type: boolean)
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), 'Id_2' (type: string), 'val_104' (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.join_result_3
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.join_result_1
+
+ Stage: Stage-3
+ Stats-Aggr Operator
+
+ Stage: Stage-1
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.join_result_3
+
+ Stage: Stage-4
+ Stats-Aggr Operator
+
+PREHOOK: query: explain
+FROM T_A a JOIN T_B b ON a.id = b.id
+INSERT OVERWRITE TABLE join_result_1
+SELECT *
+WHERE b.id = 'Id_1' AND b.val = 'val_103'
+INSERT OVERWRITE TABLE join_result_3
+SELECT *
+WHERE b.val = 'val_104' AND b.id = 'Id_2'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+FROM T_A a JOIN T_B b ON a.id = b.id
+INSERT OVERWRITE TABLE join_result_1
+SELECT *
+WHERE b.id = 'Id_1' AND b.val = 'val_103'
+INSERT OVERWRITE TABLE join_result_3
+SELECT *
+WHERE b.val = 'val_104' AND b.id = 'Id_2'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+ Stage-1 depends on stages: Stage-2
+ Stage-4 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: id is not null (type: boolean)
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: id (type: string)
+ sort order: +
+ Map-reduce partition columns: id (type: string)
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ value expressions: val (type: string)
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: id is not null (type: boolean)
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: id (type: string)
+ sort order: +
+ Map-reduce partition columns: id (type: string)
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ value expressions: val (type: string)
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 id (type: string)
+ 1 id (type: string)
+ outputColumnNames: _col0, _col1, _col5, _col6
+ Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((_col5 = 'Id_1') and (_col6 = 'val_103')) (type: boolean)
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), 'Id_1' (type: string), 'val_103' (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.join_result_1
+ Filter Operator
+ predicate: ((_col6 = 'val_104') and (_col5 = 'Id_2')) (type: boolean)
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), 'Id_2' (type: string), 'val_104' (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.join_result_3
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.join_result_1
+
+ Stage: Stage-3
+ Stats-Aggr Operator
+
+ Stage: Stage-1
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.join_result_3
+
+ Stage: Stage-4
+ Stats-Aggr Operator
+
+PREHOOK: query: explain
+FROM T_A a JOIN T_B b ON a.id = b.id
+INSERT OVERWRITE TABLE join_result_1
+SELECT a.id, a.val, b.id, b.val
+WHERE b.id = 'Id_1' AND b.val = 'val_103'
+INSERT OVERWRITE TABLE join_result_3
+SELECT a.id, a.val, b.id, b.val
+WHERE b.val = 'val_104' AND b.id = 'Id_2'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+FROM T_A a JOIN T_B b ON a.id = b.id
+INSERT OVERWRITE TABLE join_result_1
+SELECT a.id, a.val, b.id, b.val
+WHERE b.id = 'Id_1' AND b.val = 'val_103'
+INSERT OVERWRITE TABLE join_result_3
+SELECT a.id, a.val, b.id, b.val
+WHERE b.val = 'val_104' AND b.id = 'Id_2'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+ Stage-1 depends on stages: Stage-2
+ Stage-4 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: id is not null (type: boolean)
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: id (type: string), val (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: id is not null (type: boolean)
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: id (type: string), val (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col2 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((_col3 = 'Id_1') and (_col2 = 'val_103')) (type: boolean)
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col2 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.join_result_1
+ Filter Operator
+ predicate: ((_col2 = 'val_104') and (_col3 = 'Id_2')) (type: boolean)
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col2 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.join_result_3
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.join_result_1
+
+ Stage: Stage-3
+ Stats-Aggr Operator
+
+ Stage: Stage-1
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.join_result_3
+
+ Stage: Stage-4
+ Stats-Aggr Operator
+
+PREHOOK: query: explain
+FROM T_A a JOIN T_B b ON a.id = b.id
+INSERT OVERWRITE TABLE join_result_1
+SELECT a.val, a.id, b.id, b.val
+WHERE b.id = 'Id_1' AND b.val = 'val_103'
+INSERT OVERWRITE TABLE join_result_3
+SELECT a.id, b.val, b.id, a.val
+WHERE b.val = 'val_104' AND b.id = 'Id_2'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+FROM T_A a JOIN T_B b ON a.id = b.id
+INSERT OVERWRITE TABLE join_result_1
+SELECT a.val, a.id, b.id, b.val
+WHERE b.id = 'Id_1' AND b.val = 'val_103'
+INSERT OVERWRITE TABLE join_result_3
+SELECT a.id, b.val, b.id, a.val
+WHERE b.val = 'val_104' AND b.id = 'Id_2'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+ Stage-1 depends on stages: Stage-2
+ Stage-4 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: id is not null (type: boolean)
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: id (type: string), val (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: id is not null (type: boolean)
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: id (type: string), val (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col2 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((_col3 = 'Id_1') and (_col2 = 'val_103')) (type: boolean)
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: string), _col0 (type: string), _col3 (type: string), _col2 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.join_result_1
+ Filter Operator
+ predicate: ((_col2 = 'val_104') and (_col3 = 'Id_2')) (type: boolean)
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col2 (type: string), _col3 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.join_result_3
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.join_result_1
+
+ Stage: Stage-3
+ Stats-Aggr Operator
+
+ Stage: Stage-1
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.join_result_3
+
+ Stage: Stage-4
+ Stats-Aggr Operator
+
http://git-wip-us.apache.org/repos/asf/hive/blob/886978db/ql/src/test/results/clientpositive/spark/union17.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/union17.q.out b/ql/src/test/results/clientpositive/spark/union17.q.out
index ce23773..6ef83be 100644
--- a/ql/src/test/results/clientpositive/spark/union17.q.out
+++ b/ql/src/test/results/clientpositive/spark/union17.q.out
@@ -229,10 +229,10 @@ POSTHOOK: Input: default@src
POSTHOOK: Output: default@dest1
POSTHOOK: Output: default@dest2
POSTHOOK: Lineage: dest1.key EXPRESSION [(src)s2.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: dest1.value EXPRESSION [(src)s1.null, (src)s2.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.value EXPRESSION [(src)s2.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: dest2.key EXPRESSION [(src)s2.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: dest2.val1 EXPRESSION [(src)s1.null, (src)s2.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)s1.null, (src)s2.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.val1 EXPRESSION [(src)s2.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)s2.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: SELECT DEST1.* FROM DEST1
PREHOOK: type: QUERY
PREHOOK: Input: default@dest1
http://git-wip-us.apache.org/repos/asf/hive/blob/886978db/ql/src/test/results/clientpositive/spark/union18.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/union18.q.out b/ql/src/test/results/clientpositive/spark/union18.q.out
index f9a28bb..aeaac97 100644
--- a/ql/src/test/results/clientpositive/spark/union18.q.out
+++ b/ql/src/test/results/clientpositive/spark/union18.q.out
@@ -172,10 +172,10 @@ POSTHOOK: Input: default@src
POSTHOOK: Output: default@dest1
POSTHOOK: Output: default@dest2
POSTHOOK: Lineage: dest1.key EXPRESSION [(src)s2.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: dest1.value EXPRESSION [(src)s1.null, (src)s2.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.value EXPRESSION [(src)s2.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: dest2.key EXPRESSION [(src)s2.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: dest2.val1 EXPRESSION [(src)s1.null, (src)s2.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)s1.null, (src)s2.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.val1 EXPRESSION [(src)s2.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)s2.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: SELECT DEST1.* FROM DEST1 SORT BY DEST1.key, DEST1.value
PREHOOK: type: QUERY
PREHOOK: Input: default@dest1
http://git-wip-us.apache.org/repos/asf/hive/blob/886978db/ql/src/test/results/clientpositive/spark/union19.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/union19.q.out b/ql/src/test/results/clientpositive/spark/union19.q.out
index d81c19b..6f225a7 100644
--- a/ql/src/test/results/clientpositive/spark/union19.q.out
+++ b/ql/src/test/results/clientpositive/spark/union19.q.out
@@ -197,10 +197,10 @@ POSTHOOK: Input: default@src
POSTHOOK: Output: default@dest1
POSTHOOK: Output: default@dest2
POSTHOOK: Lineage: dest1.key EXPRESSION [(src)s2.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: dest1.value EXPRESSION [(src)s1.null, (src)s2.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.value EXPRESSION [(src)s2.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: dest2.key EXPRESSION [(src)s2.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: dest2.val1 EXPRESSION [(src)s1.null, (src)s2.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)s1.null, (src)s2.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.val1 EXPRESSION [(src)s2.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)s2.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: SELECT DEST1.* FROM DEST1 SORT BY DEST1.key, DEST1.value
PREHOOK: type: QUERY
PREHOOK: Input: default@dest1
http://git-wip-us.apache.org/repos/asf/hive/blob/886978db/ql/src/test/results/clientpositive/spark/union31.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/union31.q.out b/ql/src/test/results/clientpositive/spark/union31.q.out
index a1f29eb..1f31dee 100644
--- a/ql/src/test/results/clientpositive/spark/union31.q.out
+++ b/ql/src/test/results/clientpositive/spark/union31.q.out
@@ -377,11 +377,11 @@ STAGE PLANS:
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string)
+ keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
@@ -398,11 +398,11 @@ STAGE PLANS:
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string)
+ keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
@@ -671,11 +671,11 @@ STAGE PLANS:
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string)
+ keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
http://git-wip-us.apache.org/repos/asf/hive/blob/886978db/ql/src/test/results/clientpositive/spark/union_remove_6.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/union_remove_6.q.out b/ql/src/test/results/clientpositive/spark/union_remove_6.q.out
index 1bdeb09..36145e9 100644
--- a/ql/src/test/results/clientpositive/spark/union_remove_6.q.out
+++ b/ql/src/test/results/clientpositive/spark/union_remove_6.q.out
@@ -84,11 +84,11 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string)
+ keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
http://git-wip-us.apache.org/repos/asf/hive/blob/886978db/ql/src/test/results/clientpositive/spark/union_remove_6_subq.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/union_remove_6_subq.q.out b/ql/src/test/results/clientpositive/spark/union_remove_6_subq.q.out
index cebea03..aafc1bd 100644
--- a/ql/src/test/results/clientpositive/spark/union_remove_6_subq.q.out
+++ b/ql/src/test/results/clientpositive/spark/union_remove_6_subq.q.out
@@ -90,11 +90,11 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
- outputColumnNames: key
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
- keys: key (type: string)
+ keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE