You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2015/11/20 22:12:16 UTC
[1/2] hive git commit: HIVE-11684: Implement limit pushdown through
outer join in CBO (Jesus Camacho Rodriguez,
reviewed by Laljo John Pullokkaran)
Repository: hive
Updated Branches:
refs/heads/master 97cb0c6e0 -> 0b9009770
http://git-wip-us.apache.org/repos/asf/hive/blob/0b900977/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out b/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out
index 937e8fc..e2ae796 100644
--- a/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out
+++ b/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out
@@ -1767,18 +1767,22 @@ STAGE PLANS:
Map 1
Map Operator Tree:
TableScan
- alias: y
+ alias: z
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
- Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- Position of Big Table: 1
+ Filter Operator
+ isSamplingPred: false
+ predicate: value is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: value (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: string)
+ 1 _col1 (type: string)
+ Position of Big Table: 1
Local Work:
Map Reduce Local Work
Path -> Alias:
@@ -1786,9 +1790,12 @@ STAGE PLANS:
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: src
+ base file name: hr=11
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-08
+ hr 11
properties:
COLUMN_STATS_ACCURATE true
bucket_count -1
@@ -1796,11 +1803,13 @@ STAGE PLANS:
columns.comments 'default','default'
columns.types string:string
#### A masked pattern was here ####
- name default.src
+ name default.srcpart
numFiles 1
numRows 500
+ partition_columns ds/hr
+ partition_columns.types string:string
rawDataSize 5312
- serialization.ddl struct src { string key, string value}
+ serialization.ddl struct srcpart { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 5812
@@ -1810,45 +1819,38 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE true
bucket_count -1
columns key,value
columns.comments 'default','default'
columns.types string:string
#### A masked pattern was here ####
- name default.src
- numFiles 1
- numRows 500
- rawDataSize 5312
- serialization.ddl struct src { string key, string value}
+ name default.srcpart
+ partition_columns ds/hr
+ partition_columns.types string:string
+ serialization.ddl struct srcpart { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 5812
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.src
- name: default.src
+ name: default.srcpart
+ name: default.srcpart
Truncated Path -> Alias:
- /src [y]
- Map 3
+ /srcpart/ds=2008-04-08/hr=11 [z]
+ Map 2
Map Operator Tree:
TableScan
- alias: z
+ alias: y
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: value is not null (type: boolean)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: value (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 _col1 (type: string)
- 1 _col0 (type: string)
- Position of Big Table: 0
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ Position of Big Table: 1
Local Work:
Map Reduce Local Work
Path -> Alias:
@@ -1856,12 +1858,9 @@ STAGE PLANS:
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: hr=11
+ base file name: src
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- partition values:
- ds 2008-04-08
- hr 11
properties:
COLUMN_STATS_ACCURATE true
bucket_count -1
@@ -1869,13 +1868,11 @@ STAGE PLANS:
columns.comments 'default','default'
columns.types string:string
#### A masked pattern was here ####
- name default.srcpart
+ name default.src
numFiles 1
numRows 500
- partition_columns ds/hr
- partition_columns.types string:string
rawDataSize 5312
- serialization.ddl struct srcpart { string key, string value}
+ serialization.ddl struct src { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 5812
@@ -1885,29 +1882,32 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
+ COLUMN_STATS_ACCURATE true
bucket_count -1
columns key,value
columns.comments 'default','default'
columns.types string:string
#### A masked pattern was here ####
- name default.srcpart
- partition_columns ds/hr
- partition_columns.types string:string
- serialization.ddl struct srcpart { string key, string value}
+ name default.src
+ numFiles 1
+ numRows 500
+ rawDataSize 5312
+ serialization.ddl struct src { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcpart
- name: default.srcpart
+ name: default.src
+ name: default.src
Truncated Path -> Alias:
- /srcpart/ds=2008-04-08/hr=11 [z]
+ /src [y]
Stage: Stage-1
Spark
#### A masked pattern was here ####
Vertices:
- Map 2
+ Map 3
Map Operator Tree:
TableScan
alias: x
@@ -1929,7 +1929,7 @@ STAGE PLANS:
1 _col0 (type: string)
outputColumnNames: _col1, _col2
input vertices:
- 0 Map 1
+ 0 Map 2
Position of Big Table: 1
Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
Select Operator
@@ -1940,15 +1940,15 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 _col1 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col0, _col1, _col2
+ 0 _col0 (type: string)
+ 1 _col1 (type: string)
+ outputColumnNames: _col0, _col3, _col4
input vertices:
- 1 Map 3
- Position of Big Table: 0
+ 0 Map 1
+ Position of Big Table: 1
Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _col2 (type: string), _col1 (type: string)
+ expressions: _col3 (type: string), _col0 (type: string), _col4 (type: string)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
File Output Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/0b900977/ql/src/test/results/clientpositive/spark/join_merging.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/join_merging.q.out b/ql/src/test/results/clientpositive/spark/join_merging.q.out
index 6c5089b..baed227 100644
--- a/ql/src/test/results/clientpositive/spark/join_merging.q.out
+++ b/ql/src/test/results/clientpositive/spark/join_merging.q.out
@@ -114,8 +114,8 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2)
- Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2)
+ Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -123,6 +123,20 @@ STAGE PLANS:
TableScan
alias: p1
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: p_partkey (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: p1
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (p_size > 10) (type: boolean)
Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE
@@ -136,7 +150,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int)
- Map 4
+ Map 5
Map Operator Tree:
TableScan
alias: p1
@@ -151,20 +165,6 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int)
- Map 5
- Map Operator Tree:
- TableScan
- alias: p1
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: p_partkey (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Reducer 2
Reduce Operator Tree:
Join Operator
@@ -172,6 +172,27 @@ STAGE PLANS:
Left Outer Join0 to 1
keys:
0 _col0 (type: int)
+ 1 _col2 (type: int)
+ outputColumnNames: _col2, _col4
+ Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: int), _col4 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 4
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 _col0 (type: int)
1 _col0 (type: int)
outputColumnNames: _col1, _col2, _col3
Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE
@@ -184,27 +205,6 @@ STAGE PLANS:
Map-reduce partition columns: _col2 (type: int)
Statistics: Num rows: 9 Data size: 1112 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int), _col3 (type: int)
- Reducer 3
- Reduce Operator Tree:
- Join Operator
- condition map:
- Right Outer Join0 to 1
- keys:
- 0 _col2 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col1, _col3
- Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col1 (type: int), _col3 (type: int)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/0b900977/ql/src/test/results/clientpositive/subquery_notin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/subquery_notin.q.out b/ql/src/test/results/clientpositive/subquery_notin.q.out
index 5563794..12bd35e 100644
--- a/ql/src/test/results/clientpositive/subquery_notin.q.out
+++ b/ql/src/test/results/clientpositive/subquery_notin.q.out
@@ -1,4 +1,4 @@
-Warning: Shuffle Join JOIN[20][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[23][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product
PREHOOK: query: -- non agg, non corr
explain
select *
@@ -18,64 +18,70 @@ where src.key not in
)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-3 is a root stage
- Stage-1 depends on stages: Stage-3
- Stage-2 depends on stages: Stage-1
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1, Stage-4
+ Stage-4 is a root stage
Stage-0 depends on stages: Stage-2
STAGE PLANS:
- Stage: Stage-3
+ Stage: Stage-1
Map Reduce
Map Operator Tree:
TableScan
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((key > '2') and key is null) (type: boolean)
- Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ predicate: (key > '2') (type: boolean)
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Select Operator
- Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (_col0 = 0) (type: boolean)
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ predicate: _col2 is null (type: boolean)
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
- Stage: Stage-1
+ Stage: Stage-2
Map Reduce
Map Operator Tree:
TableScan
- alias: src
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string), _col1 (type: string)
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string)
TableScan
Reduce Output Operator
sort order:
@@ -88,62 +94,52 @@ STAGE PLANS:
0
1
outputColumnNames: _col0, _col1
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Stage: Stage-2
+ Stage: Stage-4
Map Reduce
Map Operator Tree:
TableScan
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: string)
- TableScan
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (key > '2') (type: boolean)
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((key > '2') and key is null) (type: boolean)
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
Reduce Operator Tree:
- Join Operator
- condition map:
- Left Outer Join0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col0, _col1, _col3
- Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: _col3 is null (type: boolean)
- Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE
+ predicate: (_col0 = 0) (type: boolean)
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _col1 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE
table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-0
Fetch Operator
@@ -151,7 +147,7 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Shuffle Join JOIN[20][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[23][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product
PREHOOK: query: select *
from src
where src.key not in ( select key from src s1 where s1.key > '2')
@@ -1243,7 +1239,7 @@ Manufacturer#5 almond antique medium spring khaki 6
Manufacturer#5 almond azure blanched chiffon midnight 23
Manufacturer#5 almond antique blue firebrick mint 31
Manufacturer#5 almond aquamarine dodger light gainsboro 46
-Warning: Shuffle Join JOIN[20][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[23][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product
PREHOOK: query: -- non agg, non corr, Group By in Parent Query
select li.l_partkey, count(*)
from lineitem li
@@ -1278,7 +1274,7 @@ POSTHOOK: Input: default@lineitem
139636 1
175839 1
182052 1
-Warning: Shuffle Join JOIN[20][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[23][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product
PREHOOK: query: -- alternate not in syntax
select *
from src
@@ -1442,7 +1438,7 @@ POSTHOOK: Input: default@src
POSTHOOK: Input: default@t1_v
POSTHOOK: Output: database:default
POSTHOOK: Output: default@T2_v
-Warning: Shuffle Join JOIN[24][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[27][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product
PREHOOK: query: explain
select *
from T1_v where T1_v.key not in (select T2_v.key from T2_v)
@@ -1452,50 +1448,12 @@ select *
from T1_v where T1_v.key not in (select T2_v.key from T2_v)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-3 is a root stage
- Stage-1 depends on stages: Stage-3
- Stage-2 depends on stages: Stage-1
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1, Stage-4
+ Stage-4 is a root stage
Stage-0 depends on stages: Stage-2
STAGE PLANS:
- Stage: Stage-3
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: src
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((key < '11') and CASE WHEN ((key > '104')) THEN (null) ELSE (key) END is null) (type: boolean)
- Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (_col0 = 0) (type: boolean)
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
Stage: Stage-1
Map Reduce
Map Operator Tree:
@@ -1510,9 +1468,52 @@ STAGE PLANS:
outputColumnNames: _col0
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- sort order:
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string)
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((key < '11') and (CASE WHEN ((key > '104')) THEN (null) ELSE (key) END < '11')) (type: boolean)
+ Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: CASE WHEN ((key > '104')) THEN (null) ELSE (key) END (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: _col1 is null (type: boolean)
+ Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
TableScan
Reduce Output Operator
sort order:
@@ -1525,61 +1526,52 @@ STAGE PLANS:
0
1
outputColumnNames: _col0
- Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE
table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Stage: Stage-2
+ Stage: Stage-4
Map Reduce
Map Operator Tree:
TableScan
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
- TableScan
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((key < '11') and (CASE WHEN ((key > '104')) THEN (null) ELSE (key) END < '11')) (type: boolean)
- Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((key < '11') and CASE WHEN ((key > '104')) THEN (null) ELSE (key) END is null) (type: boolean)
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: CASE WHEN ((key > '104')) THEN (null) ELSE (key) END (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
Reduce Operator Tree:
- Join Operator
- condition map:
- Left Outer Join0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col0, _col2
- Statistics: Num rows: 200 Data size: 2132 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: _col2 is null (type: boolean)
- Statistics: Num rows: 100 Data size: 1066 Basic stats: COMPLETE Column stats: NONE
+ predicate: (_col0 = 0) (type: boolean)
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 100 Data size: 1066 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 100 Data size: 1066 Basic stats: COMPLETE Column stats: NONE
table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-0
Fetch Operator
@@ -1587,7 +1579,7 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Shuffle Join JOIN[24][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[27][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product
PREHOOK: query: select *
from T1_v where T1_v.key not in (select T2_v.key from T2_v)
PREHOOK: type: QUERY
http://git-wip-us.apache.org/repos/asf/hive/blob/0b900977/ql/src/test/results/clientpositive/subquery_notin_having.q.java1.7.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/subquery_notin_having.q.java1.7.out b/ql/src/test/results/clientpositive/subquery_notin_having.q.java1.7.out
index 9689ae3..150b527 100644
--- a/ql/src/test/results/clientpositive/subquery_notin_having.q.java1.7.out
+++ b/ql/src/test/results/clientpositive/subquery_notin_having.q.java1.7.out
@@ -1,4 +1,4 @@
-Warning: Shuffle Join JOIN[24][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[27][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-3:MAPRED' is a cross product
PREHOOK: query: -- non agg, non corr
-- JAVA_VERSION_SPECIFIC_OUTPUT
@@ -25,9 +25,9 @@ having key not in
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1, Stage-4
- Stage-3 depends on stages: Stage-2
- Stage-4 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-3 depends on stages: Stage-2, Stage-5
+ Stage-5 is a root stage
Stage-0 depends on stages: Stage-3
STAGE PLANS:
@@ -72,38 +72,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
Reduce Output Operator
- sort order:
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string), _col1 (type: bigint)
- TableScan
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0
- 1
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
- Stage: Stage-3
- Map Reduce
- Map Operator Tree:
- TableScan
- Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint)
TableScan
alias: src
@@ -127,24 +99,48 @@ STAGE PLANS:
keys:
0 _col0 (type: string)
1 _col0 (type: string)
- outputColumnNames: _col0, _col1, _col3
- Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: _col3 is null (type: boolean)
- Statistics: Num rows: 151 Data size: 1606 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: bigint)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 151 Data size: 1606 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 151 Data size: 1606 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ predicate: _col2 is null (type: boolean)
+ Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
- Stage: Stage-4
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: bigint)
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-5
Map Reduce
Map Operator Tree:
TableScan
http://git-wip-us.apache.org/repos/asf/hive/blob/0b900977/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
index fa29dfe..871fa4e 100644
--- a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
+++ b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
@@ -3732,108 +3732,105 @@ POSTHOOK: type: QUERY
Plan optimized by CBO.
Vertex dependency in root stage
-Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
-Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
+Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE)
Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
-Reducer 6 <- Map 5 (SIMPLE_EDGE)
+Reducer 7 <- Map 6 (SIMPLE_EDGE)
Stage-0
Fetch Operator
limit:-1
Stage-1
Reducer 4
- File Output Operator [FS_28]
+ File Output Operator [FS_29]
compressed:false
- Statistics:Num rows: 302 Data size: 53756 Basic stats: COMPLETE Column stats: NONE
+ Statistics:Num rows: 1 Data size: 291 Basic stats: COMPLETE Column stats: NONE
table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}
- Select Operator [SEL_27]
+ Select Operator [SEL_28]
| outputColumnNames:["_col0","_col1"]
- | Statistics:Num rows: 302 Data size: 53756 Basic stats: COMPLETE Column stats: NONE
+ | Statistics:Num rows: 1 Data size: 291 Basic stats: COMPLETE Column stats: NONE
|<-Reducer 3 [SIMPLE_EDGE]
- Reduce Output Operator [RS_26]
+ Reduce Output Operator [RS_27]
key expressions:_col0 (type: string)
sort order:+
- Statistics:Num rows: 302 Data size: 53756 Basic stats: COMPLETE Column stats: NONE
+ Statistics:Num rows: 1 Data size: 291 Basic stats: COMPLETE Column stats: NONE
value expressions:_col1 (type: string)
- Select Operator [SEL_25]
- outputColumnNames:["_col0","_col1"]
- Statistics:Num rows: 302 Data size: 53756 Basic stats: COMPLETE Column stats: NONE
- Filter Operator [FIL_31]
- predicate:_col3 is null (type: boolean)
- Statistics:Num rows: 302 Data size: 53756 Basic stats: COMPLETE Column stats: NONE
- Merge Join Operator [MERGEJOIN_36]
- | condition map:[{"":"Left Outer Join0 to 1"}]
- | keys:{"0":"_col0 (type: string)","1":"_col0 (type: string)"}
- | outputColumnNames:["_col0","_col1","_col3"]
- | Statistics:Num rows: 605 Data size: 107690 Basic stats: COMPLETE Column stats: NONE
- |<-Map 7 [SIMPLE_EDGE]
- | Reduce Output Operator [RS_22]
- | key expressions:_col0 (type: string)
- | Map-reduce partition columns:_col0 (type: string)
- | sort order:+
- | Statistics:Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
- | Select Operator [SEL_16]
- | outputColumnNames:["_col0"]
- | Statistics:Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
- | Filter Operator [FIL_34]
- | predicate:(key > '2') (type: boolean)
- | Statistics:Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
- | TableScan [TS_14]
- | alias:src_cbo
- | Statistics:Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
- |<-Reducer 2 [SIMPLE_EDGE]
- Reduce Output Operator [RS_21]
- key expressions:_col0 (type: string)
- Map-reduce partition columns:_col0 (type: string)
- sort order:+
- Statistics:Num rows: 550 Data size: 97900 Basic stats: COMPLETE Column stats: NONE
- value expressions:_col1 (type: string)
- Merge Join Operator [MERGEJOIN_35]
- | condition map:[{"":"Inner Join 0 to 1"}]
- | keys:{}
- | outputColumnNames:["_col0","_col1"]
- | Statistics:Num rows: 550 Data size: 97900 Basic stats: COMPLETE Column stats: NONE
- |<-Map 1 [SIMPLE_EDGE]
- | Reduce Output Operator [RS_18]
- | sort order:
- | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
- | value expressions:_col0 (type: string), _col1 (type: string)
- | Select Operator [SEL_2]
- | outputColumnNames:["_col0","_col1"]
- | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
- | TableScan [TS_0]
- | alias:src_cbo
- | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
- |<-Reducer 6 [SIMPLE_EDGE]
- Reduce Output Operator [RS_19]
+ Merge Join Operator [MERGEJOIN_37]
+ | condition map:[{"":"Inner Join 0 to 1"}]
+ | keys:{}
+ | outputColumnNames:["_col0","_col1"]
+ | Statistics:Num rows: 1 Data size: 291 Basic stats: COMPLETE Column stats: NONE
+ |<-Reducer 2 [SIMPLE_EDGE]
+ | Reduce Output Operator [RS_21]
+ | sort order:
+ | Statistics:Num rows: 1 Data size: 265 Basic stats: COMPLETE Column stats: COMPLETE
+ | value expressions:_col0 (type: string), _col1 (type: string)
+ | Filter Operator [FIL_32]
+ | predicate:_col2 is null (type: boolean)
+ | Statistics:Num rows: 1 Data size: 265 Basic stats: COMPLETE Column stats: COMPLETE
+ | Merge Join Operator [MERGEJOIN_36]
+ | | condition map:[{"":"Left Outer Join0 to 1"}]
+ | | keys:{"0":"_col0 (type: string)","1":"_col0 (type: string)"}
+ | | outputColumnNames:["_col0","_col1","_col2"]
+ | | Statistics:Num rows: 404 Data size: 107060 Basic stats: COMPLETE Column stats: COMPLETE
+ | |<-Map 1 [SIMPLE_EDGE]
+ | | Reduce Output Operator [RS_18]
+ | | key expressions:_col0 (type: string)
+ | | Map-reduce partition columns:_col0 (type: string)
+ | | sort order:+
+ | | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ | | value expressions:_col1 (type: string)
+ | | Select Operator [SEL_2]
+ | | outputColumnNames:["_col0","_col1"]
+ | | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ | | TableScan [TS_0]
+ | | alias:src_cbo
+ | | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ | |<-Map 5 [SIMPLE_EDGE]
+ | Reduce Output Operator [RS_19]
+ | key expressions:_col0 (type: string)
+ | Map-reduce partition columns:_col0 (type: string)
+ | sort order:+
+ | Statistics:Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
+ | Select Operator [SEL_5]
+ | outputColumnNames:["_col0"]
+ | Statistics:Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
+ | Filter Operator [FIL_33]
+ | predicate:(key > '2') (type: boolean)
+ | Statistics:Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
+ | TableScan [TS_3]
+ | alias:src_cbo
+ | Statistics:Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+ |<-Reducer 7 [SIMPLE_EDGE]
+ Reduce Output Operator [RS_22]
+ sort order:
+ Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Select Operator [SEL_14]
+ Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Filter Operator [FIL_34]
+ predicate:(_col0 = 0) (type: boolean)
+ Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator [GBY_13]
+ | aggregations:["count(VALUE._col0)"]
+ | outputColumnNames:["_col0"]
+ | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ |<-Map 6 [SIMPLE_EDGE]
+ Reduce Output Operator [RS_12]
sort order:
- Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
- Select Operator [SEL_11]
- Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
- Filter Operator [FIL_32]
- predicate:(_col0 = 0) (type: boolean)
- Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator [GBY_10]
- | aggregations:["count(VALUE._col0)"]
- | outputColumnNames:["_col0"]
- | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- |<-Map 5 [SIMPLE_EDGE]
- Reduce Output Operator [RS_9]
- sort order:
- Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions:_col0 (type: bigint)
- Group By Operator [GBY_8]
- aggregations:["count()"]
- outputColumnNames:["_col0"]
- Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator [SEL_5]
- Statistics:Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator [FIL_33]
- predicate:((key > '2') and key is null) (type: boolean)
- Statistics:Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
- TableScan [TS_3]
- alias:src_cbo
- Statistics:Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions:_col0 (type: bigint)
+ Group By Operator [GBY_11]
+ aggregations:["count()"]
+ outputColumnNames:["_col0"]
+ Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator [SEL_8]
+ Statistics:Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator [FIL_35]
+ predicate:((key > '2') and key is null) (type: boolean)
+ Statistics:Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan [TS_6]
+ alias:src_cbo
+ Statistics:Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
PREHOOK: query: explain select p_mfgr, b.p_name, p_size
from part b
http://git-wip-us.apache.org/repos/asf/hive/blob/0b900977/ql/src/test/results/clientpositive/tez/tez_dynpart_hashjoin_3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/tez_dynpart_hashjoin_3.q.out b/ql/src/test/results/clientpositive/tez/tez_dynpart_hashjoin_3.q.out
new file mode 100644
index 0000000..52b4288
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/tez_dynpart_hashjoin_3.q.out
@@ -0,0 +1,200 @@
+PREHOOK: query: explain
+select a.*
+from alltypesorc a left outer join src b
+on a.cint = cast(b.key as int) and (a.cint < 100)
+limit 1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select a.*
+from alltypesorc a left outer join src b
+on a.cint = cast(b.key as int) and (a.cint < 100)
+limit 1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
+ Reducer 4 <- Map 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: UDFToInteger(_col0) (type: int)
+ sort order: +
+ Map-reduce partition columns: UDFToInteger(_col0) (type: int)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 1
+ Statistics: Num rows: 1 Data size: 215 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 215 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean)
+ Reducer 2
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Right Outer Join0 to 1
+ filter predicates:
+ 0
+ 1 {(KEY.reducesinkkey0 < 100)}
+ keys:
+ 0 UDFToInteger(_col0) (type: int)
+ 1 _col2 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: tinyint), _col2 (type: smallint), _col3 (type: int), _col4 (type: bigint), _col5 (type: float), _col6 (type: double), _col7 (type: string), _col8 (type: string), _col9 (type: timestamp), _col10 (type: timestamp), _col11 (type: boolean), _col12 (type: boolean)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 1
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 4
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: int), VALUE._col3 (type: bigint), VALUE._col4 (type: float), VALUE._col5 (type: double), VALUE._col6 (type: string), VALUE._col7 (type: string), VALUE._col8 (type: timestamp), VALUE._col9 (type: timestamp), VALUE._col10 (type: boolean), VALUE._col11 (type: boolean)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
+ Statistics: Num rows: 1 Data size: 215 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 1
+ Statistics: Num rows: 1 Data size: 215 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: int)
+ Statistics: Num rows: 1 Data size: 215 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain
+select a.*
+from alltypesorc a left outer join src b
+on a.cint = cast(b.key as int) and (a.cint < 100)
+limit 1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select a.*
+from alltypesorc a left outer join src b
+on a.cint = cast(b.key as int) and (a.cint < 100)
+limit 1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Reducer 3 <- Map 1 (BROADCAST_EDGE), Map 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: UDFToInteger(_col0) (type: int)
+ sort order: +
+ Map-reduce partition columns: UDFToInteger(_col0) (type: int)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 1
+ Statistics: Num rows: 1 Data size: 215 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 215 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean)
+ Reducer 3
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: int), VALUE._col3 (type: bigint), VALUE._col4 (type: float), VALUE._col5 (type: double), VALUE._col6 (type: string), VALUE._col7 (type: string), VALUE._col8 (type: timestamp), VALUE._col9 (type: timestamp), VALUE._col10 (type: boolean), VALUE._col11 (type: boolean)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
+ Statistics: Num rows: 1 Data size: 215 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 1
+ Statistics: Num rows: 1 Data size: 215 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Right Outer Join0 to 1
+ filter predicates:
+ 0
+ 1 {(_col2 < 100)}
+ keys:
+ 0 UDFToInteger(_col0) (type: int)
+ 1 _col2 (type: int)
+ outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+ input vertices:
+ 0 Map 1
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ HybridGraceHashJoin: true
+ Select Operator
+ expressions: _col1 (type: tinyint), _col2 (type: smallint), _col3 (type: int), _col4 (type: bigint), _col5 (type: float), _col6 (type: double), _col7 (type: string), _col8 (type: string), _col9 (type: timestamp), _col10 (type: timestamp), _col11 (type: boolean), _col12 (type: boolean)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 1
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 1
+ Processor Tree:
+ ListSink
+
[2/2] hive git commit: HIVE-11684: Implement limit pushdown through
outer join in CBO (Jesus Camacho Rodriguez,
reviewed by Laljo John Pullokkaran)
Posted by jc...@apache.org.
HIVE-11684: Implement limit pushdown through outer join in CBO (Jesus Camacho Rodriguez, reviewed by Laljo John Pullokkaran)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0b900977
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0b900977
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0b900977
Branch: refs/heads/master
Commit: 0b90097703eb57dc8449af8d7d7882b8d5ca2b40
Parents: 97cb0c6
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Mon Nov 16 11:32:21 2015 +0100
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Fri Nov 20 22:11:27 2015 +0100
----------------------------------------------------------------------
.../org/apache/hadoop/hive/conf/HiveConf.java | 11 +
.../test/resources/testconfiguration.properties | 1 +
.../calcite/reloperators/HiveSortLimit.java | 15 +-
.../rules/HiveProjectSortTransposeRule.java | 66 ++
.../calcite/rules/HiveSortJoinReduceRule.java | 135 +++
.../calcite/rules/HiveSortMergeRule.java | 78 ++
.../rules/HiveSortProjectTransposeRule.java | 79 ++
.../calcite/rules/HiveSortRemoveRule.java | 79 ++
.../calcite/stats/HiveRelMdRowCount.java | 18 +
.../calcite/stats/HiveRelMdSelectivity.java | 10 +
.../hadoop/hive/ql/parse/CalcitePlanner.java | 32 +-
.../clientpositive/limit_join_transpose.q | 99 +++
.../clientpositive/tez_dynpart_hashjoin_3.q | 24 +
.../clientpositive/join32_lessSize.q.out | 46 +-
.../results/clientpositive/join_merging.q.out | 34 +-
.../clientpositive/limit_join_transpose.q.out | 869 +++++++++++++++++++
.../clientpositive/spark/join32_lessSize.q.out | 122 +--
.../clientpositive/spark/join_merging.q.out | 76 +-
.../results/clientpositive/subquery_notin.q.out | 328 ++++---
.../subquery_notin_having.q.java1.7.out | 94 +-
.../clientpositive/tez/explainuser_1.q.out | 171 ++--
.../tez/tez_dynpart_hashjoin_3.q.out | 200 +++++
22 files changed, 2137 insertions(+), 450 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/0b900977/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 4f8209a..09f3c3e 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1311,6 +1311,17 @@ public class HiveConf extends Configuration {
"we are increasing the number of files possibly by a big margin. So, we merge aggressively."),
HIVEOPTCORRELATION("hive.optimize.correlation", false, "exploit intra-query correlations."),
+ HIVE_OPTIMIZE_LIMIT_JOIN_TRANSPOSE("hive.optimize.limitjointranspose", false,
+ "Whether to push a limit through left/right outer join. If the value is true and the size of the outer\n" +
+ "input is reduced enough (as specified in hive.optimize.limitjointranspose.reduction), the limit is pushed\n" +
+ "to the outer input; to remain semantically correct, the limit is kept on top of the join too."),
+ HIVE_OPTIMIZE_LIMIT_JOIN_TRANSPOSE_REDUCTION_PERCENTAGE("hive.optimize.limitjointranspose.reductionpercentage", 1.0f,
+ "When hive.optimize.limitjointranspose is true, this variable specifies the minimal reduction of the\n" +
+ "size of the outer input of the join that we should get in order to apply the rule."),
+ HIVE_OPTIMIZE_LIMIT_JOIN_TRANSPOSE_REDUCTION_TUPLES("hive.optimize.limitjointranspose.reductiontuples", (long) 0,
+ "When hive.optimize.limitjointranspose is true, this variable specifies the minimal reduction in the\n" +
+ "number of tuples of the outer input of the join that you should get in order to apply the rule."),
+
HIVE_OPTIMIZE_SKEWJOIN_COMPILETIME("hive.optimize.skewjoin.compiletime", false,
"Whether to create a separate plan for skewed keys for the tables in the join.\n" +
"This is based on the skewed keys stored in the metadata. At compile time, the plan is broken\n" +
http://git-wip-us.apache.org/repos/asf/hive/blob/0b900977/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index b86dd19..0c5140c 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -388,6 +388,7 @@ minitez.query.files=bucket_map_join_tez1.q,\
tez_insert_overwrite_local_directory_1.q,\
tez_dynpart_hashjoin_1.q,\
tez_dynpart_hashjoin_2.q,\
+ tez_dynpart_hashjoin_3.q,\
tez_vector_dynpart_hashjoin_1.q,\
tez_vector_dynpart_hashjoin_2.q,\
tez_join_hash.q,\
http://git-wip-us.apache.org/repos/asf/hive/blob/0b900977/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSortLimit.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSortLimit.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSortLimit.java
index 82fdc0e..3077177 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSortLimit.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSortLimit.java
@@ -44,6 +44,8 @@ public class HiveSortLimit extends Sort implements HiveRelNode {
// 4. This is used by ASTConverter after we are done with Calcite Planning
private ImmutableMap<Integer, RexNode> mapOfInputRefToRexCall;
+ private boolean ruleCreated;
+
public HiveSortLimit(RelOptCluster cluster, RelTraitSet traitSet, RelNode child,
RelCollation collation, RexNode offset, RexNode fetch) {
super(cluster, TraitsUtil.getSortTraitSet(cluster, traitSet, collation), child, collation,
@@ -74,7 +76,10 @@ public class HiveSortLimit extends Sort implements HiveRelNode {
// TODO: can we blindly copy sort trait? What if inputs changed and we
// are now sorting by different cols
RelCollation canonizedCollation = traitSet.canonize(newCollation);
- return new HiveSortLimit(getCluster(), traitSet, newInput, canonizedCollation, offset, fetch);
+ HiveSortLimit sortLimit =
+ new HiveSortLimit(getCluster(), traitSet, newInput, canonizedCollation, offset, fetch);
+ sortLimit.setRuleCreated(ruleCreated);
+ return sortLimit;
}
public RexNode getFetchExpr() {
@@ -93,6 +98,14 @@ public class HiveSortLimit extends Sort implements HiveRelNode {
public void implement(Implementor implementor) {
}
+ public boolean isRuleCreated() {
+ return ruleCreated;
+ }
+
+ public void setRuleCreated(boolean ruleCreated) {
+ this.ruleCreated = ruleCreated;
+ }
+
private static class HiveSortRelFactory implements RelFactories.SortFactory {
@Override
http://git-wip-us.apache.org/repos/asf/hive/blob/0b900977/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectSortTransposeRule.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectSortTransposeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectSortTransposeRule.java
new file mode 100644
index 0000000..9d74b19
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectSortTransposeRule.java
@@ -0,0 +1,66 @@
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.rel.RelCollation;
+import org.apache.calcite.rel.RelCollationTraitDef;
+import org.apache.calcite.rel.RelFieldCollation;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rex.RexUtil;
+import org.apache.calcite.util.mapping.Mappings;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit;
+
+import com.google.common.collect.ImmutableList;
+
+public class HiveProjectSortTransposeRule extends RelOptRule {
+
+ public static final HiveProjectSortTransposeRule INSTANCE =
+ new HiveProjectSortTransposeRule();
+
+ //~ Constructors -----------------------------------------------------------
+
+ /**
+ * Creates a HiveProjectSortTransposeRule.
+ */
+ private HiveProjectSortTransposeRule() {
+ super(
+ operand(
+ HiveProject.class,
+ operand(HiveSortLimit.class, any())));
+ }
+
+ //~ Methods ----------------------------------------------------------------
+
+ // implement RelOptRule
+ public void onMatch(RelOptRuleCall call) {
+ final HiveProject project = call.rel(0);
+ final HiveSortLimit sort = call.rel(1);
+
+ // Determine mapping between project input and output fields. If sort
+ // relies on non-trivial expressions, we can't push.
+ final Mappings.TargetMapping map =
+ RelOptUtil.permutation(
+ project.getProjects(), project.getInput().getRowType()).inverse();
+ for (RelFieldCollation fc : sort.getCollation().getFieldCollations()) {
+ if (map.getTarget(fc.getFieldIndex()) < 0) {
+ return;
+ }
+ }
+
+ // Create new collation
+ final RelCollation newCollation =
+ RelCollationTraitDef.INSTANCE.canonize(
+ RexUtil.apply(map, sort.getCollation()));
+
+ // New operators
+ final RelNode newProject = project.copy(sort.getInput().getTraitSet(),
+ ImmutableList.<RelNode>of(sort.getInput()));
+ final HiveSortLimit newSort = sort.copy(newProject.getTraitSet(),
+ newProject, newCollation, sort.offset, sort.fetch);
+
+ call.transformTo(newSort);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/0b900977/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortJoinReduceRule.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortJoinReduceRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortJoinReduceRule.java
new file mode 100644
index 0000000..d8b9c54
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortJoinReduceRule.java
@@ -0,0 +1,135 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.rel.RelCollation;
+import org.apache.calcite.rel.RelCollationTraitDef;
+import org.apache.calcite.rel.RelCollations;
+import org.apache.calcite.rel.RelFieldCollation;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.JoinRelType;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.rex.RexLiteral;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit;
+
+/**
+ * Planner rule that pushes
+ * a {@link org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit}
+ * past a {@link org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin}.
+ */
+public class HiveSortJoinReduceRule extends RelOptRule {
+
+ public static final HiveSortJoinReduceRule INSTANCE =
+ new HiveSortJoinReduceRule();
+
+ //~ Constructors -----------------------------------------------------------
+
+ private HiveSortJoinReduceRule() {
+ super(
+ operand(
+ HiveSortLimit.class,
+ operand(HiveJoin.class, any())));
+ }
+
+ //~ Methods ----------------------------------------------------------------
+
+ @Override
+ public boolean matches(RelOptRuleCall call) {
+ final HiveSortLimit sortLimit = call.rel(0);
+ final HiveJoin join = call.rel(1);
+
+ // If sort does not contain a limit operation, we bail out
+ if (!HiveCalciteUtil.limitRelNode(sortLimit)) {
+ return false;
+ }
+
+ // 1) If join is not a left or right outer, we bail out
+ // 2) If any sort column is not part of the input where the
+ // sort is pushed, we bail out
+ RelNode reducedInput;
+ if (join.getJoinType() == JoinRelType.LEFT) {
+ reducedInput = join.getLeft();
+ if (sortLimit.getCollation() != RelCollations.EMPTY) {
+ for (RelFieldCollation relFieldCollation
+ : sortLimit.getCollation().getFieldCollations()) {
+ if (relFieldCollation.getFieldIndex()
+ >= join.getLeft().getRowType().getFieldCount()) {
+ return false;
+ }
+ }
+ }
+ } else if (join.getJoinType() == JoinRelType.RIGHT) {
+ reducedInput = join.getRight();
+ if (sortLimit.getCollation() != RelCollations.EMPTY) {
+ for (RelFieldCollation relFieldCollation
+ : sortLimit.getCollation().getFieldCollations()) {
+ if (relFieldCollation.getFieldIndex()
+ < join.getLeft().getRowType().getFieldCount()) {
+ return false;
+ }
+ }
+ }
+ } else {
+ return false;
+ }
+
+ // Finally, if we do not reduce the input size, we bail out
+ if (RexLiteral.intValue(sortLimit.fetch)
+ >= RelMetadataQuery.getRowCount(reducedInput)) {
+ return false;
+ }
+
+ return true;
+ }
+
+ @Override
+ public void onMatch(RelOptRuleCall call) {
+ final HiveSortLimit sortLimit = call.rel(0);
+ final HiveJoin join = call.rel(1);
+ RelNode inputLeft = join.getLeft();
+ RelNode inputRight = join.getRight();
+
+ // We create a new sort operator on the corresponding input
+ if (join.getJoinType() == JoinRelType.LEFT) {
+ inputLeft = sortLimit.copy(sortLimit.getTraitSet(), inputLeft,
+ sortLimit.getCollation(), sortLimit.offset, sortLimit.fetch);
+ ((HiveSortLimit) inputLeft).setRuleCreated(true);
+ } else {
+ // Adjust right collation
+ final RelCollation rightCollation =
+ RelCollationTraitDef.INSTANCE.canonize(
+ RelCollations.shift(sortLimit.getCollation(),
+ -join.getLeft().getRowType().getFieldCount()));
+ inputRight = sortLimit.copy(sortLimit.getTraitSet().replace(rightCollation), inputRight,
+ rightCollation, sortLimit.offset, sortLimit.fetch);
+ ((HiveSortLimit) inputRight).setRuleCreated(true);
+ }
+ // We copy the join and the top sort operator
+ RelNode result = join.copy(join.getTraitSet(), join.getCondition(), inputLeft,
+ inputRight, join.getJoinType(), join.isSemiJoinDone());
+ result = sortLimit.copy(sortLimit.getTraitSet(), result, sortLimit.getCollation(),
+ sortLimit.offset, sortLimit.fetch);
+
+ call.transformTo(result);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/0b900977/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortMergeRule.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortMergeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortMergeRule.java
new file mode 100644
index 0000000..ebb20c2
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortMergeRule.java
@@ -0,0 +1,78 @@
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.rex.RexLiteral;
+import org.apache.calcite.rex.RexNode;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit;
+
+/**
+ * This rule will merge two HiveSortLimit operators.
+ *
+ * It is applied when the top match is a pure limit operation (no sorting).
+ *
+ * If the bottom operator is not synthetic and does not contain a limit,
+ * we currently bail out. Thus, we avoid a lot of unnecessary limit operations
+ * in the middle of the execution plan that could create performance regressions.
+ */
+public class HiveSortMergeRule extends RelOptRule {
+
+ public static final HiveSortMergeRule INSTANCE =
+ new HiveSortMergeRule();
+
+ //~ Constructors -----------------------------------------------------------
+
+ /**
+ * Creates a HiveSortProjectTransposeRule.
+ */
+ private HiveSortMergeRule() {
+ super(
+ operand(
+ HiveSortLimit.class,
+ operand(HiveSortLimit.class, any())));
+ }
+
+ //~ Methods ----------------------------------------------------------------
+
+ @Override
+ public boolean matches(RelOptRuleCall call) {
+ final HiveSortLimit topSortLimit = call.rel(0);
+ final HiveSortLimit bottomSortLimit = call.rel(1);
+
+ // If top operator is not a pure limit, we bail out
+ if (!HiveCalciteUtil.pureLimitRelNode(topSortLimit)) {
+ return false;
+ }
+
+ // If the bottom operator is not synthetic and it does not contain a limit,
+ // we will bail out; we do not want to end up with limits all over the tree
+ if (topSortLimit.isRuleCreated() && !bottomSortLimit.isRuleCreated() &&
+ bottomSortLimit.fetch == null) {
+ return false;
+ }
+
+ return true;
+ }
+
+ // implement RelOptRule
+ public void onMatch(RelOptRuleCall call) {
+ final HiveSortLimit topSortLimit = call.rel(0);
+ final HiveSortLimit bottomSortLimit = call.rel(1);
+
+ // Lowest limit
+ final RexNode newLimit;
+ if (bottomSortLimit.fetch != null && RexLiteral.intValue(topSortLimit.fetch)
+ >= RexLiteral.intValue(bottomSortLimit.fetch)) {
+ newLimit = bottomSortLimit.fetch;
+ } else {
+ newLimit = topSortLimit.fetch;
+ }
+
+ final HiveSortLimit newSort = bottomSortLimit.copy(bottomSortLimit.getTraitSet(),
+ bottomSortLimit.getInput(), bottomSortLimit.collation, null, newLimit);
+
+ call.transformTo(newSort);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/0b900977/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortProjectTransposeRule.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortProjectTransposeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortProjectTransposeRule.java
new file mode 100644
index 0000000..debfbe0
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortProjectTransposeRule.java
@@ -0,0 +1,79 @@
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.rel.RelCollation;
+import org.apache.calcite.rel.RelCollationTraitDef;
+import org.apache.calcite.rel.RelFieldCollation;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rex.RexUtil;
+import org.apache.calcite.util.mapping.Mappings;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit;
+
+import com.google.common.collect.ImmutableList;
+
+public class HiveSortProjectTransposeRule extends RelOptRule {
+
+ public static final HiveSortProjectTransposeRule INSTANCE =
+ new HiveSortProjectTransposeRule();
+
+ //~ Constructors -----------------------------------------------------------
+
+ /**
+ * Creates a HiveSortProjectTransposeRule.
+ */
+ private HiveSortProjectTransposeRule() {
+ super(
+ operand(
+ HiveSortLimit.class,
+ operand(HiveProject.class, any())));
+ }
+
+ //~ Methods ----------------------------------------------------------------
+
+ @Override
+ public boolean matches(RelOptRuleCall call) {
+ final HiveSortLimit sortLimit = call.rel(0);
+
+ // If does not contain a limit operation, we bail out
+ if (!HiveCalciteUtil.limitRelNode(sortLimit)) {
+ return false;
+ }
+
+ return true;
+ }
+
+ // implement RelOptRule
+ public void onMatch(RelOptRuleCall call) {
+ final HiveSortLimit sort = call.rel(0);
+ final HiveProject project = call.rel(1);
+
+ // Determine mapping between project input and output fields. If sort
+ // relies on non-trivial expressions, we can't push.
+ final Mappings.TargetMapping map =
+ RelOptUtil.permutation(
+ project.getProjects(), project.getInput().getRowType());
+ for (RelFieldCollation fc : sort.getCollation().getFieldCollations()) {
+ if (map.getTargetOpt(fc.getFieldIndex()) < 0) {
+ return;
+ }
+ }
+
+ // Create new collation
+ final RelCollation newCollation =
+ RelCollationTraitDef.INSTANCE.canonize(
+ RexUtil.apply(map, sort.getCollation()));
+
+ // New operators
+ final HiveSortLimit newSort = sort.copy(sort.getTraitSet().replace(newCollation),
+ project.getInput(), newCollation, sort.offset, sort.fetch);
+ final RelNode newProject = project.copy(sort.getTraitSet(),
+ ImmutableList.<RelNode>of(newSort));
+
+ call.transformTo(newProject);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/0b900977/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortRemoveRule.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortRemoveRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortRemoveRule.java
new file mode 100644
index 0000000..618c717
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortRemoveRule.java
@@ -0,0 +1,79 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelOptRuleOperand;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.rex.RexLiteral;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit;
+
+/**
+ * Planner rule that removes
+ * a {@link org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit}.
+ */
+public class HiveSortRemoveRule extends RelOptRule {
+
+ protected final float reductionProportion;
+ protected final float reductionTuples;
+
+ //~ Constructors -----------------------------------------------------------
+
+ public HiveSortRemoveRule(float reductionProportion, long reductionTuples) {
+ this(operand(HiveSortLimit.class, any()), reductionProportion, reductionTuples);
+ }
+
+ private HiveSortRemoveRule(RelOptRuleOperand operand, float reductionProportion,
+ long reductionTuples) {
+ super(operand);
+ this.reductionProportion = reductionProportion;
+ this.reductionTuples = reductionTuples;
+ }
+
+ //~ Methods ----------------------------------------------------------------
+
+ @Override
+ public boolean matches(RelOptRuleCall call) {
+ final HiveSortLimit sortLimit = call.rel(0);
+
+ // If it is not created by HiveSortJoinReduceRule, we cannot remove it
+ if (!sortLimit.isRuleCreated()) {
+ return false;
+ }
+
+ // Finally, if we do not reduce the size input enough, we bail out
+ int limit = RexLiteral.intValue(sortLimit.fetch);
+ Double rowCount = RelMetadataQuery.getRowCount(sortLimit.getInput());
+ if (rowCount != null && limit <= reductionProportion * rowCount &&
+ rowCount - limit >= reductionTuples) {
+ return false;
+ }
+
+ return true;
+ }
+
+ @Override
+ public void onMatch(RelOptRuleCall call) {
+ final HiveSortLimit sortLimit = call.rel(0);
+
+ // We remove the limit operator
+ call.transformTo(sortLimit.getInput());
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/0b900977/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java
index 728c5aa..caf8978 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java
@@ -30,6 +30,7 @@ import org.apache.calcite.rel.core.Join;
import org.apache.calcite.rel.core.JoinRelType;
import org.apache.calcite.rel.core.Project;
import org.apache.calcite.rel.core.SemiJoin;
+import org.apache.calcite.rel.core.Sort;
import org.apache.calcite.rel.core.TableScan;
import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider;
import org.apache.calcite.rel.metadata.RelMdRowCount;
@@ -38,6 +39,7 @@ import org.apache.calcite.rel.metadata.RelMetadataQuery;
import org.apache.calcite.rex.RexBuilder;
import org.apache.calcite.rex.RexCall;
import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexLiteral;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.rex.RexUtil;
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
@@ -87,6 +89,22 @@ public class HiveRelMdRowCount extends RelMdRowCount {
return super.getRowCount(rel);
}
+ @Override
+ public Double getRowCount(Sort rel) {
+ final Double rowCount = RelMetadataQuery.getRowCount(rel.getInput());
+ if (rowCount != null && rel.fetch != null) {
+ final int offset = rel.offset == null ? 0 : RexLiteral.intValue(rel.offset);
+ final int limit = RexLiteral.intValue(rel.fetch);
+ final Double offsetLimit = new Double(offset + limit);
+ // offsetLimit is smaller than rowCount of the input operator
+ // thus, we return the offsetLimit
+ if (offsetLimit < rowCount) {
+ return offsetLimit;
+ }
+ }
+ return rowCount;
+ }
+
static class PKFKRelationInfo {
public final int fkSide;
public final double ndvScalingFactor;
http://git-wip-us.apache.org/repos/asf/hive/blob/0b900977/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java
index 715f24f..a0eb83d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java
@@ -61,6 +61,16 @@ public class HiveRelMdSelectivity extends RelMdSelectivity {
public Double getSelectivity(HiveJoin j, RexNode predicate) throws CalciteSemanticException {
if (j.getJoinType().equals(JoinRelType.INNER)) {
return computeInnerJoinSelectivity(j, predicate);
+ } else if (j.getJoinType().equals(JoinRelType.LEFT) ||
+ j.getJoinType().equals(JoinRelType.RIGHT)) {
+ double left = RelMetadataQuery.getRowCount(j.getLeft());
+ double right = RelMetadataQuery.getRowCount(j.getRight());
+ double product = left * right;
+ double innerJoinSelectivity = computeInnerJoinSelectivity(j, predicate);
+ if (j.getJoinType().equals(JoinRelType.LEFT)) {
+ return Math.max(innerJoinSelectivity, left/product);
+ }
+ return Math.max(innerJoinSelectivity, right/product);
}
return 1.0;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/0b900977/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index de67b54..bfe4633 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -148,8 +148,13 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinToMultiJoinRule
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePartitionPruneRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePreFilteringRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveProjectMergeRule;
+import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveProjectSortTransposeRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelFieldTrimmer;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRulesRegistry;
+import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortJoinReduceRule;
+import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortMergeRule;
+import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortProjectTransposeRule;
+import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortRemoveRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveWindowingFixRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTConverter;
import org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverter;
@@ -953,16 +958,31 @@ public class CalcitePlanner extends SemanticAnalyzer {
basePlan = hepPlan(basePlan, true, mdProvider, HiveExpandDistinctAggregatesRule.INSTANCE);
}
- // 1. Push Down Semi Joins
+ // 1. Push down limit through outer join
+ if (conf.getBoolVar(HiveConf.ConfVars.HIVE_OPTIMIZE_LIMIT_JOIN_TRANSPOSE)) {
+ // This should be a cost based decision, but till we enable the extended cost
+ // model, we will use the given value for the variable
+ final float reductionProportion = HiveConf.getFloatVar(conf,
+ HiveConf.ConfVars.HIVE_OPTIMIZE_LIMIT_JOIN_TRANSPOSE_REDUCTION_PERCENTAGE);
+ final long reductionTuples = HiveConf.getLongVar(conf,
+ HiveConf.ConfVars.HIVE_OPTIMIZE_LIMIT_JOIN_TRANSPOSE_REDUCTION_TUPLES);
+ basePlan = hepPlan(basePlan, true, mdProvider, HiveSortMergeRule.INSTANCE,
+ HiveSortProjectTransposeRule.INSTANCE, HiveSortJoinReduceRule.INSTANCE);
+ basePlan = hepPlan(basePlan, true, mdProvider, HepMatchOrder.BOTTOM_UP,
+ new HiveSortRemoveRule(reductionProportion, reductionTuples),
+ HiveProjectSortTransposeRule.INSTANCE);
+ }
+
+ // 2. Push Down Semi Joins
basePlan = hepPlan(basePlan, true, mdProvider, SemiJoinJoinTransposeRule.INSTANCE,
SemiJoinFilterTransposeRule.INSTANCE, SemiJoinProjectTransposeRule.INSTANCE);
- // 2. Add not null filters
+ // 3. Add not null filters
if (conf.getBoolVar(HiveConf.ConfVars.HIVE_CBO_RETPATH_HIVEOP)) {
basePlan = hepPlan(basePlan, true, mdProvider, HiveJoinAddNotNullRule.INSTANCE);
}
- // 3. Constant propagation, common filter extraction, and PPD
+ // 4. Constant propagation, common filter extraction, and PPD
basePlan = hepPlan(basePlan, true, mdProvider,
ReduceExpressionsRule.PROJECT_INSTANCE,
ReduceExpressionsRule.FILTER_INSTANCE,
@@ -976,12 +996,12 @@ public class CalcitePlanner extends SemanticAnalyzer {
new FilterAggregateTransposeRule(Filter.class,
HiveFilter.DEFAULT_FILTER_FACTORY, Aggregate.class));
- // 4. Transitive inference & Partition Pruning
+ // 5. Transitive inference & Partition Pruning
basePlan = hepPlan(basePlan, false, mdProvider, new HiveJoinPushTransitivePredicatesRule(
Join.class, HiveFilter.DEFAULT_FILTER_FACTORY),
new HivePartitionPruneRule(conf));
- // 5. Projection Pruning
+ // 6. Projection Pruning
HiveRelFieldTrimmer fieldTrimmer = new HiveRelFieldTrimmer(null,
cluster, HiveProject.DEFAULT_PROJECT_FACTORY,
HiveFilter.DEFAULT_FILTER_FACTORY, HiveJoin.HIVE_JOIN_FACTORY,
@@ -989,7 +1009,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
HiveAggregate.HIVE_AGGR_REL_FACTORY, HiveUnion.UNION_REL_FACTORY);
basePlan = fieldTrimmer.trim(basePlan);
- // 6. Rerun PPD through Project as column pruning would have introduced DT
+ // 7. Rerun PPD through Project as column pruning would have introduced DT
// above scans
basePlan = hepPlan(basePlan, true, mdProvider,
new FilterProjectTransposeRule(Filter.class, HiveFilter.DEFAULT_FILTER_FACTORY,
http://git-wip-us.apache.org/repos/asf/hive/blob/0b900977/ql/src/test/queries/clientpositive/limit_join_transpose.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/limit_join_transpose.q b/ql/src/test/queries/clientpositive/limit_join_transpose.q
new file mode 100644
index 0000000..bfc2378
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/limit_join_transpose.q
@@ -0,0 +1,99 @@
+set hive.optimize.limitjointranspose=false;
+
+explain
+select *
+from src src1 left outer join src src2
+on src1.key = src2.key
+limit 1;
+
+select *
+from src src1 left outer join src src2
+on src1.key = src2.key
+limit 1;
+
+
+set hive.optimize.limitjointranspose=true;
+set hive.optimize.limitjointranspose.reductionpercentage=0.0001f;
+set hive.optimize.limitjointranspose.reductiontuples=10;
+
+explain
+select *
+from src src1 left outer join src src2
+on src1.key = src2.key
+limit 1;
+
+select *
+from src src1 left outer join src src2
+on src1.key = src2.key
+limit 1;
+
+
+set hive.optimize.limitjointranspose.reductionpercentage=0.1f;
+set hive.optimize.limitjointranspose.reductiontuples=10;
+
+explain
+select *
+from src src1 left outer join src src2
+on src1.key = src2.key
+limit 1;
+
+select *
+from src src1 left outer join src src2
+on src1.key = src2.key
+limit 1;
+
+explain
+select *
+from src src1 right outer join (
+ select *
+ from src src2 left outer join src src3
+ on src2.value = src3.value) src2
+on src1.key = src2.key
+limit 1;
+
+select *
+from src src1 right outer join (
+ select *
+ from src src2 left outer join src src3
+ on src2.value = src3.value) src2
+on src1.key = src2.key
+limit 1;
+
+set hive.optimize.limitjointranspose.reductionpercentage=1f;
+set hive.optimize.limitjointranspose.reductiontuples=0;
+
+explain
+select *
+from src src1 right outer join (
+ select *
+ from src src2 left outer join src src3
+ on src2.value = src3.value) src2
+on src1.key = src2.key
+limit 1;
+
+select *
+from src src1 right outer join (
+ select *
+ from src src2 left outer join src src3
+ on src2.value = src3.value) src2
+on src1.key = src2.key
+limit 1;
+
+explain
+select *
+from src src1 right outer join (
+ select src2.key, src2.value
+ from src src2 left outer join src src3
+ on src2.value = src3.value) src2
+on src1.key = src2.key
+order by src2.key
+limit 1;
+
+select *
+from src src1 right outer join (
+ select src2.key, src2.value
+ from src src2 left outer join src src3
+ on src2.value = src3.value) src2
+on src1.key = src2.key
+order by src2.key
+limit 1;
http://git-wip-us.apache.org/repos/asf/hive/blob/0b900977/ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_3.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_3.q b/ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_3.q
new file mode 100644
index 0000000..1994b40
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_3.q
@@ -0,0 +1,24 @@
+set hive.optimize.limitjointranspose=true;
+set hive.optimize.limitjointranspose.reductionpercentage=0.1f;
+set hive.optimize.limitjointranspose.reductiontuples=100;
+set hive.explain.user=false;
+set hive.auto.convert.join=false;
+set hive.optimize.dynamic.partition.hashjoin=false;
+
+explain
+select a.*
+from alltypesorc a left outer join src b
+on a.cint = cast(b.key as int) and (a.cint < 100)
+limit 1;
+
+
+set hive.auto.convert.join=true;
+set hive.optimize.dynamic.partition.hashjoin=true;
+set hive.auto.convert.join.noconditionaltask.size=200000;
+set hive.exec.reducers.bytes.per.reducer=200000;
+
+explain
+select a.*
+from alltypesorc a left outer join src b
+on a.cint = cast(b.key as int) and (a.cint < 100)
+limit 1;
http://git-wip-us.apache.org/repos/asf/hive/blob/0b900977/ql/src/test/results/clientpositive/join32_lessSize.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/join32_lessSize.q.out b/ql/src/test/results/clientpositive/join32_lessSize.q.out
index 5ea4024..8e71710 100644
--- a/ql/src/test/results/clientpositive/join32_lessSize.q.out
+++ b/ql/src/test/results/clientpositive/join32_lessSize.q.out
@@ -1920,22 +1920,22 @@ TOK_QUERY
STAGE DEPENDENCIES:
- Stage-9 is a root stage
- Stage-7 depends on stages: Stage-9
- Stage-8 depends on stages: Stage-7
+ Stage-8 is a root stage
Stage-6 depends on stages: Stage-8
- Stage-0 depends on stages: Stage-6
- Stage-3 depends on stages: Stage-0
+ Stage-7 depends on stages: Stage-6
+ Stage-5 depends on stages: Stage-7
+ Stage-0 depends on stages: Stage-5
+ Stage-2 depends on stages: Stage-0
STAGE PLANS:
- Stage: Stage-9
+ Stage: Stage-8
Map Reduce Local Work
Alias -> Map Local Tables:
- $hdt$_0:$hdt$_0:y
+ $hdt$_1:$hdt$_1:y
Fetch Operator
limit: -1
Alias -> Map Local Operator Tree:
- $hdt$_0:$hdt$_0:y
+ $hdt$_1:$hdt$_1:y
TableScan
alias: y
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -1950,7 +1950,7 @@ STAGE PLANS:
1 _col0 (type: string)
Position of Big Table: 1
- Stage: Stage-7
+ Stage: Stage-6
Map Reduce
Map Operator Tree:
TableScan
@@ -2089,12 +2089,12 @@ STAGE PLANS:
name: default.src1
name: default.src1
Truncated Path -> Alias:
- /src1 [$hdt$_0:$hdt$_1:x]
+ /src1 [$hdt$_1:$hdt$_2:x]
- Stage: Stage-8
+ Stage: Stage-7
Map Reduce Local Work
Alias -> Map Local Tables:
- $hdt$_1:z
+ $hdt$_0:z
Fetch Operator
limit: -1
Partition Description:
@@ -2144,7 +2144,7 @@ STAGE PLANS:
name: default.srcpart
name: default.srcpart
Alias -> Map Local Operator Tree:
- $hdt$_1:z
+ $hdt$_0:z
TableScan
alias: z
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -2159,11 +2159,11 @@ STAGE PLANS:
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
HashTable Sink Operator
keys:
- 0 _col1 (type: string)
- 1 _col0 (type: string)
- Position of Big Table: 0
+ 0 _col0 (type: string)
+ 1 _col1 (type: string)
+ Position of Big Table: 1
- Stage: Stage-6
+ Stage: Stage-5
Map Reduce
Map Operator Tree:
TableScan
@@ -2172,13 +2172,13 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 _col1 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col0, _col1, _col2
- Position of Big Table: 0
+ 0 _col0 (type: string)
+ 1 _col1 (type: string)
+ outputColumnNames: _col0, _col3, _col4
+ Position of Big Table: 1
Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _col2 (type: string), _col1 (type: string)
+ expressions: _col3 (type: string), _col0 (type: string), _col4 (type: string)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -2313,7 +2313,7 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest_j2
- Stage: Stage-3
+ Stage: Stage-2
Stats-Aggr Operator
#### A masked pattern was here ####
http://git-wip-us.apache.org/repos/asf/hive/blob/0b900977/ql/src/test/results/clientpositive/join_merging.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/join_merging.q.out b/ql/src/test/results/clientpositive/join_merging.q.out
index b42bdc5..9ab9587 100644
--- a/ql/src/test/results/clientpositive/join_merging.q.out
+++ b/ql/src/test/results/clientpositive/join_merging.q.out
@@ -97,12 +97,12 @@ from part p1 left outer join part p2 on p1.p_partkey = p2.p_partkey
p1.p_size > 10 and p1.p_size > p2.p_size + 10
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
- Stage-0 depends on stages: Stage-2
+ Stage-2 is a root stage
+ Stage-1 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-1
+ Stage: Stage-2
Map Reduce
Map Operator Tree:
TableScan
@@ -153,17 +153,10 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
- Stage: Stage-2
+ Stage: Stage-1
Map Reduce
Map Operator Tree:
TableScan
- Reduce Output Operator
- key expressions: _col2 (type: int)
- sort order: +
- Map-reduce partition columns: _col2 (type: int)
- Statistics: Num rows: 9 Data size: 1112 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: int), _col3 (type: int)
- TableScan
alias: p1
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
@@ -175,17 +168,24 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ TableScan
+ Reduce Output Operator
+ key expressions: _col2 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: int)
+ Statistics: Num rows: 9 Data size: 1112 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col3 (type: int)
Reduce Operator Tree:
Join Operator
condition map:
- Right Outer Join0 to 1
+ Left Outer Join0 to 1
keys:
- 0 _col2 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col1, _col3
+ 0 _col0 (type: int)
+ 1 _col2 (type: int)
+ outputColumnNames: _col2, _col4
Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col1 (type: int), _col3 (type: int)
+ expressions: _col2 (type: int), _col4 (type: int)
outputColumnNames: _col0, _col1
Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE
File Output Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/0b900977/ql/src/test/results/clientpositive/limit_join_transpose.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/limit_join_transpose.q.out b/ql/src/test/results/clientpositive/limit_join_transpose.q.out
new file mode 100644
index 0000000..8717154
--- /dev/null
+++ b/ql/src/test/results/clientpositive/limit_join_transpose.q.out
@@ -0,0 +1,869 @@
+PREHOOK: query: explain
+select *
+from src src1 left outer join src src2
+on src1.key = src2.key
+limit 1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select *
+from src src1 left outer join src src2
+on src1.key = src2.key
+limit 1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ TableScan
+ alias: src1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 1
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select *
+from src src1 left outer join src src2
+on src1.key = src2.key
+limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select *
+from src src1 left outer join src src2
+on src1.key = src2.key
+limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 val_0 0 val_0
+PREHOOK: query: explain
+select *
+from src src1 left outer join src src2
+on src1.key = src2.key
+limit 1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select *
+from src src1 left outer join src src2
+on src1.key = src2.key
+limit 1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ TableScan
+ alias: src1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 1
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select *
+from src src1 left outer join src src2
+on src1.key = src2.key
+limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select *
+from src src1 left outer join src src2
+on src1.key = src2.key
+limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 val_0 0 val_0
+PREHOOK: query: explain
+select *
+from src src1 left outer join src src2
+on src1.key = src2.key
+limit 1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select *
+from src src1 left outer join src src2
+on src1.key = src2.key
+limit 1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 1
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string)
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string), VALUE._col1 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 1
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ TableScan
+ alias: src1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 1
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select *
+from src src1 left outer join src src2
+on src1.key = src2.key
+limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select *
+from src src1 left outer join src src2
+on src1.key = src2.key
+limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+238 val_238 238 val_238
+PREHOOK: query: explain
+select *
+from src src1 right outer join (
+ select *
+ from src src2 left outer join src src3
+ on src2.value = src3.value) src2
+on src1.key = src2.key
+limit 1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select *
+from src src1 right outer join (
+ select *
+ from src src2 left outer join src src3
+ on src2.value = src3.value) src2
+on src1.key = src2.key
+limit 1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-3 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 1
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string)
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string), VALUE._col1 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 1
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ TableScan
+ alias: src1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col1 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col2 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: string)
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string)
+ TableScan
+ alias: src1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 _col2 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col4 (type: string), _col5 (type: string), _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 1
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select *
+from src src1 right outer join (
+ select *
+ from src src2 left outer join src src3
+ on src2.value = src3.value) src2
+on src1.key = src2.key
+limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select *
+from src src1 right outer join (
+ select *
+ from src src2 left outer join src src3
+ on src2.value = src3.value) src2
+on src1.key = src2.key
+limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+238 val_238 238 val_238 238 val_238
+PREHOOK: query: explain
+select *
+from src src1 right outer join (
+ select *
+ from src src2 left outer join src src3
+ on src2.value = src3.value) src2
+on src1.key = src2.key
+limit 1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select *
+from src src1 right outer join (
+ select *
+ from src src2 left outer join src src3
+ on src2.value = src3.value) src2
+on src1.key = src2.key
+limit 1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-3 depends on stages: Stage-2
+ Stage-4 depends on stages: Stage-3
+ Stage-1 depends on stages: Stage-4
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 1
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string)
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string), VALUE._col1 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 1
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ TableScan
+ alias: src1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col1 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 1
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 1
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ TableScan
+ Reduce Output Operator
+ key expressions: _col2 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: string)
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Right Outer Join0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col2 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 1
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select *
+from src src1 right outer join (
+ select *
+ from src src2 left outer join src src3
+ on src2.value = src3.value) src2
+on src1.key = src2.key
+limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select *
+from src src1 right outer join (
+ select *
+ from src src2 left outer join src src3
+ on src2.value = src3.value) src2
+on src1.key = src2.key
+limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+238 val_238 238 val_238 238 val_238
+PREHOOK: query: explain
+select *
+from src src1 right outer join (
+ select src2.key, src2.value
+ from src src2 left outer join src src3
+ on src2.value = src3.value) src2
+on src1.key = src2.key
+order by src2.key
+limit 1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select *
+from src src1 right outer join (
+ select src2.key, src2.value
+ from src src2 left outer join src src3
+ on src2.value = src3.value) src2
+on src1.key = src2.key
+order by src2.key
+limit 1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-5 is a root stage
+ Stage-3 depends on stages: Stage-5
+ Stage-4 depends on stages: Stage-3
+ Stage-1 depends on stages: Stage-4
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-5
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 1
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: value (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ TableScan
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Right Outer Join0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col1 (type: string)
+ outputColumnNames: _col1, _col2
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: string), _col2 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 1
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Right Outer Join0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col2 (type: string)
+ sort order: +
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string)
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col2 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 1
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select *
+from src src1 right outer join (
+ select src2.key, src2.value
+ from src src2 left outer join src src3
+ on src2.value = src3.value) src2
+on src1.key = src2.key
+order by src2.key
+limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select *
+from src src1 right outer join (
+ select src2.key, src2.value
+ from src src2 left outer join src src3
+ on src2.value = src3.value) src2
+on src1.key = src2.key
+order by src2.key
+limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 val_0 0 val_0